In [3]:
import pandas as pd

In [4]:
# Load the strain catalogue from a CSV in the working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks like a
# row index that was written into the file — consider index_col=0 if nothing relies on it.
df = pd.read_csv('cannabis.csv')

In [5]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."


In [6]:
# (rows, columns) of the raw table, before any cleaning.
df.shape

(2351, 6)

In [7]:
# Keep only strains that have a description, since the description is the text the
# vectorizer is fitted on. Row labels are left as they were (no reset_index), so any
# later lookup by neighbour position has to be positional (.iloc), not label-based.
# NOTE(review): the sample recommendations recorded at the end of this notebook still
# contain rows with rating 0.0 and blank text fields, so some placeholder descriptions
# apparently survive this filter — confirm what they contain and filter them here too.
df = df[df['Description'].notna()]

In [8]:
# (rows, columns) after dropping strains without a description.
df.shape

(2318, 6)

### Vectorizer

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [10]:
# Build the TF-IDF vectorizer (English stop words removed) and learn its
# vocabulary and IDF weights from the strain descriptions in one step;
# fit() hands back the fitted vectorizer itself.
tfidf = TfidfVectorizer(stop_words='english').fit(df['Description'])

In [11]:
# Persist the fitted vectorizer so the API section can reload it without refitting.
import pickle

filename = 'vect_01.pkl'
# A context manager guarantees the file is flushed and closed even if dump() raises;
# a bare open() left the handle open until garbage collection.
with open(filename, 'wb') as pickle_file:
    pickle.dump(tfidf, pickle_file)

### Model

In [12]:
# Vectorize every description: one row per strain, one column per vocabulary term.
sparse = tfidf.transform(df['Description'])

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement and
# fall back to the old name so the cell also runs on older installations.
if hasattr(tfidf, 'get_feature_names_out'):
    feature_names = tfidf.get_feature_names_out()
else:
    feature_names = tfidf.get_feature_names()

# Dense document-term matrix as a DataFrame. toarray() yields a plain ndarray,
# whereas todense() yields the legacy np.matrix type.
tfidf_dtm = pd.DataFrame(sparse.toarray(), columns=feature_names)

In [13]:
from sklearn.neighbors import NearestNeighbors
# Index every description vector for neighbour lookup; a query returns 5 neighbours
# unless the caller asks for a different number.
nn = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
# Kept as the cell's final expression so the fitted estimator's repr is displayed;
# it shows the resolved defaults (metric='minkowski' with p=2, i.e. Euclidean distance).
nn.fit(tfidf_dtm)

NearestNeighbors(algorithm='ball_tree', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=None, n_neighbors=5, p=2, radius=1.0)

In [14]:
# Persist the fitted neighbour index next to the vectorizer.
filename = 'knn_01.pkl'
# Context manager so the file is flushed and closed deterministically
# (a bare open() leaked the handle).
with open(filename, 'wb') as pickle_file:
    pickle.dump(nn, pickle_file)

### API

In [15]:
#import vectorizer and model
import pickle 
import pandas as pd

# Reload the fitted vectorizer and neighbour index written by the cells above.
# SECURITY: pickle.load executes arbitrary code embedded in the file — only load
# artifacts produced by this notebook or another trusted source.
# NOTE(review): recommend() also reads the strain table `df`, which this section does
# not reload; it must already be in the kernel (or be loaded here) for the API to work.
with open("vect_01.pkl", "rb") as vect_file:
    tfidf = pickle.load(vect_file)
with open("knn_01.pkl", "rb") as knn_file:
    nn = pickle.load(knn_file)

In [16]:
def recommend(request, n_neighbors=5, vectorizer=None, model=None, strains=None):
    """Return the strains whose descriptions are closest to a free-text request.

    The request is projected with the fitted vectorizer, the fitted neighbour
    index is queried, and the matching rows are taken from the strain table
    by position.

    Parameters
    ----------
    request : str or iterable of str
        Free-text description of the desired strain. If several texts are
        supplied, only the neighbours of the first one are returned.
    n_neighbors : int, default 5
        How many recommendations to return.
    vectorizer : optional
        Object with a ``transform`` method returning a sparse matrix.
        Defaults to the module-level ``tfidf``.
    model : optional
        Object with a ``kneighbors(X, n_neighbors=...)`` method returning
        ``(distances, indices)``. Defaults to the module-level ``nn``.
    strains : pandas.DataFrame, optional
        Table to pick the recommendations from. Defaults to the module-level
        ``df``. Rows are selected by position, so it must be in the same row
        order as the data the model was fitted on.

    Returns
    -------
    pandas.DataFrame
        The ``n_neighbors`` matching rows of ``strains``, in the order the
        model returned them.
    """
    # Fall back to the notebook-level objects only when nothing was injected,
    # so existing `recommend(text)` calls behave exactly as before.
    vectorizer = tfidf if vectorizer is None else vectorizer
    model = nn if model is None else model
    strains = df if strains is None else strains

    # Transform the request into the vectorizer's feature space
    request_sparse = vectorizer.transform(pd.Series(request))

    # Densify to a plain ndarray for the neighbour query
    request_dense = request_sparse.toarray()

    # kneighbors returns (distances, indices); keep the indices of the first query row
    _, neighbor_positions = model.kneighbors(request_dense, n_neighbors=n_neighbors)
    top_positions = neighbor_positions[0].tolist()

    # Look the recommendations up by position, not by index label
    return strains.iloc[top_positions]
    

In [17]:
# Create a fake weed review: a made-up free-text strain description used as a sample query
fake_input = """nice cherry is an indica-dominant strain that captures the flavorful qualities of its cherry parent and the relaxing attributes of mr. nice. with an aroma of sweet skunk, pine, and berry, nice cherry delivers a rush of cerebral energy that lifts the mood while relaxing the body. 
it’ll also bring an edge back to your appetite while providing focus to keep you productive."""

In [18]:
# Smoke-test recommend() end to end with the sample review
top5 = recommend(fake_input)

In [19]:
# Inspect the recommendations returned for the sample review.
# NOTE(review): in the recorded run only the first hit has a description; the other
# four have rating 0.0 and blank text fields. Presumably these are placeholder rows
# whose description vectorizes to all zeros — confirm and filter them out upstream.
top5

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
1458,Nice-Cherry,indica,4.6,"Happy,Talkative,Uplifted,Relaxed,Hungry","Sweet,Berry,Pungent",Nice Cherry is an indica-dominant strain that ...
1749,Rare-Darkness,indica,0.0,,,
1653,Proper-Pho-Shatter-I,indica,0.0,,,
1652,Proper-Pho-Shatter-H,hybrid,0.0,,,
1651,Proper-Pho-Shatter-S,sativa,0.0,,,
