In [12]:
import pickle
from sklearn.neighbors import NearestNeighbors
import numpy
import json
import pandas as pd

In [3]:
with open('./data/model_pickle', 'rb') as f:
    pickle = pickle.load(f)

In [4]:
nn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)

In [6]:
nn.fit(pickle)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=-1, n_neighbors=20, p=2,
                 radius=1.0)

In [9]:
negative_list = ['anxious', 'dizzy', 'dry eyes', 'dry mouth', 'headache', 'paranoid']
effect_list = ['creative', 'energetic', 'euphoric', 'focused', 'happy', 'hungry', 'relaxed', 'sleepy']
ailment_list = ['anxiety', 'depression', 'fatigue', 'headaches', 'lack of appetite', 'pain', 'stress']

# columns so we can convert values to pandas series
columns = ['anxious', 'dizzy', 'dry eyes', 'dry mouth', 'headache', 'paranoid', 'creative', 'energetic', 
           'euphoric', 'focused', 'happy', 'hungry', 'relaxed', 'sleepy', 'anxiety', 'depression', 'fatigue', 
           'headaches', 'lack of appetite', 'pain', 'stress']

In [19]:
# strain csv with strain ID as index
strains_df = pd.read_csv('./data/strain.csv', index_col=['strain_id'])

In [36]:
def recommend(request: dict, n: int=10):
    """
    creates list with top n recommended strains.
    
    Paramaters
    __________
    
    request: dictionary (json object)
        list of user's desired effects listed in order of user ranking.
        {
            "effects":[],
            "negatives":[],
            "ailments":[]
        }
    n: int, optional
        number of recommendations to return, by default 10.
        
    Returns 
    _______
    
    recs
        returns JSON object of any error messages and
        recommended strains.
    """
    #error_messages = []
    desired_dict = json.loads(request)
    effects, negatives, ailments = (
        desired_dict.get("effects"), 
        desired_dict.get("negatives"),
        desired_dict.get("ailments")
    )
    effects = [effect.lower() for effect in effects]
    negatives = [negative.lower() for negative in negatives]
    ailments = [ailment.lower() for ailment in ailments]
    
    # check to make sure they are all valid
#     extra_effects = set(effects) - set(effect_list)
#     extra_negatives = set(negatives) - set(negative_list)
#     extra_ailments = set(ailments) - set(ailment_list)
#     if extra_effects:
#         error_messages.append(f'Effects not in dataset: {extra_effects}')
#     if extra_negatives:
#         error_messages.append(f'Negatives not in dataset: {extra_negatives}')
#     if extra_ailments:
#         error_messages.append(f'Ailments not in dataset: {extra_ailments}')
    
    for index, effect in enumerate(effects):
        if effect in columns:
            effects[index] = columns.index(effect)

    for index, negative in enumerate(negatives):
        if negative in columns:
            negatives[index] = columns.index(negative)

    for index, ailment in enumerate(ailments):
        if ailment in columns:
            ailments[index] = columns.index(ailment)

    print(effects, negatives, ailments)

    vector = [
        0 for _ in range(len(columns))
    ]    
    
    weight = 100

    for index in effects:
        if isinstance(index, int):
            vector[index] = weight
            weight *= .8
            weight = int(weight)

    weight = 100

    for index in negatives:
        if isinstance(index, int):
            vector[index] = weight
            weight *= .8
            weight = int(weight)
    
    weight = 100

    for index in ailments:
        if isinstance(index, int):
            vector[index] = weight
            weight *= .8
            weight = int(weight)

    data = numpy.array(vector)
    request_series = pd.Series(data,index=columns)
    distance, neighbors = nn.kneighbors([request_series])
    
    
    list_strains = []
    for points in neighbors:
        for index in points:
            list_strains.append(index)
            
    recommended = strains_df.iloc[list_strains].head(n)
    result = {
        #"errors": ", ".join(error_messages),
        "strains": recommended.to_dict("records")
    }
    
    return result

#json.dumps(result)

In [37]:
# TEST

request = json.dumps(
    {
        "effects":["happy", "euphoric", "creative", "junk"],
        "ailments":["anxiety", "depression", "pain"],
        "negatives":["dry mouth", "paranoid", "dizzy"]
    }
)

print(recommend(request))

[10, 8, 6, 'junk'] [3, 5, 1] [14, 15, 19]
{'strains': [{'name': 'alien-stardawg', 'type': 'sativa', 'rating': 4.4}, {'name': '100-og', 'type': 'hybrid', 'rating': 4.0}, {'name': 'afghan-hawaiian', 'type': 'indica', 'rating': 4.1}, {'name': 'bio-diesel', 'type': 'hybrid', 'rating': 4.4}, {'name': 'magnum-pi', 'type': 'sativa', 'rating': 4.3}, {'name': 'chocolate-diesel', 'type': 'sativa', 'rating': 4.6}, {'name': 'alaskan-thunder-fuck', 'type': 'sativa', 'rating': 4.4}, {'name': 'cherry-grapefruit', 'type': 'hybrid', 'rating': 4.3}, {'name': 'kill-bill', 'type': 'hybrid', 'rating': 4.4}, {'name': 'grapefruit-haze', 'type': 'sativa', 'rating': 4.4}]}


In [35]:
strains_df['name'].head(50)

strain_id
3534               100-og
3535                 1024
3536             13-dawgs
3537             24k-gold
3538              3-kings
3539               303-og
3540               3d-cbd
3541             3x-crazy
3542             501st-og
3543          5th-element
3544         707-headband
3545          8-ball-kush
3546               818-og
3547               831-og
3548       9-pound-hammer
3549             91-krypt
3550                 a-10
3551                a-dub
3552              a-train
3553                 acdc
3554                ak-47
3555                ak-48
3556             aberdeen
3557           abusive-og
3558        acapulco-gold
3559        ace-killer-og
3560        ace-of-spades
3561                 aceh
3562           acid-dough
3563              afcrack
3564       afghan-big-bud
3565           afghan-cow
3566        afghan-diesel
3567      afghan-hawaiian
3568          afghan-haze
3569          afghan-kush
3570         afghan-skunk
3571     afghan-sour-kush
35

In [47]:
# change function to only output strain_ids

def recommend(request: dict, n: int=10):
    """
    creates list with top n recommended strains.
    
    Paramaters
    __________
    
    request: dictionary (json object)
        list of user's desired effects listed in order of user ranking.
        {
            "effects":[],
            "negatives":[],
            "ailments":[]
        }
    n: int, optional
        number of recommendations to return, default 10.
        
    Returns 
    _______
    
    list_strains: python list of n recommended strains.
    """
    desired_dict = json.loads(request)
    effects, negatives, ailments = (
        desired_dict.get("effects"), 
        desired_dict.get("negatives"),
        desired_dict.get("ailments")
    )
    effects = [effect.lower() for effect in effects]
    negatives = [negative.lower() for negative in negatives]
    ailments = [ailment.lower() for ailment in ailments]
    
    for index, effect in enumerate(effects):
        if effect in columns:
            effects[index] = columns.index(effect)

    for index, negative in enumerate(negatives):
        if negative in columns:
            negatives[index] = columns.index(negative)

    for index, ailment in enumerate(ailments):
        if ailment in columns:
            ailments[index] = columns.index(ailment)

    vector = [
        0 for _ in range(len(columns))
    ]    
    
    weight = 100

    for index in effects:
        if isinstance(index, int):
            vector[index] = weight
            weight *= .8
            weight = int(weight)

    weight = 100

    for index in negatives:
        if isinstance(index, int):
            vector[index] = weight
            weight *= .8
            weight = int(weight)
    
    weight = 100

    for index in ailments:
        if isinstance(index, int):
            vector[index] = weight
            weight *= .8
            weight = int(weight)

    data = numpy.array(vector)
    request_series = pd.Series(data,index=columns)
    distance, neighbors = nn.kneighbors([request_series])
    
    list_strains = []
    for points in neighbors:
        for index in points:
            list_strains.append(index)

    return list_strains[:n]
            
#     recommended = strains_df.iloc[list_strains].head(n)
#     result = {
#         #"errors": ", ".join(error_messages),
#         "strains": recommended.to_dict("records")
#     }
    
#     return result

# #json.dumps(result)

In [49]:
# TEST

request = json.dumps(
    {
        "effects":["happy", "euphoric", "creative", "junk"],
        "ailments":["anxiety", "depression", "pain"],
        "negatives":["dry mouth", "paranoid", "dizzy"]
    }
)

recommend(request, 5)

[72, 0, 33, 169, 988]