### Model (sklearn) to Flask

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load the candy reviews and keep only the rows whose review score is 5 or more.
df = pd.read_csv("data/candy.csv").loc[lambda reviews: reviews["review"] >= 5]

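Prep for the recommender: keep reviews with a score of at least 5, then collapse each user's items into a single comma-separated string.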

In [3]:
# Rebuild `df` from the CSV as one row per user: the "item" column holds that
# user's items (review score >= 5 only) joined into one comma-separated string.
df = (
    pd.read_csv("data/candy.csv")
    .loc[lambda reviews: reviews["review"] >= 5]
    .groupby(["user"])["item"]
    .apply(lambda items: ",".join(items))
    .to_frame()
)
df.head()

Unnamed: 0_level_0,item
user,Unnamed: 1_level_1
aaron67,"Kit Kat Minis Crisp Wafers in Milk Chocolate,R..."
aaron68,"Brookside Dark Chocolate Pomegranate Flavor,Re..."
aaron73,Dove Chocolate Promises Silky Smooth Sea Salt ...
abarker,"Reese's Peanut Butter Bunny,Ghirardelli Gourme..."
abigail04,"Kit Kat Minis Crisp Wafers in Milk Chocolate,P..."


### Putting a bow on the Katacoda

In [4]:
class NNRecommender:
    """Recommend items by pooling the rows nearest to a query.

    Every row, for training and for prediction, is a single string of items
    joined by ``separator``. Rows are vectorised with a ``CountVectorizer``
    that tokenises on ``separator``, and a ``NearestNeighbors`` index is
    fitted on the resulting count matrix.

    Parameters
    ----------
    n_neighbors : int, default 5
        Passed to ``NearestNeighbors``; the number of training rows looked up
        for each query.
    separator : str, default ","
        Delimiter between items inside a row.
    """

    def __init__(self, n_neighbors=5, separator=","):
        self.separator = separator
        # The tokenizer is a lambda, which the stdlib pickle module cannot
        # serialise; persist instances with cloudpickle instead.
        self.cv = CountVectorizer(tokenizer=lambda x: x.split(separator))
        self.nn = NearestNeighbors(n_neighbors=n_neighbors)

    def __repr__(self):
        return f'NNRecommender(n_neighbors={self.nn.n_neighbors}, separator="{self.separator}")'

    def fit(self, X):
        """Vectorise the training rows and fit the neighbour index on them.

        Parameters
        ----------
        X : sequence of str
            Training rows. The object is kept as ``self.X`` because
            ``predict`` reads the neighbours' rows back from it by position,
            so it must support positional access (a pandas Series, list,
            tuple or array).

        Returns
        -------
        NNRecommender
            ``self``, so calls can be chained.
        """
        self.X = X
        X = self.cv.fit_transform(X)
        self.nn.fit(X)
        return self

    def predict(self, X):
        """Return, for each query row, the neighbours' items the query lacks.

        Parameters
        ----------
        X : iterable of str
            Query rows, each a ``separator``-joined string of items.

        Returns
        -------
        list of list of str
            One list per query. Items appear once each, in the order they are
            first met while walking the neighbours in the order the neighbour
            search returned them, so repeated calls give the same ordering.
        """
        # kneighbors yields positions, not index labels: pandas objects are
        # read through ``.iloc``; plain sequences are indexed directly.
        rows = getattr(self.X, "iloc", self.X)
        recommendations = []
        for query in X:
            # Built once per query rather than once per candidate item.
            already_have = set(query.split(self.separator))
            neighbors = self.nn.kneighbors(
                self.cv.transform([query]), return_distance=False
            )
            # Dict keys de-duplicate while keeping insertion order; a set
            # would make the output order vary between interpreter runs.
            pooled = {}
            for n in neighbors[0]:
                for item in rows[int(n)].split(self.separator):
                    if item not in already_have:
                        pooled[item] = None
            recommendations.append(list(pooled))
        return recommendations

In [5]:
# Fit on the per-user item strings. fit() returns the recommender itself, so
# the call can be chained; the trailing expression displays its repr.
model = NNRecommender(n_neighbors=5).fit(df["item"])
model

NNRecommender(n_neighbors=5, separator=",")

In [6]:
# Peek at one randomly drawn user's item string to see the input format.
df["item"].sample(1).values

array(["Milky Way Candy Bar,Almond Joy Snack Size Bites,Hershey's Nuggets Milk Chocolate with Almonds,Butterfinger Candy Bar,Werther's Original Caramel Hard Candies,Hershey's Kisses Milk Chocolates with Almonds,Hershey's Symphony Milk Chocolate with Almonds and Toffee Bar"],
      dtype=object)

In [7]:
# A single hand-written query row made of fruity / sour candies.
sweet = [
    "Airheads Xtremes Sweetly Sour Candy Rainbow Berry,Life Savers Five Flavor Gummies,Twizzlers Pull-N-Peel Candy Cherry"
]

In [8]:
# A single hand-written query row made of peanut / peanut-butter candies.
peanut = [
    "Reese's Peanut Butter Cups Miniatures,M&Ms Peanut Chocolate Candy,Reese's Peanut Butter Big Cup"
]

In [9]:
# Recommendations for the `sweet` query: items from its nearest training rows
# that the query does not already contain.
model.predict(sweet)

[["Werther's Original Caramel Hard Candies",
  'Starburst Tropical Fruit Chews Candy',
  "Hershey's Whoppers Malted Milk Balls",
  'Trolli Sour Brite Eggs Candy',
  'Nestle Butterfinger Bites']]

In [10]:
# Recommendations for the `peanut` query: items from its nearest training rows
# that the query does not already contain.
model.predict(peanut)

[["Reese's Outrageous King Size Bar",
  'Snickers Chocolate Bar',
  "Reese's Peanut Butter Egg"]]

In [11]:
import cloudpickle

In [12]:
# Serialise the fitted recommender to disk. cloudpickle is used because the
# vectorizer's tokenizer is a lambda, which the stdlib pickle cannot serialise.
with open("model.pkl", "wb") as model_file:
    cloudpickle.dump(model, model_file)

In [13]:
# Remove the in-memory recommender so the reload below starts from the file alone.
del model

In [14]:
# Restore the recommender from disk. Unpickling can execute code embedded in
# the file, so only load pickles that come from a trusted source.
with open("model.pkl", "rb") as model_file:
    model = cloudpickle.load(model_file)

In [15]:
# Re-run the `peanut` query on the reloaded recommender to check it still predicts.
model.predict(peanut)

[["Reese's Outrageous King Size Bar",
  'Snickers Chocolate Bar',
  "Reese's Peanut Butter Egg"]]