### Import libraries and load the downloaded data

In [None]:
import random
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors


In [None]:
# Keep only high ratings (review >= 4.50) and collapse them into one
# comma-joined string of item names per user, held in an "item" column.
df = (
    pd.read_csv("popular_beer_ratings.csv")
    .loc[lambda ratings: ratings["review"] >= 4.50]
    .groupby(["user"])["item"]
    .apply(lambda items: ",".join(items))
    .to_frame()
)
df.head()

### create the model

In [None]:
class NNRecommender:
    """Nearest-neighbour recommender over delimiter-separated item lists.

    Each sample is one string of item names joined by ``separator``.  The
    strings are vectorised with ``CountVectorizer`` (one token per item) and
    indexed with ``NearestNeighbors``.  Recommendations for a query are the
    items of its nearest training samples that the query does not already
    contain.

    Args:
        n_neighbors: Number of training samples consulted per query.
        separator: Delimiter between item names inside one sample string.
    """

    def __init__(self, n_neighbors=5, separator=","):
        self.separator = separator
        # Split on the separator only, so a whole item name is one token.
        self.cv = CountVectorizer(tokenizer=lambda x: x.split(separator))
        self.nn = NearestNeighbors(n_neighbors=n_neighbors)

    def __repr__(self):
        return f'NNRecommender(n_neighbors={self.nn.n_neighbors}, separator="{self.separator}")'

    def fit(self, X):
        """Vectorise the training samples and build the neighbour index.

        Args:
            X: Sequence of ``separator``-joined item strings (a pandas Series
                or any positionally indexable sequence such as a list).

        Returns:
            The fitted recommender itself, so calls can be chained.
        """
        # Keep the raw strings: predict() reads item names back from them.
        self.X = X
        self.nn.fit(self.cv.fit_transform(X))
        return self

    def predict(self, X):
        """Recommend unseen items for every query string in ``X``.

        Args:
            X: Iterable of query strings, each a ``separator``-joined item
                list.

        Returns:
            A list with one entry per query.  Each entry lists the items found
            in the query's neighbouring training samples, without the query's
            own items and without duplicates, ordered by first appearance
            while walking the neighbours in the order ``kneighbors`` returns
            them.
        """
        # Positional access that works for a pandas Series (via ``.iloc``) as
        # well as for a plain list or array that was passed to fit().
        rows = getattr(self.X, "iloc", self.X)
        recommendations = []
        for query in X:
            # Split the query once instead of once per candidate item.
            owned = set(query.split(self.separator))
            vector = self.cv.transform([query])
            neighbors = self.nn.kneighbors(vector, return_distance=False)
            # A dict keeps insertion order, giving a stable de-duplicated
            # result (a set of str iterates in a run-dependent order).
            candidates = {}
            for n in neighbors[0]:
                for item in rows[int(n)].split(self.separator):
                    if item not in owned:
                        candidates[item] = None
            recommendations.append(list(candidates))
        return recommendations

In [None]:
# fit() returns the recommender itself, so construction and training chain;
# the trailing expression still displays the fitted model's repr.
model = NNRecommender(n_neighbors=5).fit(df["item"])
model

In [None]:
# Peek at the item strings of two randomly chosen users.
df["item"].sample(2).values

In [None]:
# Single-query input for the recommender: one comma-joined string of items.
ipa = [
    ",".join(
        [
            "OAKED Arrogant Bastard Ale",
            "Ruthless Rye IPA",
            "World Wide Stout",
            "Resin",
            "Julius",
            "Maudite",
            "Franziskaner Hefe-Weisse",
            "Westmalle Trappist Tripel",
            "2XIPA",
            "Abbey Ale",
            "Abrasive Ale",
            "Schneider Weisse Tap 6 Unser Aventinus",
            "Oak Aged Yeti Imperial Stout",
            "Kalamazoo Stout",
            "Undercover Investigation Shut-down Ale",
            "Vanilla Porter",
            "Head Hunter",
            "Dark Lord Imperial Stout",
            "Samuel Adams Rebel IPA",
            "Hop Nosh IPA",
        ]
    )
]

In [None]:
# Second single-query input: one comma-joined string of items.
stout = [
    ",".join(
        [
            "Pilsner Urquell",
            "Rodenbach Grand Cru",
            "Smoked Porter",
            "Raging Bitch",
            "Darkness",
            "The Mad Elf",
            "Obsidian Stout",
            "Black Tuesday",
            "Nut Brown Ale",
            "Plead The 5th Imperial Stout",
            "Double Cream Stout",
            "La Folie",
            "Big Bad Baptist",
        ]
    )
]

In [None]:
# Recommend items for the `ipa` query; the result is a list holding one
# list of item names per query string.
model.predict(ipa)

In [None]:
# Recommend items for the `stout` query; the result is a list holding one
# list of item names per query string.
model.predict(stout)

In [None]:
import cloudpickle

In [None]:
# Serialise the fitted model to disk.  cloudpickle is presumably used
# because the model's tokenizer is a lambda.
with open("model.pkl", "wb") as model_file:
    model_file.write(cloudpickle.dumps(model))

In [None]:
# Drop the in-memory model; presumably so the reload from model.pkl below
# is a genuine round-trip check rather than reuse of the live object.
del model

In [None]:
# Restore the model from disk.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("model.pkl", "rb") as model_file:
    model = cloudpickle.loads(model_file.read())

In [None]:
# Query the reloaded model with the same `ipa` input used before saving.
model.predict(ipa)

In [None]:
# Show ten randomly drawn item names from the fitted vocabulary.
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; prefer get_feature_names_out() and fall back to the old
# method only on releases that predate it.
vectorizer = model.cv
if hasattr(vectorizer, "get_feature_names_out"):
    features = list(vectorizer.get_feature_names_out())
else:
    features = vectorizer.get_feature_names()
# random.choices draws with replacement, so a name may appear more than once.
random_features = ", ".join(random.choices(features, k=10))
random_features