### Import libraries and load the downloaded data

In [1]:
import random
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors


In [2]:
# Build one row per user: keep only highly rated reviews (>= 4.50) and
# collapse each user's items into a single comma-separated string.
df = (
    pd.read_csv("popular_beer_ratings.csv")
    .loc[lambda ratings: ratings["review"] >= 4.50]
    .groupby(["user"])["item"]
    .apply(",".join)
    .to_frame()
)
df.head()

Unnamed: 0_level_0,item
user,Unnamed: 1_level_1
Alieniloquium,"Pliny The Elder,Heady Topper,KBS (Kentucky Bre..."
BEERchitect,"Two Hearted Ale,Pliny The Elder,Heady Topper,K..."
Billolick,"Breakfast Stout,Pliny The Elder,Heady Topper,K..."
Bitterbill,"Enjoy By IPA,Zombie Dust,Alpha King,Trappist W..."
BuckeyeNation,"Breakfast Stout,Two Hearted Ale,Pliny The Elde..."


### create the model

In [3]:
class NNRecommender:
    """Item recommender built on bag-of-items nearest neighbours.

    Every sample is one string that lists items joined by ``separator``.
    ``fit`` turns each string into an item-count vector with
    ``CountVectorizer`` and indexes those vectors with ``NearestNeighbors``.
    ``predict`` finds the nearest fitted samples for a query string and
    returns the items they contain that the query itself does not.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of neighbours requested from ``NearestNeighbors`` per query.
    separator : str, default=","
        Delimiter between items inside one sample string.
    """

    def __init__(self, n_neighbors=5, separator=","):
        self.separator = separator
        # The custom tokenizer replaces the regex token pattern entirely;
        # passing token_pattern=None says so explicitly and avoids the
        # "token_pattern will not be used" warning from recent scikit-learn.
        self.cv = CountVectorizer(
            tokenizer=lambda x: x.split(separator),
            token_pattern=None,
        )
        self.nn = NearestNeighbors(n_neighbors=n_neighbors)

    def __repr__(self):
        return f'NNRecommender(n_neighbors={self.nn.n_neighbors}, separator="{self.separator}")'

    def fit(self, X):
        """Vectorize the item strings in ``X`` and index them for lookup.

        Parameters
        ----------
        X : pandas Series or sequence of str
            One separator-joined item string per sample. The original
            object is kept on ``self.X`` so ``predict`` can read the
            neighbours' raw item strings back by position.

        Returns
        -------
        NNRecommender
            ``self``, to allow call chaining.
        """
        self.X = X
        X = self.cv.fit_transform(X)
        self.nn.fit(X)
        return self

    def predict(self, X):
        """Recommend items for each query string in ``X``.

        Parameters
        ----------
        X : iterable of str
            Query samples, each a separator-joined item string.

        Returns
        -------
        list of list of str
            For every query, the distinct items found in its neighbours'
            strings that are not already part of the query. Items appear
            in the order they are first encountered while walking the
            neighbours in the order ``kneighbors`` returns them, so the
            result is stable from run to run.
        """
        # Positional row access: pandas objects expose ``.iloc``; plain
        # sequences (list, tuple, ndarray) are indexed directly.
        rows = getattr(self.X, "iloc", self.X)
        Xp = []
        for Xi in X:
            Xt = self.cv.transform([Xi])
            neighbors = self.nn.kneighbors(Xt, return_distance=False)
            # Seed the "seen" set with the query's own items so they are
            # never recommended back, then reuse it to drop duplicates.
            seen = set(Xi.split(self.separator))
            repos = []
            for n in neighbors[0]:
                for r in rows[int(n)].split(self.separator):
                    if r not in seen:
                        seen.add(r)
                        repos.append(r)
            Xp.append(repos)
        return Xp

In [4]:
# Build a recommender that consults the 5 nearest users per query.
model = NNRecommender(n_neighbors=5)
# Fit on each user's comma-joined item string; fit() returns the model itself,
# so the cell displays its repr.
model.fit(df["item"])

NNRecommender(n_neighbors=5, separator=",")

In [5]:
# Peek at the raw item strings of two randomly drawn users. No random_state is
# passed, so a different pair is shown on every run.
df.sample(2)['item'].values

array(['Black Tuesday',
       "Breakfast Stout,Two Hearted Ale,Pliny The Elder,Heady Topper,Sculpin,Bourbon County Brand Stout,Enjoy By IPA,Zombie Dust,Lagunitas Sucks,Ten FIDY,Abt 12,Trappistes Rochefort 10,Hefeweissbier,Jai Alai IPA,Backwoods Bastard,Imperial Stout,The Abyss,Bourbon County Brand Coffee Stout,Victory At Sea - Coffee And Vanilla,Parabola,Trappist Westvleteren 12 (XII),Supplication,CBS (Canadian Breakfast Stout),Expedition Stout,Bomb!,Blind Pig,Sip Of Sunshine,Lunch,Consecration,Tank 7,Pseudo Sue,Old Chub - Scottish Style Ale,Péché Mortel,Red's Rye IPA,Westmalle Trappist Tripel,Abbey Ale,Abrasive Ale,Harvest Ale,Saison Dupont,Hunahpu's Imperial Stout,Xocoveza,Nelson,Focal Banger,Darkness,Daisy Cutter Pale Ale,Raspberry Tart,Temptation,Espresso Oak Aged Yeti Imperial Stout,Wisconsin Belgian Red"],
      dtype=object)

In [6]:
# Hand-written query: a single comma-separated item string, wrapped in a list
# because predict() iterates over samples.
# NOTE(review): the name suggests an IPA-leaning profile, but the list mixes styles — confirm intent.
ipa = ["OAKED Arrogant Bastard Ale,Ruthless Rye IPA,World Wide Stout,Resin,Julius,Maudite,Franziskaner Hefe-Weisse,Westmalle Trappist Tripel,2XIPA,Abbey Ale,Abrasive Ale,Schneider Weisse Tap 6 Unser Aventinus,Oak Aged Yeti Imperial Stout,Kalamazoo Stout,Undercover Investigation Shut-down Ale,Vanilla Porter,Head Hunter,Dark Lord Imperial Stout,Samuel Adams Rebel IPA,Hop Nosh IPA"]

In [7]:
# Second hand-written query in the same one-string-in-a-list format.
# NOTE(review): the name suggests a stout-leaning profile, but the list mixes styles — confirm intent.
stout = ["Pilsner Urquell,Rodenbach Grand Cru,Smoked Porter,Raging Bitch,Darkness,The Mad Elf,Obsidian Stout,Black Tuesday,Nut Brown Ale,Plead The 5th Imperial Stout,Double Cream Stout,La Folie,Big Bad Baptist"]

In [8]:
# Recommend items for the `ipa` query: items held by its nearest fitted users
# that the query does not already contain.
model.predict(ipa)

[['Darkness', 'Palo Santo Marron']]

In [9]:
# Same lookup for the `stout` query.
model.predict(stout)

[['Abrasive Ale']]

In [10]:
import cloudpickle

In [11]:
# Persist the fitted model. cloudpickle is used because the vectorizer's
# tokenizer is a lambda, which the standard pickle module cannot serialize.
with open("model.pkl", "wb") as f:
    cloudpickle.dump(model, f)

In [12]:
# Drop the in-memory model so that the following cells can only succeed by
# using the copy reloaded from disk.
del model

In [13]:
# Restore the model from disk.
# NOTE: unpickling executes code embedded in the file — only load pickles you trust.
with open("model.pkl", "rb") as f:
    model = cloudpickle.load(f)

In [14]:
# Re-run the earlier `ipa` query to check the reloaded model still recommends.
model.predict(ipa)

[['Darkness', 'Palo Santo Marron']]

In [15]:
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; prefer get_feature_names_out() and fall back to the old
# method only on installs that predate it.
if hasattr(model.cv, "get_feature_names_out"):
    features = list(model.cv.get_feature_names_out())
else:
    features = model.cv.get_feature_names()
# random.choices draws with replacement, so the same item may appear twice.
random_features = ", ".join(random.choices(features, k=10))
random_features

'abbey ale, edmund fitzgerald, maximus, centennial ipa, union jack ipa, hennepin, smoked porter, abbey ale, anchor liberty ale, trappist westvleteren 12 (xii)'