# Ready to find your next craft beer?

### Let's get started! Run the next cell. 

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import pdist, squareform

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import ipywidgets as widgets
from ipywidgets import Layout

### Run the following cell to create the list of craft beers for selection.

In [2]:
#Load the beer data and collect the beer names in alphabetical order
df_2 = pd.read_csv('beer_data.csv', na_values=['none'])
beer_list = df_2['beer_name'].to_list()
beer_list.sort()

#Build the dropdown widget offering every beer name as an option
beer_selection = widgets.Dropdown(
    description='Select Your Favorite Craft Beer:',
    options=list(beer_list),
    style={'description_width': 'initial'},
    disabled=False,
)

### Run the next cell to show the dropdown menu and select your favorite craft beer. Then run the two cells after it.

In [3]:
#Render the dropdown widget so a beer can be picked from it
display(beer_selection)

Dropdown(description='Select Your Favorite Craft Beer:', options=('#001 Golden Amber Lager', '#002 American I.…

In [4]:
#Snapshot the dropdown's current value; beer_recommender() reads this global,
#so re-run this cell after changing the selection
dropD_value = beer_selection.value

In [5]:
def beer_recommender(beer_name=None, top_n=10):
    """Rank beers by TF-IDF cosine similarity to a chosen beer.

    Every beer in 'beer_data.csv' is described by the space-joined text of
    its abv, beer_style, brewery_name, city and state columns. The
    descriptions are TF-IDF vectorized and each beer is scored against the
    chosen one with cosine similarity.

    Parameters
    ----------
    beer_name : str, optional
        Beer to base the recommendations on. When omitted, the notebook-level
        ``dropD_value`` (the value captured from the dropdown) is used.
        If several rows share this name, the first one is used.
    top_n : int, default 10
        Number of recommendations wanted. ``top_n + 1`` rows are returned
        because the chosen beer itself normally comes first with a
        similarity of 1.0.

    Returns
    -------
    pandas.DataFrame
        A single column named after the chosen beer holding the similarity
        scores, indexed by ``beer_name`` and sorted from most to least similar.

    Raises
    ------
    KeyError
        If the chosen beer does not appear in the ``beer_name`` column.
    """
    if beer_name is None:
        #Fall back to the selection captured from the dropdown widget
        beer_name = dropD_value

    df = pd.read_csv('beer_data.csv', na_values=['none'])

    #Keep only the columns that are used and convert them to string for concat
    #(missing values become the literal text 'nan')
    text_cols = ['abv', 'beer_style', 'brewery_name', 'city', 'state']
    df = df[['beer_name'] + text_cols].astype(str)

    #Create description text, one entry per beer
    description = (df['abv'] + ' ' + df['beer_style'] + ' ' + df['brewery_name']
                   + ' ' + df['city'] + ' ' + df['state'])

    #Vectorize; the result stays sparse. cosine_similarity accepts sparse input,
    #so no dense feature table is needed (this also avoids get_feature_names(),
    #which recent scikit-learn releases no longer provide).
    tfidf = TfidfVectorizer(min_df=2, max_df=0.8).fit_transform(description)

    #Locate the chosen beer by position so duplicate names cannot break the lookup
    names = pd.Index(df['beer_name'], name='beer_name')
    matches = np.flatnonzero(names == beer_name)
    if matches.size == 0:
        raise KeyError(beer_name)
    row = matches[0]

    #Score only the chosen beer against all beers instead of building the
    #full beer-by-beer similarity matrix; the slice keeps the row 2-D
    scores = cosine_similarity(tfidf[row:row + 1], tfidf).ravel()

    ranked = pd.Series(scores, index=names, name=beer_name)
    ranked = ranked.sort_values(ascending=False)
    return ranked.to_frame().head(top_n + 1)

### This is it! Run the next cell to see your recommended beers.

In [6]:
#Print a heading, then show the DataFrame returned by beer_recommender()
print('Top 10 Recommended Beers')
beer_recommender()

Top 10 Recommended Beers


Unnamed: 0_level_0,#001 Golden Amber Lager
beer_name,Unnamed: 1_level_1
#001 Golden Amber Lager,1.0
#004 Session I.P.A.,0.764878
#003 Brown & Robust Porter,0.739742
#002 American I.P.A.,0.732559
BrewFarm Select Golden Lager,0.479757
Marblehead,0.402243
RT Lager,0.372951
California Route,0.328097
Track 1 Amber Lager,0.324902
Lizzy's Red,0.302993
