In [1]:
# dependencies
import pandas as pd
import numpy as np
import re
import nltk

# read in CSV
all_wines = pd.read_csv('Data/winemag-data-130k-v2.csv')
all_wines.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [3]:
# filling in known variety for nulls
all_wines.iloc[86909, 7] = 'Syrah-Petite Sirah'

# drop unneeded columns
clean_wines = all_wines.drop(columns=['Unnamed: 0', 'country', 'designation', 'province', \
                                      'region_1', 'region_2', 'taster_name', 'taster_twitter_handle', \
                                      'variety', 'winery'])

# sample
clean_wines.sample(10)

Unnamed: 0,description,points,price,title
57553,With the velvet texture typical of the appella...,89,50.0,Jean-Luc and Paul Aegerter 2014 Vieilles Vigne...
26602,Still gritty in tannins at nearly four years o...,89,45.0,William Hill Estate 2007 Bench Blend Cabernet ...
63089,This dry wine offers light fruitiness and more...,84,,Domaine du Penlois 2013 Rosé (Beaujolais Rosé)
126060,"This “white label” blend is broad and lush, an...",88,29.0,Man O' War 2009 Merlot-Cabernet (Waiheke Island)
121090,"Made with certified organically grown grapes, ...",87,12.0,La Traiana 2010 Alò Red (Toscana)
11414,"The best vintage to date for this wine, delive...",91,30.0,Jaxon 2014 Grenache (Rogue Valley)
25692,There's a ripe orange-oil streak in the backgr...,88,15.0,Constantia Glen 2010 Sauvignon Blanc (Constantia)
40943,"Very stiff and tannic, with hard, stemmy flavo...",82,17.0,Hoodsport 2005 Cabernet Merlot (Columbia Valle...
94184,There's a sugary taste to this blend of French...,85,13.0,HandCraft 2012 Inspiration White (California)
83279,Saucy plum and raspberry aromas are simple but...,86,14.0,Viu Manent 2012 Gran Reserva Cabernet Sauvigno...


In [4]:
# drop the remaining nulls
clean_wines = clean_wines.dropna()

# clean title
clean_wines['title'] = clean_wines['title']

# sample
clean_wines.sample(10)

Unnamed: 0,description,points,price,title
83815,"Exquisitely fragrant notes of ripe, aromatic a...",93,57.0,Zusslin 2012 Clos Liebenberg Sparkling (Créman...
82690,"A lovely Zinfandel, soft and dry, with lusciou...",88,14.0,Perry Creek 2007 Zinman Zinfandel (El Dorado)
82725,"Subtle, alluring aromas of earth, cola, eucaly...",93,90.0,Montes 2012 Alpha M Red (Colchagua Valley)
35442,"This taut, tight wine is impressive. The miner...",91,50.0,Waris Hubert 2011 Blanche Grand Cru Blanc de B...
76046,Yellow flower and natural rubber come forward ...,86,11.0,Caruso & Minini 2007 Terre di Giumara Inzolia ...
8780,This Napa-side-of-Carneros Chardonnay is respl...,89,22.0,Starmont 2012 Chardonnay (Carneros)
116272,"Full bodied and heavily extracted, this is a d...",90,23.0,Château de Ségriès 2009 Cuvée Reservée Red (Li...
78977,Spice and smoky oak aromas blend with perfumed...,85,17.0,Domaine Jean Bousquet 2013 Reserve Made with O...
128571,This textured wine is tight with layers of ner...,87,15.0,Mas de Cadenet 2014 Arbaude Rosé (Côtes de Pro...
44870,Pascal Sirat named this wine for his father an...,86,15.0,Château Panchille 2016 Blanc de Fernand (Bord...


In [7]:
def parse_adj(text):
    tags = ['JJ']
    return_text = []
    
    split_text = text.split()
    
    for word in split_text:
        return_text.append([a[0] for a in nltk.pos_tag(word) if a[1] in tags])
        
    return return_text

In [8]:
clean_wines['adjectives'] = clean_wines['description'].apply(lambda x: parse_adj(x))
clean_wines.sample(10)

Unnamed: 0,description,points,price,title,adjectives
64471,A strong hazelnut aroma meets poached pear and...,88,30.0,Portola Vineyards 2014 Chardonnay (Santa Cruz ...,"[[], [], [n, u], [], [], [], [], [n], [t], [n]..."
73958,This ruby colored Pinot Noir has aromas of bla...,85,9.0,Budureasca 2014 Vine in Flames Pinot Noir (Dea...,"[[], [], [l], [], [], [], [], [], [], [e], [n]..."
90626,This Sangiovese hits the palate in a very dire...,83,32.0,Costa Archi 2008 Monte Brullo (Sangiovese di ...,"[[], [n], [], [], [], [], [], [], [], [n], [],..."
2347,Made by an Oregon winery from Carneros region ...,85,29.0,Ardiri Winery and Vineyards 2012 Pinot Noir (C...,"[[], [], [], [], [], [f], [], [], [], [], [], ..."
37985,"With some maturity, this is a rich, full-bodie...",92,26.0,Bailly-Lapierre 2012 Egarade Brut (Crémant de...,"[[], [], [u], [], [], [], [], [f, u], [], [s],..."
25252,"This is a warm and round wine from Irancy, sou...",87,25.0,Simonnet-Febvre 2012 Irancy,"[[], [], [], [], [n], [n], [], [f], [n], [], [..."
120181,"Starts off smelling vegetal, although with bre...",84,54.0,Testarossa 2007 Graham Family Vineyard Pinot N...,"[[], [o], [s], [], [o, u], [], [i], [], [e], [..."
84970,"A fragrant, fresh rosé that is full of red fru...",86,30.0,Domaine Matthias et Emile Roblin 2013 Origine ...,"[[], [n], [f], [], [], [], [f, u], [], [], [f]..."
9325,"Despite the name, this is the value offering, ...",89,28.0,Soléna 2013 Grande Cuvée Pinot Noir (Willamett...,"[[], [], [], [], [], [], [u], [o], [n], [], []..."
77503,Castello di Semivicoli is a thick and succulen...,90,45.0,Masciarelli 2006 Castello di Semivicoli (Treb...,"[[s], [], [i, o], [], [], [i], [n], [u, u], [s..."


In [None]:
# dependencies
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# read clean csv
clean_wines = pd.read_csv('./Data/recommend_neural_model_df.csv')

# sample
clean_wines.sample(10)

In [None]:
# LabelEncoder dependency
from sklearn.preprocessing import LabelEncoder
number = LabelEncoder()

# create target/features
target = clean_wines['quality']
target_names = ['Good', 'Poor']

data = clean_wines[['country','price','province','variety','winery','vintage']]
data['country'] = number.fit_transform(data['country'].astype('str'))
data['province'] = number.fit_transform(data['province'].astype('str'))
data['variety'] = number.fit_transform(data['variety'].astype('str'))
data['winery'] = number.fit_transform(data['winery'].astype('str'))

feature_names = data.columns

# sample
data.sample(10)

In [None]:
# RandomForest dependencies
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# split data
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=41)

# model fit/score
rf = RandomForestClassifier(n_estimators=100)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

In [None]:
# accuracy of model
accuracy = round(rf.score(X_train, y_train) * 100, 2)
print(accuracy)

In [None]:
# dependencies
import pandas as pd
import tabpy_client
from tabpy.tabpy_tools.client import Client
number = LabelEncoder()

def wine_predictor(_arg1, _arg2, _arg3, _arg4, _arg5, _arg6):
    row = {'country': _arg1,
           'price': _arg2,
           'province': _arg3,
           'variety': _arg4,
           'winery': _arg5,
           'vintage': _arg6}

    test_features = pd.DataFrame(data=row, index=[0])
    test_features['country'] = number.fit_transform(test_features['country'].astype('str'))
    test_features['province'] = number.fit_transform(test_features['province'].astype('str'))
    test_features['variety'] = number.fit_transform(test_features['variety'].astype('str'))
    test_features['winery'] = number.fit_transform(test_features['winery'].astype('str'))
    
    predict_quality_prob = rf.predict_proba(test_features)
    return [probability[1] for probability in predict_quality_prob]

client = tabpy_client.Client('http://localhost:9004/')
client.deploy('wine_predictor', wine_predictor, 'Predicts probability of good quality.', override=True)