In [9]:
import plotly
import plotly.express as px
import numpy as np

from class_definitions import *
import pandas as pd
from vivino_db import database
# Take the `db` object exposed by the `database` module imported from vivino_db.
db = database.db
# Show the table names reported by the engine (the same call feeds the table loader below).
# NOTE(review): if db_engine is a SQLAlchemy Engine, `table_names()` is deprecated
# since 1.4 in favour of `inspect(engine).get_table_names()` -- confirm before upgrading.
db.db_engine.table_names()

In [2]:
# Read every table the engine reports into its own DataFrame, keyed by table name.
tables = {
    table_name: pd.read_sql_table(table_name, db.db_engine)
    for table_name in db.db_engine.table_names()
}

In [350]:
# Load the dataset from 'dataset.csv' (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='dataset.csv')

In [351]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(128, 30)

### Countries, Regions

In [301]:
# Sunburst whose hierarchy goes from country (root level) to region/appellation (leaves).
# NOTE(review): `temp` is not defined in any cell visible here -- confirm it is
# created earlier, otherwise this cell fails on a fresh kernel (Restart & Run All).
fig = px.sunburst(
    data_frame=temp,
    path=['country_name', 'Region/Appellation'],
)
fig.show(renderer='browser')

In [357]:
# Sunburst whose hierarchy goes from varietal (root level) to region/appellation (leaves).
# NOTE(review): `temp` is not defined in any cell visible here -- confirm it is
# created earlier, otherwise this cell fails on a fresh kernel (Restart & Run All).
fig = px.sunburst(
    data_frame=temp,
    path=['varietal_name', 'Region/Appellation'],
)
fig.show(renderer='browser')

In [285]:
# Box plot of the average rating per country, every wine drawn as a point.
# Rows whose average rating is exactly 0 are excluded from the plot.
fig = px.box(
    df.loc[df['ratings_average'].ne(0)],
    x='country_name',
    y='ratings_average',
    points='all',
    hover_name='name',
)
fig.update_traces(quartilemethod="exclusive")
fig.show(renderer='browser')

In [289]:
# Box plot of the global ranking per country, every wine drawn as a point.
# NOTE(review): the filter is on `ratings_average` although `ranking_global` is
# plotted -- presumably to drop wines without ratings; confirm this is intended.
fig = px.box(
    df.loc[df['ratings_average'].ne(0)],
    x='country_name',
    y='ranking_global',
    points='all',
    hover_name='name',
)
fig.update_traces(quartilemethod="exclusive")
fig.show(renderer='browser')

### Type, Wine Style, Grape/Blend

In [40]:
# Sunburst of type -> wine style -> grape/blend, coloured by acidity.
# The colour scale is centred on the mean acidity. Series.mean() skips missing
# values, whereas np.average() returns NaN as soon as one acidity value is NaN,
# which would leave the colour midpoint undefined.
fig = px.sunburst(df,path=['type','Wine Style','Grape/Blend'],
                  color='acidity',color_continuous_scale='PuBu',
                  color_continuous_midpoint=df['acidity'].mean()
                 )
fig.show(renderer='browser')

In [None]:
# Sunburst of type -> wine style -> grape/blend, coloured by body.
# The colour scale is centred on the mean body. Series.mean() skips missing
# values, whereas np.average() returns NaN as soon as one body value is NaN,
# which would leave the colour midpoint undefined.
fig = px.sunburst(df,path=['type','Wine Style','Grape/Blend'],
                  color='body',color_continuous_scale='PuBu',
                  color_continuous_midpoint=df['body'].mean()
                 )
fig.show(renderer='browser')

### Flavor Wheel

In [359]:
# joblib is used here but never imported explicitly in the visible cells (it can
# only arrive through the star import), so import it to survive a fresh kernel.
import joblib

# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load 'Wines/tasting_notes.pkl' if it comes from a trusted source.
tasting_notes = joblib.load('Wines/tasting_notes.pkl')

# Build one record per entry of tasting_notes: the id with its distinct notes and tags.
# NOTE(review): if an entry maps several ids, only the last one survives in the
# record -- presumably every entry holds exactly one id; confirm.
records = []
for t in tasting_notes:
    d = {}
    for k, v in t.items():
        d['id'] = k
        # Keep the text after ' mentions of' and drop the ' notes' wording.
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is the same on every run (a plain set has arbitrary order).
        d['notes'] = list(dict.fromkeys(
            x['note'].split(' mentions of')[-1].replace(' notes', '').strip() for x in v))
        d['tags'] = list(dict.fromkeys(x['tag'] for x in v))
    records.append(d)

# Preview the first record.
records[0]

In [188]:
# Flatten tasting_notes into one {'id', 'note', 'tag'} row per note, keeping only
# the ids that are present in df.
# The id lookup is built once as a set; the original rebuilt df['id'].to_list()
# and scanned it linearly for every key.
known_ids = set(df['id'].to_list())
nt_map = []
for t in tasting_notes:
    for k, value in t.items():
        if k not in known_ids:
            continue
        for v in value:
            nt_map.append({
                'id': k,
                # Keep the text after 'mentions of' and drop the ' notes' wording.
                'note': v['note'].split('mentions of')[-1].replace(' notes', '').strip(),
                'tag': v['tag'],
            })

In [189]:
# Turn the flattened note rows into a frame and drop exact duplicate rows.
taste_rows = pd.DataFrame.from_records(nt_map)
taste_df = taste_rows.drop_duplicates()

# Flavor wheel: notes at the root level, their tags as leaves.
fig = px.sunburst(data_frame=taste_df, path=['note', 'tag'])
fig.show(renderer='browser')

### Co-occurrence of Tasting Notes

In [334]:
from chord import Chord
import itertools

In [335]:
# Collect, for every entry, the tasting notes whose 'index' is 0, 1 or 2
# (the top 3 notes) under the keys note_1, note_2 and note_3.
records = []
for entry in tasting_notes:
    top_notes = {}
    for entry_id, mentions in entry.items():
        top_notes['id'] = entry_id
        for rank in range(3):
            for mention in mentions:
                if mention['index'] != rank:
                    continue
                # Keep the text after ' mentions of ' and remove the word 'notes'.
                cleaned = mention['note'].split(' mentions of ')[-1]
                cleaned = cleaned.replace('notes', '').strip()
                top_notes[f"note_{rank + 1}"] = cleaned
    records.append(top_notes)

In [336]:
# Keep the rows where both note_1 and note_2 are present.
note_pairs = pd.DataFrame.from_records(records)[['note_1', 'note_2']].dropna()
# Emit every pair followed by its reverse, so each co-occurrence is listed in
# both directions.
co_occ = [pair for row in note_pairs.values for pair in (row, row[::-1])]

In [340]:
# Count the occurrences of every ordered pair: rows are the first element of a
# pair, columns the second; combinations that never occur are filled with 0.
pair_frame = pd.DataFrame(co_occ)
pair_counts = pd.pivot_table(
    pair_frame, index=0, columns=1, aggfunc="size", fill_value=0
)
matrix = pair_counts.values.tolist()

# Sorted unique note names found in the pairs.
names = np.unique(co_occ).tolist()

In [340]:
# Build the chord diagram from the co-occurrence matrix and its labels.
# NOTE(review): `colors` is not defined in any cell visible here -- confirm it is
# created earlier, otherwise this cell fails on a fresh kernel.
chord_options = dict(colors=colors, wrap_labels=True, padding=.01, width=700)
chord_diagram = Chord(matrix, names, **chord_options)
# NOTE(review): the output name 'images/co' has no '.html' extension -- confirm
# this is the intended file name.
chord_diagram.to_html('images/co')

### Ratings by Type

In [354]:
# Distinct values of the 'Wine Style' column, in order of first appearance.
df['Wine Style'].unique()

array(['Savory and Classic', 'Rich and Intense', 'Bold and Structured',
       'Light and Perfumed', 'Tropical and Balanced', 'Green and Flinty',
       'Aromatic and Floral', 'Buttery and Complex', 'Fresh and Youthful',
       'Lush and Balanced', 'Berries and Cream',
       'Complex and Traditional', 'Crisp and Dry'], dtype=object)