# Visualizing Data Using Plotly

In [1]:
import pandas as pd
from plotly.offline import init_notebook_mode, iplot
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import numpy as np
from scipy.stats import multivariate_normal
# Switch Plotly into notebook mode before any figure is shown.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook - confirm against the Plotly offline docs.
init_notebook_mode(connected=True)

In [2]:
# Read the zipped wine-review CSV; its first column holds the row index.
reviews = pd.read_csv(
    filepath_or_buffer='data/winemag-data-130k-v2.csv.zip',
    index_col=0,
)
# Bare last expression so the notebook renders the frame as a table.
reviews

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef)
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss


In [3]:
# Column dtypes and non-null counts: shows which columns have missing values.
reviews.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 129971 entries, 0 to 129970
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   country                129908 non-null  object 
 1   description            129971 non-null  object 
 2   designation            92506 non-null   object 
 3   points                 129971 non-null  int64  
 4   price                  120975 non-null  float64
 5   province               129908 non-null  object 
 6   region_1               108724 non-null  object 
 7   region_2               50511 non-null   object 
 8   taster_name            103727 non-null  object 
 9   taster_twitter_handle  98758 non-null   object 
 10  title                  129971 non-null  object 
 11  variety                129970 non-null  object 
 12  winery                 129971 non-null  object 
dtypes: float64(1), int64(1), object(11)
memory usage: 13.9+ MB


In [4]:
# Number of reviews per country; the output lists the most-reviewed countries first.
reviews['country'].value_counts()

US                        54504
France                    22093
Italy                     19540
Spain                      6645
Portugal                   5691
Chile                      4472
Argentina                  3800
Austria                    3345
Australia                  2329
Germany                    2165
New Zealand                1419
South Africa               1401
Israel                      505
Greece                      466
Canada                      257
Hungary                     146
Bulgaria                    141
Romania                     120
Uruguay                     109
Turkey                       90
Slovenia                     87
Georgia                      86
England                      74
Croatia                      73
Mexico                       70
Moldova                      59
Brazil                       52
Lebanon                      35
Morocco                      28
Peru                         16
Ukraine                      14
Macedoni

In [5]:
# Scatter a random sample of wines worldwide: rating (points) on x, price on a
# log-scaled y axis, coloured by rating.
n = 100
# Price is missing for some reviews (see the non-null counts from reviews.info()).
# Those rows cannot be placed on the price axis, so drop them *before* sampling;
# otherwise the figure shows fewer than n wines.
# random_state pins the draw so Restart & Run All reproduces the same figure.
fig = px.scatter(reviews.dropna(subset=['price', 'points']).sample(n, random_state=0),
                 x='points',
                 y='price',
                 title='Wine Price compared to Rating Worldwide',
                 color='points',
                 color_continuous_scale='redor',
                 log_y=True
                )
fig.show()

In [6]:
def plot_price_vs_rating(country, n=100, random_state=None):
    """Show a price-vs-points scatter plot for a random sample of one country's wines.

    Reads the notebook-level ``reviews`` DataFrame and displays the figure;
    nothing is returned.

    Parameters
    ----------
    country : str
        Compared for exact equality against the ``country`` column.
    n : int, default 100
        Maximum number of wines to plot. When the country has fewer plottable
        reviews than ``n``, all of them are plotted.
    random_state : int or None, default None
        Forwarded to ``DataFrame.sample``. Pass an integer to get the same
        sample (and therefore the same figure) on every call.

    Raises
    ------
    ValueError
        If ``country`` has no reviews with both a price and a rating, since the
        resulting figure would be empty.
    """
    # Rows without a price cannot be drawn, so exclude them before sampling;
    # otherwise they silently use up part of the n-row sample.
    reviews_country = reviews[reviews['country'] == country].dropna(subset=['price', 'points'])
    if reviews_country.empty:
        raise ValueError('No priced reviews found for country {!r}'.format(country))

    sample_size = min(n, len(reviews_country))
    fig = px.scatter(reviews_country.sample(sample_size, random_state=random_state),
                     x='points',
                     y='price',
                     title='Wine Price compared to Rating for {}'.format(country),
                     color='points',
                     color_continuous_scale='redor',
                     log_y=True)
    fig.show()

In [7]:
# Example: price vs. rating for a sample of wines whose country is 'Italy'.
plot_price_vs_rating('Italy')

In [13]:
# Per-country mean of price and points; the country names end up as the index.
reviews_by_country = reviews.groupby('country')[['price', 'points']].mean()
country_names = reviews_by_country.index.tolist()

# World map shaded by each country's mean points, matched to the map by country name.
choropleth_options = dict(
    locationmode='country names',
    locations=country_names,
    hover_name=country_names,
    color='points',
    color_continuous_scale='redor',
    title='Average Wine Points Worldwide',
)
fig = px.choropleth(reviews_by_country, **choropleth_options)
fig.show()

In [9]:
# Draw 20 random reviews, then keep the tasters (up to 10) and varieties (up to 5)
# that occur most often within that draw.
reviews_sample = reviews.sample(20)
taster_counts = reviews_sample['taster_name'].value_counts()
variety_counts = reviews_sample['variety'].value_counts()
reviewers = taster_counts.index[:10].tolist()
varieties = variety_counts.index[:5].tolist()

In [10]:
# Highest points each selected taster gave to each selected variety, as a flat
# table with one row per (taster_name, variety) pair.
is_selected = reviews['taster_name'].isin(reviewers) & reviews['variety'].isin(varieties)
variety_points = (
    reviews.loc[is_selected]
    .groupby(['taster_name', 'variety'])['points']
    .max()
    .reset_index()
)
variety_points

Unnamed: 0,taster_name,variety,points
0,Jim Gordon,Cabernet Franc,92
1,Jim Gordon,Chardonnay,94
2,Jim Gordon,Pinot Grigio,90
3,Jim Gordon,Pinot Noir,96
4,Michael Schachner,Cabernet Franc,93
5,Michael Schachner,Chardonnay,92
6,Michael Schachner,Pinot Grigio,87
7,Michael Schachner,Pinot Noir,93
8,Michael Schachner,Tempranillo Blend,96
9,Paul Gregutt,Cabernet Franc,94


In [11]:
# Heatmap of the taster x variety table: one cell per pair, coloured by max points.
# NOTE(review): connectgaps=True presumably fills cells for pairs with no data -
# confirm against the Plotly Heatmap reference.
points_heatmap = go.Heatmap(
    x=variety_points['taster_name'],
    y=variety_points['variety'],
    z=variety_points['points'],
    colorscale='redor',
    connectgaps=True,
)
fig = go.Figure(data=points_heatmap)
fig.update_layout(title='Varieties and their Points According to Top Raters')
fig.show()

In [12]:
# Fit an axis-aligned 2-D Gaussian to (price, points) from a random sample of
# reviews and draw its probability density as a 3-D surface.

# Only price and points are used below, so only those two must be present.
# A bare dropna() would also throw away every review that lacks any other
# column (e.g. region_2 or designation), shrinking and biasing the sample.
# random_state pins the draw so the figure is reproducible.
reviews_sample = reviews.sample(800, random_state=0).dropna(subset=['price', 'points'])

mu_price = reviews_sample['price'].mean()
mu_points = reviews_sample['points'].mean()
var_price = reviews_sample['price'].var()
var_points = reviews_sample['points'].var()

# Evaluate the density on a regular, ascending grid spanning the sampled range.
# Meshing the raw observations instead gives an unsorted grid with repeated
# coordinates (a poor basis for a surface) and len(sample)**2 evaluations.
grid_steps = 100
price_axis = np.linspace(reviews_sample['price'].min(), reviews_sample['price'].max(), grid_steps)
points_axis = np.linspace(reviews_sample['points'].min(), reviews_sample['points'].max(), grid_steps)

prices, points = np.meshgrid(price_axis, points_axis)

# Stack the two coordinate grids into shape (grid_steps, grid_steps, 2), the
# layout multivariate_normal.pdf expects for a batch of 2-D points.
pos = np.dstack((prices, points))

# Zero off-diagonal terms: price and points are modelled as independent.
rv = multivariate_normal([mu_price, mu_points], [[var_price, 0], [0, var_points]])

fig = go.Figure(data=go.Surface(x=prices, y=points, z=rv.pdf(pos)))
fig.update_layout(title='Gaussian Density Fitted to Wine Price and Points',
                  scene=dict(xaxis_title='price',
                             yaxis_title='points',
                             zaxis_title='density'))
fig.show()