## Global historical Wine Trade/Consumption
(Megafile_of_Global_Wine_Data_1835_to_2016)

In [14]:
#%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gmaps
import requests
import json
import plotly.graph_objs as go
import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot
from plotly.offline import plot

from api_keys import (owmkey,gkey)

# Analyzing global historial wine trade (Working on Giant MS-Excel File)
This function reads the name of worksheet within the gian Excel Worksheet (string) 
together with the number of  bottom redundant rows (brr) and top header rows (thr) (two integers)
and return the lcean dataframe with the year as index.
Note: the thr and brr arguments are optional but the sheet_name is required.

In [2]:
# removing the unnecessary row from top or bottom
# relocating the columns and removing unnecessary ones
# reset indexing
def giant_worksheet(sheet_giant,brr=0, thr=0):
    df = pd.read_excel('../data/rawdata/Megafile_of_global_wine_data_1835_to_2016_1217.xlsx', 
                         sheet_name=sheet_giant, header=1, index_col=None)
    df.index.rename('year',inplace=True)
    df=df.reset_index()
    df.fillna(-1,inplace=True) #fills empty cells with -1
    
    #relocating the Norway column if exist
    try:
        nor = df['Norway']
        df.drop(labels=['Norway'], axis=1,inplace = True)
        df.insert(6, 'Norway', nor)
    except:
        print('NO Norway column found in worksheet:', sheet_giant)

    #dfmf.drop(labels=['nan'], axis=1,inplace = True)
    df=df.rename(columns={'Bel-Lux':'Belgium'}) #chnage the name of Be-Lux to Belgium
    df.drop(labels=['Coeff. of variation'], axis=1,inplace = True) #remove the coeff of var column.
    df.drop(labels=['Other'], axis=1,inplace = True) #remove the Other column
    df.drop(labels=['Other Asia Pacific'], axis=1,inplace = True) #remove the Other column
    #df.drop(df.columns[len(df.columns)-1], axis=1, inplace=True)

    row_to_drop=list(range(thr))
    df=df.drop(df.index[row_to_drop])
    if brr !=0:
        df=df[:-brr]
    df.set_index('year',inplace=True)
    return df


## Data wrangling (from the giant Excel workbook)

In [7]:
#reading worksheets, wrangling and saving all sheets to a new workbook

outputpath="..\data\cleandata\Wine_History.xlsx"
writer = pd.ExcelWriter(outputpath, engine='xlsxwriter')
#Now reading Wine consumption volume(dfwcv) (KL) (ws #34)

sheetname='T34 Wine consumption vol'
dfwcv=giant_worksheet(sheet_giant=sheetname,thr=30)
dfwcv.to_excel(writer, sheet_name=sheetname)

sheetname='T6 Wine production'
#Now reading Wine production volume(dfwpv) (KL) (ws #6)
dfwpv=giant_worksheet(sheet_giant=sheetname,thr=30)
dfwpv.to_excel(writer, sheet_name=sheetname)

sheetname='T96 Wine cons intensity index'
#Reading global wine consumption intensity index (dfwcii) (ws #96)
dfwcii=giant_worksheet(sheet_giant=sheetname, brr=15,thr=126)
dfwcii.to_excel(writer, sheet_name=sheetname)

sheetname='T38 Wine consumption per capita'
#Reading global wine consumption per capita (dfwcpc) (litres) (ws #38)
dfwcpc=giant_worksheet(sheet_giant=sheetname,thr=30)
dfwcpc.to_excel(writer, sheet_name=sheetname)

sheetname='T8 Wine prodn per capita'
#Reading global wine production per capita (dfwppc) (litres) (ws #8)
dfwppc=giant_worksheet(sheet_giant=sheetname,thr=30,brr=3)
dfwppc.to_excel(writer, sheet_name=sheetname)

sheetname='T58 Population'
#Reading global population (dfgp) (*1000) (ws #58)
dfgp=giant_worksheet(sheet_giant=sheetname,thr=30)
dfgp.to_excel(writer, sheet_name=sheetname)

writer.save()

NO Norway column found in worksheet: T6 Wine production
NO Norway column found in worksheet: T8 Wine prodn per capita


## Plotting global wine trade plots based on the generated dataframes

In [8]:
country_name =['United States', 'Canada', 'Italy', 'France', 'Spain', 'Brazil', 'Australia', 
               'Portugal','United Kingdom','Norway','World']

### This function summerizes the plotting (Plotly) requests for every dataframe from the giant file 

In [9]:
# It takes left y-axis and (if exists) right y-axis paramters and name of the dataframe to extract the data from. 
# Input:
#      ldf: dataframe required for primary (left) y-axis (required)
#      rdf: dataframe required for secondary (right) y-axis 
#      llg: inquiry list for primary y-axis (required)
#      rlg: inquiry list for secondary y-axis
#      nyax: 1: only primary y-axis, 2: prmiary and secondary y-axis

# Putput:
#      figure object (fig) that can be plotted by pltly

def plot_scatter(_title, ldf, llg, rdf=[], rlg=[], nyax=1):
    data = []
    tracex = []
    shps = []

    # we define two y=axis based on primary y-axis group (lyg) and, if exists, secondary y-axis group (ryg)
    for x in llg:
        trace =  go.Scatter(
              x=ldf.index,
              y=ldf[x],
              name=x
        )
        data.append(trace)
    
    for x in rlg:
        trace= go.Scatter(
              x=rdf.index,
              y=rdf[x],
              name=x,
              yaxis='y2'
        )
        data.append(trace)
    
    layout = {
        'autosize':False,
        'height': 800,
        'title' : _title,
        'xaxis' : {'title' : 'Year'},
        'yaxis' : {'title' : 'XXX',
                   'range':[0,max(ldf[x].max() for x in llg)*1.05],
                   'automargin':True},
        'showlegend': True,
        #'shapes': shps
    }
    fig = go.Figure(data=data, layout=layout)
    if (nyax==2):
        fig.layout.update({'yaxis2' : {'title' : 'World Population',
                                       'range':[0,max(rdf[x].max() for x in rlg)*1.05],
                                       'automargin':True,
                                       'side': 'right',
                                       'overlaying': 'y'
                                      }
                          })
        
    fig.layout.update({'height':800})
    plot(fig, filename = _title+'.html', auto_open=False)
    
    return fig

### Plotting scatters of the generated dataframes from the giant file

In [10]:
#dfwcii scatter plot
init_notebook_mode(connected=True)
iplot(plot_scatter('Global_wine_consumption_intensity_index',dfwcii,country_name))

In [11]:
#dfwcv scatter plot
init_notebook_mode(connected=True)
iplot(plot_scatter('Global_wine_consumption_volume',dfwcv,country_name))

In [113]:
# Scatter plot: World wine consumption/production together with world population
init_notebook_mode(connected=True)
rg=['World'] #right y-axis list

#option 1: per cpaita results
#lgpc=['World_prod_per_capita','World_consump_per_capita'] #left y-axis list
#gcppc=pd.DataFrame({'World_prod_per_capita':dfwppc['World'], 'World_consump_per_capita': dfwcpc['World']}) #left y-axis dtaframe
#iplot(plot_scatter('Global Production_Consumption per Capita',gcppc,lgpc,rdf=dfgp,rlg=rg,nyax=2))


option 2: volumetric results
lg=['World_prod','World_consump']
gcpv=pd.DataFrame({'World_prod':dfwpv['World'], 'World_consump': dfwcv['World']}) #left y-axis dataframe
iplot(plot_scatter('Global Volumetric Production Consumption (KL)',gcpv,lg,rdf=dfgp,rlg=rg,nyax=2))

# Analyzing global historical wine price,rating,variety, (Working on 130k MS-Excel File)

## Data Wrangling (from the 130K Excel workbook)

### 1) Import data from 130k Excel workbook

### 2) Cleaning up the data (e.g. remow null values, tester twitter column),  and 

### 3) Export the data back to another csv file

### Note: It reduced the number of studied wines from 129970 to 121000

In [18]:

file_one = "../data/rawdata/winemag-data-130k-v2.csv"
df = pd.read_csv(file_one,index_col=0)
#cleaning data: delete the taster_tweeter address column
df.drop(columns=['taster_twitter_handle'])
#drop any null values
df = df.dropna(subset=['country','points','price'])
df.sort_values(by=['price'],ascending=False)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
80290,France,This ripe wine shows plenty of blackberry frui...,,88,3300.0,Bordeaux,Médoc,,Roger Voss,@vossroger,Château les Ormes Sorbet 2013 Médoc,Bordeaux-style Red Blend,Château les Ormes Sorbet
15840,France,The wine is a velvet glove in an iron fist. Th...,,96,2500.0,Bordeaux,Pomerol,,Roger Voss,@vossroger,Château Pétrus 2014 Pomerol,Bordeaux-style Red Blend,Château Pétrus
98380,France,"A superb wine from a great year, this is power...",,96,2500.0,Burgundy,La Romanée,,Roger Voss,@vossroger,Domaine du Comte Liger-Belair 2010 La Romanée,Pinot Noir,Domaine du Comte Liger-Belair
120391,US,The nose on this single-vineyard wine from a s...,Roger Rose Vineyard,91,2013.0,California,Arroyo Seco,Central Coast,Matt Kettmann,@mattkettmann,Blair 2013 Roger Rose Vineyard Chardonnay (Arr...,Chardonnay,Blair
113564,France,"A wonderfully complete wine, with all the elem...",,96,2000.0,Burgundy,La Romanée,,Roger Voss,@vossroger,Domaine du Comte Liger-Belair 2005 La Romanée,Pinot Noir,Domaine du Comte Liger-Belair
65352,France,This extravagantly perfumed wine has great jui...,,97,2000.0,Bordeaux,Pomerol,,Roger Voss,@vossroger,Château Pétrus 2011 Pomerol,Bordeaux-style Red Blend,Château Pétrus
1558,France,"A massive wine for Margaux, packed with tannin...",,98,1900.0,Bordeaux,Margaux,,Roger Voss,@vossroger,Château Margaux 2009 Margaux,Bordeaux-style Red Blend,Château Margaux
111755,France,This is the finest Cheval Blanc for many years...,,100,1500.0,Bordeaux,Saint-Émilion,,Roger Voss,@vossroger,Château Cheval Blanc 2010 Saint-Émilion,Bordeaux-style Red Blend,Château Cheval Blanc
111753,France,"Almost black in color, this stunning wine is g...",,100,1500.0,Bordeaux,Pauillac,,Roger Voss,@vossroger,Château Lafite Rothschild 2010 Pauillac,Bordeaux-style Red Blend,Château Lafite Rothschild
1575,France,"The purest Cabernet Sauvignon fruit, with dark...",,96,1300.0,Bordeaux,Pauillac,,Roger Voss,@vossroger,Château Mouton Rothschild 2009 Pauillac,Bordeaux-style Red Blend,Château Mouton Rothschild


## Wine categories:
1) Bold Red: Malbec, Syrah, Shiraz, Mourvedre, Pinotage, Petite Sirah, Touriga Nacional, Cabernet Sauvignon, Bordeaux Blend, Meritage

2) Medium Red: Meriot, Sangiovese, Zinfandel, Cabernet Franc, Tempranillo, Nebbiolo, Barbera, Cotes du Rhone Blend

3) Light Red: Pinot Noir, Grenache, Gamay, St. Laurent, Carignan, Counoise

4) Rich White: Chardonnay, Semillon, Viognier, Marsanne, Roussanne

5) Light White: Sauvignon Blanc, Albarino, Pitot Blanc, Vermentino, Melon de Bourgogne, Gargenega, Trebbiano, Pinot Gris, Pinot Grigio

6) Rose: Provencal Rose, White Zinfandel, Loire Valley Rose, Pinot Noir Rose, Syrah Rose, Garnache Rosado, Bandol Rose, Tempranilio Rose, Saignee Method Rose

7) Sweet White: Moscato, Riesling, Chenin Blanc, Gewurztraminer, Late Harvest Whites, Alascian Pinot Gris

8) Sparkling: Champagne, Prosecco, Cremant, Cava, Metodo Classico, Sparkling Wine, Sparkling Rose

9) Dessert: Port, Sherry, Maderia, Vin Santo, Muscat, PX, Pedro Ximenez

In [19]:
Wine_types={'Bold Red': ['Malbec', 'Syrah', 'Shiraz', 'Red Blend', 'Mourvedre', 'Pinotage', 'Petite Sirah', 'Touriga Nacional', 'Cabernet Sauvignon', 'Bordeaux-style Red Blend', 'Meritage','Portuguese Red','Merlot'],
           'Medium Red': ['Meriot', 'Sangiovese', 'Zinfandel','Cabernet Franc', 'Tempranillo', 'Nebbiolo', 'Barbera', 'Cotes du Rhone Blend'],
           'Light Red':[ 'Pinot Noir', 'Grenache', 'Gamay', 'St. Laurent', 'Carignan', 'Counoise'],
           'Rich White': ['Chardonnay', 'Semillon','Viognier', 'Marsanne', 'Roussanne'],
           'Light White': ['Sauvignon Blanc', 'Albarino', 'Pitot Blanc', 'Vermentino', 'Melon de Bourgogne', 'Gargenega', 'Trebbiano', 'Pinot Gris', 'Pinot Grigio','Bordeaux-style White Blend'],
           'Sweet White': ['Moscato', 'Riesling', 'Chenin Blanc', 'Gewurztraminer', 'Late Harvest Whites', 'Alascian Pinot Gris'],
           'Rose': ['Provencal Rose', 'White Zinfandel', 'Loire Valley Rose', 'Pinot Noir Rose', 'Syrah Rose', 'Garnache Rosado', 'Bandol Rose', 'Tempranilio Rose', 'Saignee Method Rose','Rosé'], 
           'Sparkling': ['Champagne', 'Prosecco', 'Cremant', 'Cava', 'Metodo Classico', 'Sparkling Wine', 'Sparkling Rose'],
           'Dessert': ['Port', 'Sherry', 'Maderia', 'Vin Santo', 'Muscat', 'PX', 'Pedro Ximenez']
           }
Wine_types.keys()
for x in Wine_types.keys():
    for y in range(len(Wine_types[x])):
        print('type=',x,'subtype=',Wine_types[x][y])
        A=df[df.variety.str.contains(Wine_types[x][y],na=False)].index.tolist()
        df.loc[A,'vt']=x
idx = df['vt'].isnull( )
df.loc[idx,'vt']='Others'
df.head()

dict_keys(['Bold Red', 'Medium Red', 'Light Red', 'Rich White', 'Light White', 'Sweet White', 'Rose', 'Sparkling', 'Dessert'])

In [23]:
Wine_types.keys()

dict_keys(['Bold Red', 'Medium Red', 'Light Red', 'Rich White', 'Light White', 'Sweet White', 'Rose', 'Sparkling', 'Dessert'])

In [None]:
import plotly.plotly as py
import plotly.graph_objs as go

fig = {
  "data": [
    {
      "values": [16, 15, 12, 6, 5, 4, 42],
      "labels": Wine_types.keys(),
      "domain": {"x": [0, .48]},
      "name": "Wine Average (Mean) Price by Variety",
      "hoverinfo":"label+percent+name",
      "hole": .4,
      "type": "pie"
    },
    {
      "values": [27, 11, 25, 8, 1, 3, 25],
      "labels":  Wine_types.keys(),
      "text":["CO2"],
      "textposition":"inside",
      "domain": {"x": [.52, 1]},
      "name": "Mean Price",
      "hoverinfo":"label+percent+name",
      "hole": .4,
      "type": "pie"
    }],
  "layout": {
        "title":"GLobal Wine Price by Variety",
        "annotations": [
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "Mean",
                "x": 0.20,
                "y": 0.5
            },
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "Maximum",
                "x": 0.8,
                "y": 0.5
            }
        ]
    }
}
py.iplot(fig, filename='donut')

In [21]:
df['vt'].value_counts()

Bold Red       37387
Others         20650
Light Red      14829
Medium Red     12581
Rich White     12512
Light White     8286
Sweet White     5930
Dessert         4068
Rose            3261
Sparkling       1412
Name: vt, dtype: int64

In [None]:
fig,ax=plt.subplots(num=1,figsize=(10,10), dpi=80, facecolor='w', edgecolor='k')
l1=ax.scatter(df.points,df.price)
ax.set_xlabel('Points',fontsize=18)
ax.set_ylabel('Price ($)',fontsize=18)
plt.locator_params(axis='x', nbins=5)

In [None]:
#extract points and price columns
df_numeric=df._get_numeric_data()

#making bins in which Data will be held and creating a group based off of the bins
bins = [80,85,90,95,101]

# Create the names for the four bins
group_names = ['80-84','85-89','90-94','95-100']

df_numeric["points_avg"] = pd.cut(df_numeric["points"], bins, labels=group_names, right=False)
df_bins = df_numeric.groupby("points_avg",as_index=False)
print(df_bins.describe())
#plot the categorized data in four bins

In [None]:
#plot the categorized data in four bins
matplotlib.rc('ytick',labelsize=18)

tick_labels =list(df_bins.groups.keys())
A=[]
for keys in tick_labels:
    A.append(df_bins.get_group(keys).price)

fig,ax=plt.subplots(num=1,figsize=(10,10), dpi=80, facecolor='w', edgecolor='k')
ax.boxplot(A,notch=False,sym='gD',vert=True,whis=1.5)
ax.set_xticklabels(tick_labels)
#ax.axes.get_yaxis().set_ticks([])
ax.set_xlabel('Points',fontsize=18)
ax.set_ylabel('Price ($)',fontsize=18)
ax.set_ylim(0,300)

fig,ax=plt.subplots(num=2,figsize=(10,5), dpi=80, facecolor='w', edgecolor='k')
ax.boxplot(A,0,'gD')
ax.set_xticklabels(tick_labels)
ax.set_xlabel('Points',fontsize=18)
ax.set_ylabel('Price ($)',fontsize=18)
ax.set_ylim(300,3000)

plt.show()
ax.axes.get_yaxis().set_visible(False)
#df_bins.describe()


In [None]:
# In order to plot the avg price, avg points, and count of each wine globally, we started to work with
# province/country combination for all the countries globally. But, when plotted the heatmap, the circles
# from small provinces (for example in Portegaul) were intereferring and that we could not observe the results as expected.
# Then, we decided to divide the data into two group:
# 1) All the wines from every region in the owrld except for the US was represented by the country.
# 2) The US wines were represented by each state.
# In the following, first we work with the countries followed by the state/US section

dfc=df.pivot_table(index='country',values=['price','points'],aggfunc=[np.mean,np.max,np.min])
dfc.columns = list(map("_".join, dfc.columns))
dfc['wine_count']=df.groupby(['country'])['price'].apply(lambda x: x.count())
dfc['name']=dfc.index
dfc['lat']=0
dfc['lng']=0


df_US=df[df.country=='US']
dfUS=df_US.pivot_table(index='province',values=['price','points'],aggfunc=[np.mean,np.max,np.min])
dfUS.columns = list(map("_".join, dfUS.columns))
dfUS['wine_count']=df_US.groupby(['province'])['price'].apply(lambda x: x.count())
dfUS['name']=dfUS.index
dfUS['lat']=0
dfUS['lng']=0
dfUS.sort_values(by=['amax_price'],ascending=False)

In [None]:
#find country and US provinces locations
import gmaps
import requests
import json

from api_keys import (owmkey,gkey)
# base paramters
base_url = "https://maps.googleapis.com/maps/api/geocode/json"
params = {
        "address": "",
        "key": gkey
}

ii=0
for x in dfc.index:
    # set up a parameters dictionary
    params['address']= x
    response = requests.get(base_url, params=params)
    
    #print(json.dumps(response, indent=4, sort_keys=True)) 
    if (response.status_code == 200):
        print(x)
        response=response.json()
        if (response['status'] != 'ZERO_RESULTS'):
            ii+=1
            dfc.loc[x,'lat']=response['results'][0]['geometry']['location']['lat']
            dfc.loc[x,'lng']=response['results'][0]['geometry']['location']['lng']
#print('Country location reading completed:\n{} successful read out of {} countries\n'.format(ii,i))

ii=0
for x in dfUS.index:
    # set up a parameters dictionary
    params['address']= x + ', US'
    response = requests.get(base_url, params=params)
    
    #print(json.dumps(response, indent=4, sort_keys=True)) 
    if (response.status_code == 200):
        print(x)
        response=response.json()
        if (response['status'] != 'ZERO_RESULTS'):
            ii+=1
            dfUS.loc[x,'lat']=response['results'][0]['geometry']['location']['lat']
            dfUS.loc[x,'lng']=response['results'][0]['geometry']['location']['lng']
#print('US location reading completed:\n{} successful read out of {} US states\n'.format(ii,i))

In [None]:
#defining the function to plot gmaps
def gmap_plot(df_plot,par_Plot):
    import gmaps
    #Average price analysis
    #---------------------------- heatmap layer: countries

    #avgprc_symbol: symbol-layer + R-G heatmap of wine avg price

    locations = df_plot[["lat", "lng"]].astype(float)

    # Create a poverty Heatmap layer
    fig = gmaps.figure()

    a=np.reshape(np.array(df_plot[par_Plot]),(-1,1))
    b=[item for sublist in a for item in sublist]#takes 1*1*n array and return 1*n array, e.g. [[1],[2],[3]] to [1,2,3]

    avgprc_heat = gmaps.heatmap_layer(locations, weights=b,
                                     dissipating=False, max_intensity=100,
                                     point_radius = 1)
    #gmaps.heatmap.gradient = [
    #    'white',
    #    'orange',
    #    'red',
    #    'green',
    #    'blue',
    #    'black'
    #]

    # Adjust avgprc_layer setting to help with heatmap dissipating on zoom
    avgprc_heat.dissipating = False
    avgprc_heat.max_intensity = 100
    avgprc_heat.point_radius = 2


    #---------------------------- symbol layer:countries
    #box tempelate on the gampes with multiple paramters printing on (we also need to convery the dataframe to dictionary)

    df_plot['mean_price_str']=df_plot['mean_price'].map("{:.1f}".format)  #required for multiparam info box
    df_plot['mean_points_str']=df_plot['mean_points'].map("{:.1f}".format)  #required for multiparam info box
    
    info_box_template = """
    <dl>
    <dt>Country/State:</dt><dd>{name}</dd>
    <dt>Avg. Price:</dt><dd>{mean_price_str}</dd>
    <dt>Avg. Points:</dt>{mean_points_str}<dd>
    <dt>Price Range:</dt><dd>[{amin_price}-{amax_price}]</dd>
    <dt>Points Range:</dt>[{amin_points}-{amax_points}]<dd>
    <dt>Wine Count:</dt><dd>{wine_count}</dd>
    </dl>
    """
    
    dfc_dict=df_plot.to_dict('records') #required for multiparam info box
    box_info = [info_box_template.format(**x) for x in dfc_dict]
    
    avgprc_symbol = gmaps.symbol_layer(
        locations, fill_color='rgba(0, 150, 0, 0.1)',
        stroke_color='rgba(0, 0, 150, 0.1)', scale=1,
        info_box_content=box_info
    )

    avgprc_symbol.dissipating = True
    avgprc_symbol.max_intensity = 100
    avgprc_symbol.point_radius = 3
    #--------------------------- Plot all layers on top of each other
#    fig.add_layer(avgprc_symbol)
#    fig.add_layer(avgprc_heat)
#    return fig

    return avgprc_heat,avgprc_symbol

In [None]:
# plot max price
#fig = gmaps.figure(center=(14.09,-76.91),zoom_level=4)

#a, b=gmap_plot(dfc,'amax_price')
#c, d=gmap_plot(dfUS,'amax_price')
#fig.add_layer(a)
#fig.add_layer(b)
#fig.add_layer(c)
#fig.add_layer(d)
#fig

In [None]:
# plot mean price
fig = gmaps.figure(center=(14.09,-76.91),zoom_level=4)

a, b=gmap_plot(dfc,'mean_price')
c, d=gmap_plot(dfUS,'mean_price')
fig.add_layer(a)
fig.add_layer(b)
fig.add_layer(c)
fig.add_layer(d)

fig

In [None]:
# plot glboal wine count
fig = gmaps.figure(center=(14.09,-76.91),zoom_level=4)

a, b=gmap_plot(dfc,'wine_count')
c, d=gmap_plot(dfUS,'wine_count')
fig.add_layer(a)
fig.add_layer(b)
fig.add_layer(c)
fig.add_layer(d)

fig

In [None]:
#import seaborn as sns
#sns.distplot( dfv["count"] , color="skyblue", label="Sepal Length")
#sns.distplot( dfv["price"] , color="red", label="Sepal Width")

In [None]:
df.head()

## Working with data based on wine variety

In [None]:
dfcvt=df.pivot_table(index=['vt','country'],values=['price','points'],aggfunc=[np.mean,np.max,np.min])
dfcvt.columns = list(map("_".join, dfcvt.columns))
dfcvt['wine_count']=df.groupby(['vt','country'])['price'].apply(lambda x: x.count())
dfcvt.sort_values(by=['amax_price'],ascending=False)
dfcvt.head(50)

In [None]:
# First, in order to plot the results in gmaps widgets, we need to vectroize the consumption of each country
def findlatlng(x):
    # set up a parameters dictionary
    params['address']= x
    response = requests.get(base_url, params=params)
    
    #print(json.dumps(response, indent=4, sort_keys=True)) 
    if (response.status_code == 200):
        #print(x)
        response=response.json()
        if (response['status'] != 'ZERO_RESULTS'):
            lati=response['results'][0]['geometry']['location']['lat']
            lngi=response['results'][0]['geometry']['location']['lng']
        else: 
            lati= np.nan
            lngi=np.nan
            
    return lati,lngi

In [None]:
#Vectorizing a dataframe for the gmaps function  (from giant file)
def vector_togmaps(df):
    vector_out=pd.DataFrame([])
    vecsize=df.shape[0]*(df.shape[1])
    vector_out['country']=np.resize(df.T.index,(df.shape[0]*(df.shape[1]),1)).tolist()

    a_lat = []
    a_lng=[]
    for con in df.columns:
        a_lat.append(findlatlng(con)[0])
        a_lng.append(findlatlng(con)[1])

    vector_out['lat'] =np.resize(a_lat,vecsize).tolist()
    vector_out['lng'] =np.resize(a_lng,vecsize).tolist()
    #XXX : wine consumption
    vector_out['XXX']=np.resize(df,vecsize).tolist()
    vector_out=vector_out.dropna(how='any')
    vector_out=vector_out.reset_index(drop=True)

    A=np.asarray(df.index.tolist())
    #vector_out['year']=np.repeat(A, vector_out.shape[0]/len(A)).tolist()
    vector_out['year']=np.repeat(A, vector_out.shape[0]/len(A)).tolist()

    #keep only >0 elements
    condition=vector_out.XXX>0
    vector_out.where(cond=condition,inplace=True)
    vector_out=vector_out.dropna(how='any')

    condition=vector_out.country !='World'
    vector_out.where(cond=condition,inplace=True)
    vector_out=vector_out.dropna(how='any')

    vector_out.year=vector_out.year.astype(int)
    return vector_out
#vector_out.to_excel("annual_wine_consumption_dataframe.xlsx")

In [None]:
# Jupyter widget for exploring the consumption sheet of the giant wine Excel file dataset.
# The user uses the slider to choose a year. This renders the annual global wine consumption heatmap in that year.

from IPython.display import display
import ipywidgets as widgets

class Consumption(object):


    def __init__(self, df):
        self._df = df
        self._heatmap = None
        self._slider = None
        initial_year = min(self._df['year'])

        title_widget = widgets.HTML(
            '<h3>Annual Global Wine Consumption</h3>'
            '<h4>Data from <a href="https://www.adelaide.edu.au/wine-econ/databases">Wine Project</a></h4>'
        )

        map_figure = self._render_map(initial_year)
        controls = self._render_controls(initial_year)
        self._container = widgets.VBox([title_widget, controls, map_figure])

    def render(self):
        display(self._container)

    def _on_year_change(self, change):
        year = self._slider.value
        self._heatmap.locations = self._locations_for_year(year)
        self._total_box.value = self._total_consumption_text_for_year(year)
        return self._container

    def _render_map(self, initial_year):
        fig = gmaps.figure(center=(14.09,-76.91),zoom_level=2)
        self._heatmap = gmaps.heatmap_layer(
            self._locations_for_year(initial_year),
            self._annual_consumption_for_year(initial_year),
            max_intensity=100,
            point_radius=5,
        )
        self._heatmap.gradient = [
          'rgba(0, 255, 255, 0)',
          'rgba(0, 255, 255, 1)',
          'rgba(0, 191, 255, 1)',
          'rgba(0, 127, 255, 1)',
          'rgba(0, 63, 255, 1)',
          'rgba(0, 0, 255, 1)',
          'rgba(0, 0, 223, 1)',
          'rgba(0, 0, 191, 1)',
          'rgba(0, 0, 159, 1)',
          'rgba(0, 0, 127, 1)',
          'rgba(63, 0, 91, 1)',
          'rgba(127, 0, 63, 1)',
          'rgba(191, 0, 31, 1)',
          'rgba(255, 0, 0, 1)'
        ]
        #self._heatmap.set('gradient', heatmap.get('gradient') ? null : gradient);
        '''
        self._heatmap.gradient = [
            'yellow',
            'lime',
            'red',
        ]
        '''
        fig.add_layer(self._heatmap)
        return fig

        
    def _render_controls(self, initial_year):
        self._slider = widgets.IntSlider(
            value=initial_year,
            min=min(self._df['year']),
            max=max(self._df['year']),
            description='Year',
            continuous_update=False
        )
        self._total_box = widgets.Label(
            value=self._total_consumption_text_for_year(initial_year)
        )
        self._slider.observe(self._on_year_change, names='value')
        controls = widgets.HBox(
            [self._slider, self._total_box],
            layout={'justify_content': 'space-between'}
        )
        return controls

    def _locations_for_year(self, year):
        return self._df[self._df['year'] == year][['lat', 'lng']]

    def _total_consumption_for_year(self, year):
        return int(self._df[self._df['year']==year]['XXX'].sum()/1000000)
    
    # I ADDED for weights of heatmap
    def _annual_consumption_for_year(self, year):
        return (self._df[self._df['year']==year]['XXX'])
        #return (annualconsump[annualconsump['year']==year]['XXX']-annualconsump[annualconsump['year']==year]['XXX'].min())/\
        #        (annualconsump[annualconsump['year']==year]['XXX'].max()-annualconsump[annualconsump['year']==year]['XXX'].min())


    def _total_consumption_text_for_year(self, year):
        #print('year=',year)
        return 'Global Wine Consumption: {} (BL)'.format(self._total_consumption_for_year(year))

In [None]:
dfmf_vector=vector_togmaps(dfmf)
gmaps.configure(api_key=gkey)
Consumption(dfmf_vector).render()

In [None]:
dfwcii_vector=vector_togmaps(dfwcii)
#dfwcii_vector.reset_index(inplace=True)
dfwcii_vector.country=[dfwcii_vector.country[x][0] for x in dfwcii_vector.index]
dfwcii_vector

In [None]:
gmaps.configure(api_key=gkey)
Consumption(dfwcii_vector).render()