In [1]:
import pandas as pd
import io
import plotly.graph_objs as go 
from plotly.offline import plot
import plotly.plotly as py
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [4]:
# importing country-code dataset
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

In [5]:
df.sample(5)

Unnamed: 0,COUNTRY,GDP (BILLIONS),CODE
120,Luxembourg,63.93,LUX
113,Latvia,32.82,LVA
72,Gabon,20.68,GAB
30,Bulgaria,55.08,BGR
71,French Polynesia,7.15,PYF


In [6]:
country_codes = df[["COUNTRY","CODE"]].values.tolist()

In [7]:
country_codes[54]

['Czech Republic', 'CZE']

In [8]:
wine = pd.read_csv('../tfm_folder/winemag-data_first150k.csv.zip',sep=",")
wine.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude


In [9]:
wine = wine.drop('Unnamed: 0',1)
wine = wine.drop_duplicates()
len(wine)

97851

In [10]:
country_points = wine.groupby('country').points.mean().reset_index()

In [11]:
country_points.head()

Unnamed: 0,country,points
0,Albania,88.0
1,Argentina,86.076298
2,Australia,87.959708
3,Austria,89.380296
4,Bosnia and Herzegovina,85.333333


In [13]:
#function to transforme country-names to country-codes that can be interpreted by the choropleth map
def name_to_code(name):
    if name in dict(country_codes).keys():
        return dict(country_codes)[name]
    if name=="US":
        return "USA"
    if name=="South Korea":
        return "PRK"
    if name=="England":
        return "GBR"
    

In [15]:
#creating new column named "code"
country_points["code"] = country_points.country.apply(name_to_code)

In [16]:
country_points.head()

Unnamed: 0,country,points,code
0,Albania,88.0,ALB
1,Argentina,86.076298,ARG
2,Australia,87.959708,AUS
3,Austria,89.380296,AUT
4,Bosnia and Herzegovina,85.333333,BIH


In [17]:
#simple choropleth map showing average rating-score of wines from each country
data = [ dict(
        type = 'choropleth',
        locations = country_points['code'],
        z = country_points["points"],
        text = country_points['country'],
        colorscale = [[80,"rgb(5, 10, 172)"],[83,"rgb(40, 60, 190)"],[89,"rgb(70, 100, 245)"],\
            [92,"rgb(90, 120, 245)"],[94,"rgb(106, 137, 247)"],[100,"rgb(220, 220, 220)"]],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = None,
            title = 'Quality of wine by country'),
      ) ]

layout = dict(
    title = 'Mean rating of wines',
    geo = dict(
        showframe = True,
        showcoastlines = True,
        projection = dict(
            type = 'Mercator'
        )
    )
)

fig = dict( data=data, layout=layout )

In [19]:
plot(fig, validate=False,filename='d3-world-map.html')

'file:///Users/luis/Desktop/tfm_folder/d3-world-map.html'

In [20]:
wine["code"] = wine.country.apply(name_to_code)

In [21]:
wine.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery,code
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz,USA
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez,ESP
2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley,USA
3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi,USA
4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude,FRA


In [25]:
#best wine by country
best_by_country = []
for country in wine.code.unique():
    df = wine[wine.code == country].sort_values(by="points",ascending=False).head(1)
    df = df.sort_values(by="price",ascending=True)
    df = df[["code","country","designation","points","price","province","variety","winery"]] 
    best_by_country.append(df)

In [26]:
best_by_country[0]

Unnamed: 0,code,country,designation,points,price,province,variety,winery
98647,USA,US,Litton Estate Vineyard,100,100.0,California,Pinot Noir,Williams Selyem


In [27]:
best_by_country = pd.concat(best_by_country)

In [29]:
# dataframe showing best wine for each country
best_by_country.head()

Unnamed: 0,code,country,designation,points,price,province,variety,winery
98647,USA,US,Litton Estate Vineyard,100,100.0,California,Pinot Noir,Williams Selyem
10538,ESP,Spain,Clon de la Familia,98,450.0,Northern Spain,Tinto Fino,Emilio Moro
26296,FRA,France,Clos du Mesnil,100,1400.0,Champagne,Chardonnay,Krug
111087,ITA,Italy,Occhio di Pernice,100,210.0,Tuscany,Prugnolo Gentile,Avignonesi
25,NZL,New Zealand,Maté's Vineyard,94,57.0,Kumeu,Chardonnay,Kumeu River


In [30]:
best_by_country = best_by_country.dropna()

In [45]:
data = [ dict(
        type = 'choropleth',
        locations = best_by_country['code'],
        z = best_by_country['price'],
        text = "designation:"+best_by_country['designation']+ "; " + "winery:" + best_by_country['winery']+ ";" +"variety:" +best_by_country['variety'],
        colorscale = [[0,"rgb(5, 10, 172)"],[50.,"rgb(40, 60, 190)"],[100.,"rgb(70, 100, 245)"],\
            [150.,"rgb(90, 120, 245)"],[220.,"rgb(106, 137, 247)"],[1400.,"rgb(220, 220, 220)"]],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '$',
            title = 'Cheapest among best wines in each country'),
      ) ]

layout = dict(
    title = 'Cheapest among best wines in each country',
    geo = dict(
        showframe = True,
        showcoastlines = True,
        projection = dict(
            type = 'Mercator'
        )
    )
)

fig = dict( data=data, layout=layout )


In [46]:
plot(fig, validate=False,filename='d3-world-map.html')

'file:///Users/luis/Desktop/tfm_folder/d3-world-map.html'