In [1]:
import pandas as pd
import numpy as np
import re
from ast import literal_eval
from matplotlib import pyplot as plt
import seaborn as sns
sns.set()
%matplotlib notebook

In [2]:
file = '../data/spain-red'
df = pd.read_csv(file + '.csv', converters={'foods': literal_eval, 'highlights': literal_eval})

In [3]:
diacritics = {'&#237;': 'í', '&#243;': 'ó', '&#250;': 'ú', '&#241;': 'ñ', '&#232;': 'è', \
    '&#8364;': '€', '&#193;': 'Á', '&#192;': 'À', '&#233;': 'é', '&#224;': 'à', \
    '&#239;': 'ï', '&#231;': 'ç', '&#193;': 'Á', '&#252;': 'ü', '&#225;': 'á', '&#186;': 'º', \
    '&#244;': 'ô', '&#8217;': '’', '&#960;': 'π', '&#210;': 'Ò'}
df.replace(diacritics, regex=True, inplace=True)
df['winery'] = df['winery'].apply(lambda x: re.sub(r'((?<=[a-z_à-ÿ])[A-Z]|(?<!\A)[A-Z](?=[a-z_à-ÿ]))', r' \1', x))
df['vintage'] = df['vintage'].apply(lambda x: re.sub(r'((?<=[a-z_à-ÿ])[A-Z]|(?<!\A)[A-Z](?=[a-z_à-ÿ]))', r' \1', x))
df['region'] = df['region'].apply(lambda x: re.sub(r'((?<=[a-z_à-ÿ])[A-Z]|(?<!\A)[A-Z](?=[a-z_à-ÿ]))', r' \1', x))
df['vintage'] = df['vintage'].apply(lambda x: re.sub(r"(\S)\(", r'\1 (', x))
df['vintage'] = df['vintage'].apply(lambda x: re.sub('\(\s*(.*?)\s*\)', r'(\1)', x))
df['vintage'] = df['vintage'].apply(lambda x: re.sub(r"([0-9]+(\.[0-9]+)?)",r" \1 ", x).strip())
df['vintage'] = df['vintage'].replace(r'\s+', ' ', regex=True)

numeric_columns = ['rating', 'ratings_count', 'country_rank', 'region_rank', \
                   'winery_rank', 'global_rank', 'alcohol', 'body', 'acidity', 'price']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

df = df.drop(['drink_from', 'drink_until'], axis = 1)

In [4]:
for f in df['foods']:
    f.sort()

In [5]:
parameters = ['winery', 'vintage', 'year', 'region', 'foods', 'alcohol', 'body', 'body_desc', \
              'acidity', 'acidity_desc']
modes_styles = df.groupby(['style'])[parameters].agg(lambda x: list(x.mode()))
modes_styles

Unnamed: 0_level_0,winery,vintage,year,region,foods,alcohol,body,body_desc,acidity,acidity_desc
style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Spanish Cabernet Sauvignon,[Familia Torres],[Cabernet Sauvignon],[2016],[Penedès],"[[Beef, Lamb, Poultry]]",[14.5],[4.0],[Full-bodied],[3.0],[High]
Spanish Grenache,[Jiménez- Landi],[Las Uvasdela Ira],[2018],[Madrid],"[[Beef, Game (deer, venison), Lamb, Pasta, Pou...",[14.5],[5.0],[Very full-bodied],[3.0],[High]
Spanish Mencia,[Descendientesde J. Palacios],[Villade Corullón Bierzo],[2017],[Bierzo],"[[Game (deer, venison), Poultry, Veal]]",[13.5],[3.0],[Medium-bodied],[3.0],[High]
Spanish Merlot,[Enate],[Merlot],[2017],[Somontano],"[[Beef, Lamb, Veal]]","[14.5, 15.0]",[4.0],[Full-bodied],[3.0],[High]
Spanish Monastrell,[Juan Gil],[El Sequé],[2019],[Jumilla],"[[Beef, Game (deer, venison), Lamb, Pasta]]",[14.5],[5.0],[Very full-bodied],[3.0],[High]
Spanish Montsant Red,[Venusla Universal],[Tinto],[2017],[Montsant],"[[Beef, Lamb, Pasta]]",[14.5],[4.0],[Full-bodied],[3.0],[High]
Spanish Priorat Red,[Álvaro Palacios],[Priorat],[2017],[Priorat],"[[Beef, Game (deer, venison), Lamb, Poultry]]",[14.5],[4.0],[Full-bodied],[3.0],[High]
Spanish Red,"[Familia Torres, Ànima Negra]",[Tinto],[2017],[Costersdel Segre],"[[Beef, Pasta, Poultry, Veal]]",[14.5],[4.0],[Full-bodied],[3.0],[High]
Spanish Rhône Blend Red,"[Bodegas Piqueras, Ca N' Estruc, Costersdel Si...","[Black Label Syrah- Monastrell, L&# 8217 ; Equ...",[2019],"[Almansa, Campode Borja, Catalunya, Costersdel...","[[Beef, Game (deer, venison), Lamb, Pasta]]",[14.5],[5.0],[Very full-bodied],[3.0],[High]
Spanish Ribera Del Duero Red,[Vega Sicilia],[Riberadel Duero],[2018],[Riberadel Duero],"[[Beef, Game (deer, venison), Lamb]]",[14.5],[5.0],[Very full-bodied],[3.0],[High]


In [6]:
median_prices = df['price'].groupby(df['style']).median()
median_prices

style
Spanish Cabernet Sauvignon      23.640
Spanish Grenache                24.000
Spanish Mencia                  32.890
Spanish Merlot                  25.715
Spanish Monastrell              17.960
Spanish Montsant Red            32.500
Spanish Priorat Red             57.950
Spanish Red                     22.900
Spanish Rhône Blend Red         13.670
Spanish Ribera Del Duero Red    41.950
Spanish Rioja Red               28.700
Spanish Syrah                   25.920
Spanish Tempranillo             34.310
Spanish Toro Red                34.080
Name: price, dtype: float64

In [7]:
modes_styles['median_price'] = median_prices
modes_styles

Unnamed: 0_level_0,winery,vintage,year,region,foods,alcohol,body,body_desc,acidity,acidity_desc,median_price
style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Spanish Cabernet Sauvignon,[Familia Torres],[Cabernet Sauvignon],[2016],[Penedès],"[[Beef, Lamb, Poultry]]",[14.5],[4.0],[Full-bodied],[3.0],[High],23.64
Spanish Grenache,[Jiménez- Landi],[Las Uvasdela Ira],[2018],[Madrid],"[[Beef, Game (deer, venison), Lamb, Pasta, Pou...",[14.5],[5.0],[Very full-bodied],[3.0],[High],24.0
Spanish Mencia,[Descendientesde J. Palacios],[Villade Corullón Bierzo],[2017],[Bierzo],"[[Game (deer, venison), Poultry, Veal]]",[13.5],[3.0],[Medium-bodied],[3.0],[High],32.89
Spanish Merlot,[Enate],[Merlot],[2017],[Somontano],"[[Beef, Lamb, Veal]]","[14.5, 15.0]",[4.0],[Full-bodied],[3.0],[High],25.715
Spanish Monastrell,[Juan Gil],[El Sequé],[2019],[Jumilla],"[[Beef, Game (deer, venison), Lamb, Pasta]]",[14.5],[5.0],[Very full-bodied],[3.0],[High],17.96
Spanish Montsant Red,[Venusla Universal],[Tinto],[2017],[Montsant],"[[Beef, Lamb, Pasta]]",[14.5],[4.0],[Full-bodied],[3.0],[High],32.5
Spanish Priorat Red,[Álvaro Palacios],[Priorat],[2017],[Priorat],"[[Beef, Game (deer, venison), Lamb, Poultry]]",[14.5],[4.0],[Full-bodied],[3.0],[High],57.95
Spanish Red,"[Familia Torres, Ànima Negra]",[Tinto],[2017],[Costersdel Segre],"[[Beef, Pasta, Poultry, Veal]]",[14.5],[4.0],[Full-bodied],[3.0],[High],22.9
Spanish Rhône Blend Red,"[Bodegas Piqueras, Ca N' Estruc, Costersdel Si...","[Black Label Syrah- Monastrell, L&# 8217 ; Equ...",[2019],"[Almansa, Campode Borja, Catalunya, Costersdel...","[[Beef, Game (deer, venison), Lamb, Pasta]]",[14.5],[5.0],[Very full-bodied],[3.0],[High],13.67
Spanish Ribera Del Duero Red,[Vega Sicilia],[Riberadel Duero],[2018],[Riberadel Duero],"[[Beef, Game (deer, venison), Lamb]]",[14.5],[5.0],[Very full-bodied],[3.0],[High],41.95


In [14]:
columns = ['winery','vintage','year','region','foods','alcohol','body','body_desc','acidity','acidity_desc']

for c in columns:
    modes_styles[c] = modes_styles[c].astype(str)
    modes_styles[c] = modes_styles[c].apply(lambda x: x.replace('[','').replace(']','').replace("'",''))

# modes_styles['winery'] = modes_styles['winery'].astype(str)
# modes_styles['winery'] = modes_styles['winery'].apply(lambda x: x.replace('[','').replace(']',''))
modes_styles

Unnamed: 0_level_0,winery,vintage,year,region,foods,alcohol,body,body_desc,acidity,acidity_desc,median_price
style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Spanish Cabernet Sauvignon,Familia Torres,Cabernet Sauvignon,2016,Penedès,"Beef, Lamb, Poultry",14.5,4.0,Full-bodied,3.0,High,23.64
Spanish Grenache,Jiménez- Landi,Las Uvasdela Ira,2018,Madrid,"Beef, Game (deer, venison), Lamb, Pasta, Poultry",14.5,5.0,Very full-bodied,3.0,High,24.0
Spanish Mencia,Descendientesde J. Palacios,Villade Corullón Bierzo,2017,Bierzo,"Game (deer, venison), Poultry, Veal",13.5,3.0,Medium-bodied,3.0,High,32.89
Spanish Merlot,Enate,Merlot,2017,Somontano,"Beef, Lamb, Veal","14.5, 15.0",4.0,Full-bodied,3.0,High,25.715
Spanish Monastrell,Juan Gil,El Sequé,2019,Jumilla,"Beef, Game (deer, venison), Lamb, Pasta",14.5,5.0,Very full-bodied,3.0,High,17.96
Spanish Montsant Red,Venusla Universal,Tinto,2017,Montsant,"Beef, Lamb, Pasta",14.5,4.0,Full-bodied,3.0,High,32.5
Spanish Priorat Red,Álvaro Palacios,Priorat,2017,Priorat,"Beef, Game (deer, venison), Lamb, Poultry",14.5,4.0,Full-bodied,3.0,High,57.95
Spanish Red,"Familia Torres, Ànima Negra",Tinto,2017,Costersdel Segre,"Beef, Pasta, Poultry, Veal",14.5,4.0,Full-bodied,3.0,High,22.9
Spanish Rhône Blend Red,"Bodegas Piqueras, ""Ca N Estruc"", Costersdel Si...","Black Label Syrah- Monastrell, L&# 8217 ; Equi...",2019,"Almansa, Campode Borja, Catalunya, Costersdel ...","Beef, Game (deer, venison), Lamb, Pasta",14.5,5.0,Very full-bodied,3.0,High,13.67
Spanish Ribera Del Duero Red,Vega Sicilia,Riberadel Duero,2018,Riberadel Duero,"Beef, Game (deer, venison), Lamb",14.5,5.0,Very full-bodied,3.0,High,41.95


In [18]:
modes_styles.to_html('../data/modes_styles.html', encoding='utf-8')