In [4]:
# Set up libraries and load the data
import pandas as pd

reviews = pd.read_csv(
    filepath_or_buffer="../Data/winemag-data_first150k.csv",
    index_col=0,  # the first CSV column holds the row labels
)
reviews.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude


In [5]:
# Summary statistics (count, mean, spread, quartiles) for the points column
reviews["points"].describe()

count    150930.000000
mean         87.888418
std           3.222392
min          80.000000
25%          86.000000
50%          88.000000
75%          90.000000
max         100.000000
Name: points, dtype: float64

In [6]:
# describe() adapts to the column dtype: for a text column it reports
# count / unique / top / freq instead of numeric statistics
reviews["designation"].describe()

count      105195
unique      30621
top       Reserve
freq         2752
Name: designation, dtype: object

In [24]:
# Median of the points column
# (the mean is available through reviews.points.mean() instead)
reviews.points.median()

88.0

In [8]:
# Distinct values found in the designation column
reviews["designation"].unique()

array(["Martha's Vineyard", 'Carodorum Selección Especial Reserva',
       'Special Selected Late Harvest', ..., 'Delaware Dolce',
       'Presidential 20-year old tawny', 'Bungalow Red'], dtype=object)

In [9]:
# How many times each designation occurs
reviews["designation"].value_counts()

Reserve                                     2752
Reserva                                     1810
Estate                                      1571
Barrel sample                               1326
Riserva                                      754
Barrel Sample                                639
Brut                                         624
Crianza                                      503
Estate Grown                                 449
Estate Bottled                               396
Dry                                          374
Old Vine                                     331
Gran Reserva                                 330
Brut Rosé                                    248
Extra Dry                                    244
Vieilles Vignes                              225
Bien Nacido Vineyard                         195
Rosé                                         180
Late Bottled Vintage                         171
Réserve                                      166
Late Harvest        

In [15]:
# Mapping builds a new representation from existing data:
# here every score is shifted by the mean of the column.

review_points_mean = reviews["points"].mean()
reviews["points"].map(lambda score: score - review_points_mean)

0         8.111582
1         8.111582
2         8.111582
3         8.111582
4         7.111582
5         7.111582
6         7.111582
7         7.111582
8         7.111582
9         7.111582
10        7.111582
11        7.111582
12        7.111582
13        7.111582
14        7.111582
15        7.111582
16        7.111582
17        7.111582
18        7.111582
19        7.111582
20        7.111582
21        7.111582
22        7.111582
23        7.111582
24        7.111582
25        6.111582
26        6.111582
27        6.111582
28        6.111582
29        6.111582
            ...   
150900   -6.888418
150901   -6.888418
150902   -6.888418
150903   -6.888418
150904   -6.888418
150905   -7.888418
150906    5.111582
150907    4.111582
150908    2.111582
150909    1.111582
150910    1.111582
150911   -0.888418
150912   -0.888418
150913    6.111582
150914    6.111582
150915    5.111582
150916    5.111582
150917    4.111582
150918    4.111582
150919    3.111582
150920    3.111582
150921    3.

In [19]:
# apply() is the DataFrame-level counterpart of map():
# it transforms a whole DataFrame
# by calling a custom function on each row (axis='columns').


def remean_points(row, center=None):
    """Return a copy of ``row`` whose ``points`` value has been re-centred.

    Written to be passed to ``DataFrame.apply(..., axis='columns')``, which
    calls it once per row.

    Parameters
    ----------
    row : pandas.Series
        A single row; it must contain a ``points`` entry.
    center : float, optional
        Value subtracted from ``points``. When omitted, the notebook-level
        ``review_points_mean`` is used, which matches the previous behaviour.

    Returns
    -------
    pandas.Series
        A new row carrying the shifted ``points``; the row passed in is left
        untouched.
    """
    if center is None:
        center = review_points_mean
    # pandas does not support user functions that mutate the object handed
    # over by apply(), so the shift is written into a copy of the row.
    recentred = row.copy()
    recentred["points"] = row["points"] - center
    return recentred

reviews.apply(remean_points, axis=1)




Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,8.111582,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,8.111582,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,8.111582,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,US,"This spent 20 months in 30% new French oak, an...",Reserve,8.111582,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,7.111582,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude
5,Spain,"Deep, dense and pure from the opening bell, th...",Numanthia,7.111582,73.0,Northern Spain,Toro,,Tinta de Toro,Numanthia
6,Spain,Slightly gritty black-fruit aromas include a s...,San Román,7.111582,65.0,Northern Spain,Toro,,Tinta de Toro,Maurodos
7,Spain,Lush cedary black-fruit aromas are luxe and of...,Carodorum Único Crianza,7.111582,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
8,US,This re-named vineyard was formerly bottled as...,Silice,7.111582,65.0,Oregon,Chehalem Mountains,Willamette Valley,Pinot Noir,Bergström
9,US,The producer sources from two blocks of the vi...,Gap's Crown Vineyard,7.111582,60.0,California,Sonoma Coast,Sonoma,Pinot Noir,Blue Farm


In [20]:
# Check the first row of `reviews` after the apply() call above:
# the displayed points value shows whether the original frame was modified.
reviews.head(1)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz


In [21]:
# Plain arithmetic between a Series and a scalar re-centres every score at once
review_points_mean = reviews["points"].mean()
reviews["points"] - review_points_mean

0         8.111582
1         8.111582
2         8.111582
3         8.111582
4         7.111582
5         7.111582
6         7.111582
7         7.111582
8         7.111582
9         7.111582
10        7.111582
11        7.111582
12        7.111582
13        7.111582
14        7.111582
15        7.111582
16        7.111582
17        7.111582
18        7.111582
19        7.111582
20        7.111582
21        7.111582
22        7.111582
23        7.111582
24        7.111582
25        6.111582
26        6.111582
27        6.111582
28        6.111582
29        6.111582
            ...   
150900   -6.888418
150901   -6.888418
150902   -6.888418
150903   -6.888418
150904   -6.888418
150905   -7.888418
150906    5.111582
150907    4.111582
150908    2.111582
150909    1.111582
150910    1.111582
150911   -0.888418
150912   -0.888418
150913    6.111582
150914    6.111582
150915    5.111582
150916    5.111582
150917    4.111582
150918    4.111582
150919    3.111582
150920    3.111582
150921    3.