# Summary Functions and Maps


In [26]:
import numpy as np
import pandas as pd

# Show at most five rows whenever a DataFrame or Series is displayed.
pd.options.display.max_rows = 5

# Load the wine reviews; the first CSV column becomes the row index.
reviews = pd.read_csv(
    "https§§§www.kaggle.com§learn§pandas/winemag-data-130k-v2.csv",
    index_col=0,
)
reviews

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
...,...,...,...,...,...,...,...,...,...,...,...,...,...
500,Spain,Aromas of watermelon and a dusting of natural ...,Rosado,87,11.0,Northern Spain,Rioja,,Michael Schachner,@wineschach,El Coto 2011 Rosado Rosé (Rioja),Rosé,El Coto
501,US,A Verdelho with a taste of nuts—like Marcona a...,Silvaspoons Vineyard,87,15.0,California,Lodi,Central Valley,Virginie Boone,@vboone,Fenestra 2010 Silvaspoons Vineyard Verdelho (L...,Verdelho,Fenestra


In [27]:
# Summary of a text column: count, number of unique values, most common value and its frequency.
reviews["taster_name"].describe()

count            361
unique            16
top       Roger Voss
freq              75
Name: taster_name, dtype: object

In [28]:
# Summary statistics for the numeric points column.
reviews["points"].describe()

count    502.000000
mean      88.288845
            ...    
75%       90.000000
max      100.000000
Name: points, Length: 8, dtype: float64

In [29]:
# Distinct values in taster_name; missing entries show up as nan.
reviews["taster_name"].unique()

array(['Kerin O’Keefe', 'Roger Voss', 'Paul Gregutt',
       'Alexander Peartree', 'Michael Schachner', 'Anna Lee C. Iijima',
       'Virginie Boone', 'Matt Kettmann', nan, 'Sean P. Sullivan',
       'Jim Gordon', 'Joe Czerwinski', 'Anne Krebiehl\xa0MW',
       'Lauren Buzzeo', 'Mike DeSimone', 'Jeff Jenssen',
       'Susan Kostrzewa'], dtype=object)

In [30]:
# Number of reviews per taster, most frequent first.
reviews["taster_name"].value_counts()

taster_name
Roger Voss           75
Michael Schachner    61
                     ..
Mike DeSimone         2
Jeff Jenssen          2
Name: count, Length: 16, dtype: int64

Maps
"Map" is a term, borrowed from mathematics, for a function that takes one set of values and "maps" them to another set of values. In data science we often need to create new representations from existing data, or to transform data from its current format into the format we want it to be in later. Maps handle this work, which makes them extremely important for getting your work done!

In [31]:
# Centre every score on the overall mean by mapping a function over the column.
reviews_points_mean = reviews["points"].mean()
reviews["points"].map(lambda score: score - reviews_points_mean)


0     -1.288845
1     -1.288845
         ...   
500   -1.288845
501   -1.288845
Name: points, Length: 502, dtype: float64

In [34]:
# apply() is the DataFrame counterpart of map(): it transforms a whole
# DataFrame by calling a custom function on each row.
reviews_points_mean = reviews["points"].mean()

def remean_points(row, points_mean=None):
    """Return a copy of ``row`` whose ``points`` value has a mean subtracted.

    Parameters
    ----------
    row : pandas.Series
        One row of the reviews table; must contain a ``points`` entry.
    points_mean : float, optional
        Value subtracted from ``row["points"]``. When omitted, the
        notebook-level ``reviews_points_mean`` is used.

    Returns
    -------
    pandas.Series
        A new Series; the ``row`` that was passed in is left unchanged.
    """
    if points_mean is None:
        points_mean = reviews_points_mean
    # DataFrame.apply does not support functions that mutate the object they
    # are handed, so work on a copy. Item assignment (instead of attribute
    # assignment) makes sure the "points" entry itself is what gets updated.
    centred = row.copy()
    centred["points"] = row["points"] - points_mean
    return centred

# With axis="columns" the function is given one row at a time. Had we passed
# axis="index" instead, we would need to supply a function that transforms
# each column rather than each row.
reviews.apply(func=remean_points, axis="columns")


Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,-1.288845,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,-1.288845,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
...,...,...,...,...,...,...,...,...,...,...,...,...,...
500,Spain,Aromas of watermelon and a dusting of natural ...,Rosado,-1.288845,11.0,Northern Spain,Rioja,,Michael Schachner,@wineschach,El Coto 2011 Rosado Rosé (Rioja),Rosé,El Coto
501,US,A Verdelho with a taste of nuts—like Marcona a...,Silvaspoons Vineyard,-1.288845,15.0,California,Lodi,Central Valley,Virginie Boone,@vboone,Fenestra 2010 Silvaspoons Vineyard Verdelho (L...,Verdelho,Fenestra


In [35]:
# Operators also work element-wise between two Series of equal length;
# a row comes out as NaN when either side is missing.
reviews["country"] + " - " + reviews["region_1"]

0       Italy - Etna
1                NaN
           ...      
500    Spain - Rioja
501        US - Lodi
Length: 502, dtype: object