In [47]:
# Load necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Plotting pretty figures and avoid blurry images
%config InlineBackend.figure_format = 'retina'
# Larger scale for plots in notebooks
sns.set_context('talk')

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Enable multiple cell outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# 1. assign

In [2]:
df = pd.DataFrame({'temp_c': [17.0, 25.0]},
                  index=['Portland', 'Berkeley'])
df

Unnamed: 0,temp_c
Portland,17.0
Berkeley,25.0


In [11]:
df = df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32)
df

Unnamed: 0,temp_c,temp_f
Portland,17.0,62.6
Berkeley,25.0,77.0


In [12]:
df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
          temp_k=lambda x: (x['temp_f'] +  459.67) * 5 / 9)

Unnamed: 0,temp_c,temp_f,temp_k
Portland,17.0,62.6,290.15
Berkeley,25.0,77.0,298.15


In [18]:
df.assign(some_num = lambda x: np.random.randn())

Unnamed: 0,temp_c,temp_f,some_num
Portland,17.0,62.6,0.562877
Berkeley,25.0,77.0,0.562877


# 2. melt

In [22]:
df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, 'B': {0: 1, 1: 3, 2: 5}})
df

Unnamed: 0,A,B
0,a,1
1,b,3
2,c,5


In [23]:
pd.melt(df, id_vars=['A'], value_vars=['B'])

Unnamed: 0,A,variable,value
0,a,B,1
1,b,B,3
2,c,B,5


In [27]:
df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
                    'B': {0: 1, 1: 3, 2: 5}, 
                   'C': {0: 2, 1: 4, 2: 6}})

In [29]:
pd.melt(df, id_vars=['A'], value_vars=['B'])

Unnamed: 0,A,variable,value
0,a,B,1
1,b,B,3
2,c,B,5


In [30]:
pd.melt(df, id_vars=['A'], value_vars=['C'])

Unnamed: 0,A,variable,value
0,a,C,2
1,b,C,4
2,c,C,6


In [32]:
df_melted = pd.melt(df, id_vars=['A'], value_vars=['B', 'C'])
df_melted

Unnamed: 0,A,variable,value
0,a,B,1
1,b,B,3
2,c,B,5
3,a,C,2
4,b,C,4
5,c,C,6


# 3. Melt

In [48]:
df = pd.DataFrame({'New York': [25]})
df

Unnamed: 0,New York
0,25


In [49]:
df.melt()

Unnamed: 0,variable,value
0,New York,25


In [50]:
df = pd.DataFrame({'New york': [25], 'Paris': [27], 'London': [30]})
df

Unnamed: 0,New york,Paris,London
0,25,27,30


In [51]:
df.melt()

Unnamed: 0,variable,value
0,New york,25
1,Paris,27
2,London,30


In [53]:
df_larger = pd.DataFrame({
    'New york': [25, 27, 23, 25, 29],
    'Paris': [27, 22, 24, 26, 28],
    'London': [30, 31, 33, 29, 25]
    })
df_larger

Unnamed: 0,New york,Paris,London
0,25,27,30
1,27,22,31
2,23,24,33
3,25,26,29
4,29,28,25


In [55]:
df_larger.melt()

Unnamed: 0,variable,value
0,New york,25
1,New york,27
2,New york,23
3,New york,25
4,New york,29
5,Paris,27
6,Paris,22
7,Paris,24
8,Paris,26
9,Paris,28


In [56]:
df.melt(var_name='city', value_name='temperature')

Unnamed: 0,city,temperature
0,New york,25
1,Paris,27
2,London,30


In [57]:
df

Unnamed: 0,New york,Paris,London
0,25,27,30


# 3. pivot_table

In [35]:
tips = sns.load_dataset('tips')
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [37]:
# Using groupby
result = tips.groupby('sex')['total_bill'].sum()
result

sex
Male      3256.82
Female    1570.95
Name: total_bill, dtype: float64

In [38]:
type(result)

pandas.core.series.Series

In [39]:
# Using pivot_table
result_pivot = tips.pivot_table(values='total_bill', index='sex', aggfunc=np.sum)
type(result_pivot)

pandas.core.frame.DataFrame

In [40]:
result_pivot

Unnamed: 0_level_0,total_bill
sex,Unnamed: 1_level_1
Male,3256.82
Female,1570.95


In [41]:
result = tips.groupby('sex')['total_bill'].sum().reset_index()
result

Unnamed: 0,sex,total_bill
0,Male,3256.82
1,Female,1570.95


In [44]:
result = tips.groupby('sex')['total_bill'].sum()
result

sex
Male      3256.82
Female    1570.95
Name: total_bill, dtype: float64

In [45]:
tips.groupby(['sex', 'day'])['total_bill']\
            .agg([np.mean, np.median, np.sum]).reset_index()

Unnamed: 0,sex,day,mean,median,sum
0,Male,Thur,18.714667,16.975,561.44
1,Male,Fri,19.857,17.215,198.57
2,Male,Sat,20.802542,18.24,1227.35
3,Male,Sun,21.887241,20.725,1269.46
4,Female,Thur,16.715312,13.785,534.89
5,Female,Fri,14.145556,15.38,127.31
6,Female,Sat,19.680357,18.36,551.05
7,Female,Sun,19.872222,17.41,357.7


In [46]:
tips.pivot_table(values='total_bill', 
                 index=['sex', 'day'], 
                 aggfunc=[np.mean, np.median, np.sum])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,median,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,total_bill,total_bill,total_bill
sex,day,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Male,Thur,18.714667,16.975,561.44
Male,Fri,19.857,17.215,198.57
Male,Sat,20.802542,18.24,1227.35
Male,Sun,21.887241,20.725,1269.46
Female,Thur,16.715312,13.785,534.89
Female,Fri,14.145556,15.38,127.31
Female,Sat,19.680357,18.36,551.05
Female,Sun,19.872222,17.41,357.7
