In [1]:
#py_pandas_pivot_tables.ipynb
import numpy as np              
import pandas as  pd            
import matplotlib.pyplot as plt 
%matplotlib inline
import os
datafile = 'earthquakes.csv'

# Look for the data next to this script when it runs as a file (__file__ is
# defined); otherwise fall back to the current working directory.
path = os.path.dirname(__file__) if '__file__' in dir() else os.getcwd()

filename = os.path.join(path, datafile)
# parse_dates=[0] asks pandas to read the first column as datetimes;
# every other read_csv setting (including the Excel dialect) is the default.
df = pd.read_csv(filename, parse_dates=[0])

In [2]:
#py_pandas_pivot_tables.ipynb

# Tidy the raw frame: reduce 'place' to a region name, add calendar columns,
# keep only the columns used below, and index the rows by event time.

# Keep the text after the first comma of each 'place' value (values without a
# comma become NaN), trim surrounding whitespace, then expand the region code.
df['place'] = (
    df['place']
    .astype(str)
    .str.split(',')
    .str.get(1)
    .str.strip()
    # Whole-value match: only a region that is exactly "CA" is expanded.
    # A substring replacement (str.replace) would also rewrite any other
    # region name that merely contains the letters "CA".
    .replace({'CA': 'California'})
)

# make year, month, and day columns
df['year'] = df['time'].dt.year
df['mon'] = df['time'].dt.month
df['day'] = df['time'].dt.day

# Keep the wanted columns by name instead of dropping the others by position:
# positional indices silently select the wrong columns if the CSV layout
# changes. These are the columns shown in this cell's displayed result.
# Chaining set_index yields a fresh frame with time as the index.
df = df[['time', 'depth', 'mag', 'net', 'place', 'status',
         'year', 'mon', 'day']].set_index('time')

df.head(3)

Unnamed: 0_level_0,depth,mag,net,place,status,year,mon,day
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-09-19 16:56:50.540,1.52,2.45,ci,California,automatic,2017,9,19
2017-09-19 16:15:29.900,2.63,2.92,nc,California,automatic,2017,9,19
2017-09-19 15:35:09.030,4.52,3.1,us,Idaho,reviewed,2017,9,19


In [3]:
#create a pivot table
pd.set_option("display.width", 100)
# Average the numeric columns per (place, year, month). The value columns are
# listed explicitly: the default aggregation is a mean, and the text columns
# ('net', 'status') cannot be averaged. Relying on pandas to drop them
# silently is deprecated and fails on newer versions.
pt = pd.pivot_table(df, index=['place', 'year', 'mon'],
                    values=['day', 'depth', 'mag'])
pt[:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,day,depth,mag
place,year,mon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,2017,8,28.0,166.585,4.6
Afghanistan,2017,9,4.0,210.8,4.4
Alaska,2017,8,26.245763,48.301271,3.017797
Alaska,2017,9,9.678788,52.423939,3.045455
Albania,2017,9,18.0,16.32,4.3


In [4]:
#to rearrange the groupings, simply reorder the indices
# The value columns are named explicitly so the text columns ('net', 'status')
# are never handed to the default mean aggregation, which newer pandas
# versions refuse to drop silently.
pt = pd.pivot_table(df, index=['mon', 'place', 'year'],
                    values=['day', 'depth', 'mag'])
pt[:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,day,depth,mag
mon,place,year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8,Afghanistan,2017,28.0,166.585,4.6
8,Alaska,2017,26.245763,48.301271,3.017797
8,American Samoa,2017,21.0,10.0,5.3
8,Argentina,2017,22.0,215.903333,4.3
8,Bonaire,2017,21.0,39.0,2.57


In [5]:
# Mean magnitude and number of rows for each place.
# 'mean' is passed by name: handing pandas the numpy function np.mean is
# deprecated in recent releases, while the string selects the same
# aggregation and produces the same 'mean' column label.
# len is kept as-is so the second column is still labelled 'len' and still
# counts every row in the group.
pt = pd.pivot_table(df, index=['place'], values=['mag'],
                    aggfunc=['mean', len])
pt[:5]

Unnamed: 0_level_0,mean,len
Unnamed: 0_level_1,mag,mag
place,Unnamed: 1_level_2,Unnamed: 2_level_2
Afghanistan,4.533333,3.0
Alaska,3.033922,283.0
Albania,4.3,1.0
American Samoa,5.3,1.0
Argentina,4.276923,13.0


In [6]:
# Group down to individual (place, date, depth) combinations, then filter the
# result with query(), which can refer to index levels by name.
# 'mag' is the only numeric column left once the others are used as index
# levels, so it is named explicitly rather than relying on pandas to discard
# the text columns ('net', 'status') during the default mean aggregation.
pt = pd.pivot_table(df, index=['place', 'year', 'mon', 'day', 'depth'],
                    values=['mag'])
pt.query("place == ['Oregon', 'California'] and mag > 4")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,mag
place,year,mon,day,depth,Unnamed: 5_level_1
California,2017,8,22,6.14,4.07
Oregon,2017,8,27,10.0,4.3
Oregon,2017,9,5,10.0,4.3


In [7]:
# Show the type of the pivot table built in the previous cell
# (the displayed result is an ordinary pandas DataFrame).
type(pt)


pandas.core.frame.DataFrame

In [8]:
# Bucket each magnitude into a labelled strength band.
# With pd.cut's defaults the bins are right-closed: (0, 4], (4, 6], (6, 8],
# (8, 10]; values outside those ranges become NaN.
names = ['weak', 'moderate', 'strong', 'intense']
cut_offs = [0, 4, 6, 8, 10]
df['strength'] = pd.cut(x=df['mag'], bins=cut_offs, labels=names)

# Optional follow-up (left disabled): pivot on the new column as well.
#pt=pd.pivot_table(df, index=['place', 'year', 'mon', 'day', 'depth', 'strength'])
#pt.query("place == ['Oregon', 'California'] and mag > 4")



In [None]:
def classify(data_row):
    """Return a depth category for one row of earthquake data.

    Parameters
    ----------
    data_row : mapping or pandas.Series
        Any object supporting ``data_row['depth']`` that yields a number.

    Returns
    -------
    str or None
        'surface' for depth < 10, 'shallow' for depth < 100,
        'kinda deep' for depth < 1000, otherwise 'really deep'.
        None when the depth is missing (NaN/None).
    """
    depth = data_row['depth']

    # NaN fails every "<" comparison, so without this guard a missing depth
    # would fall through all thresholds and be mislabelled 'really deep'.
    if pd.isna(depth):
        return None

    # Thresholds are checked in ascending order; the first exclusive upper
    # bound that the depth falls under decides the label.
    for upper_bound, label in ((10, 'surface'),
                               (100, 'shallow'),
                               (1000, 'kinda deep')):
        if depth < upper_bound:
            return label
    return 'really deep'
# Label every row with its depth category (classify is called once per row),
# then preview the first five rows.
df['deep_cat'] = df.apply(classify, axis='columns')
df.head(5)

In [None]:
# Count events for each strength band (rows) against each depth category
# (columns); margins=True appends the "All" totals.
pd.crosstab(index=df['strength'], columns=df['deep_cat'], margins=True)

In [None]:
# Same counts, with the rows further split by month.
pd.crosstab(
    index=[df['mon'], df['strength']],
    columns=df['deep_cat'],
    margins=True,
)

In [None]:
# Rows grouped by (month, strength); columns grouped by (depth category, year).
pd.crosstab(
    index=[df['mon'], df['strength']],
    columns=[df['deep_cat'], df['year']],
    margins=True,
)