In [2]:
%autosave 0

Autosave disabled


In [3]:
import numpy as np
import pandas as pd
from sqlalchemy import text, create_engine
from pydataset import data

from env import get_connection

Let's read in the users and roles tables from the join_example_db.

In [4]:
# Look up the connection string for join_example_db, then hand it to SQLAlchemy.
db_url = get_connection('join_example_db')
engine = create_engine(db_url)

In [5]:
# Both tables are read in full, so a single template produces both queries.
select_all_template = '''
              SELECT *
              FROM {table}
              '''

users_query = select_all_template.format(table='users')
roles_query = select_all_template.format(table='roles')

In [6]:
# Read both tables over a single connection. The `with` block closes the
# connection when the reads finish, instead of leaving one open connection
# per engine.connect() call.
with engine.connect() as conn:
    users = pd.read_sql(text(users_query), conn)
    roles = pd.read_sql(text(roles_query), conn)

Let's try joining our two dataframes together with pd.concat()!

In [9]:
# Display the users table; note that role_id is missing for the last two rows.
users

Unnamed: 0,id,name,email,role_id
0,1,bob,bob@example.com,1.0
1,2,joe,joe@example.com,2.0
2,3,sally,sally@example.com,3.0
3,4,adam,adam@example.com,3.0
4,5,jane,jane@example.com,
5,6,mike,mike@example.com,


In [10]:
# Display the roles table (one row per role id/name pair).
roles

Unnamed: 0,id,name
0,1,admin
1,2,author
2,3,reviewer
3,4,commenter


In [11]:
# Stack roles underneath users (axis=0 is the default); columns that only
# exist in users come back empty for the roles rows.
pd.concat([users, roles], axis=0)

Unnamed: 0,id,name,email,role_id
0,1,bob,bob@example.com,1.0
1,2,joe,joe@example.com,2.0
2,3,sally,sally@example.com,3.0
3,4,adam,adam@example.com,3.0
4,5,jane,jane@example.com,
5,6,mike,mike@example.com,
0,1,admin,,
1,2,author,,
2,3,reviewer,,
3,4,commenter,,


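Concatenating with axis=1 only places the two tables side by side, lining rows up by index label rather than by role_id. After that, how about calling the .merge() method off one of our tables!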

In [12]:
# Place the two frames side by side; rows are paired by index label,
# not by any relationship between role_id and id.
pd.concat([users, roles], axis='columns')

Unnamed: 0,id,name,email,role_id,id.1,name.1
0,1,bob,bob@example.com,1.0,1.0,admin
1,2,joe,joe@example.com,2.0,2.0,author
2,3,sally,sally@example.com,3.0,3.0,reviewer
3,4,adam,adam@example.com,3.0,4.0,commenter
4,5,jane,jane@example.com,,,
5,6,mike,mike@example.com,,,


In [13]:
# Inner join: keep only users whose role_id matches an id in roles.
pd.merge(
    users,
    roles,
    how='inner',
    left_on='role_id',
    right_on='id',
)

Unnamed: 0,id_x,name_x,email,role_id,id_y,name_y
0,1,bob,bob@example.com,1.0,1,admin
1,2,joe,joe@example.com,2.0,2,author
2,3,sally,sally@example.com,3.0,3,reviewer
3,4,adam,adam@example.com,3.0,3,reviewer


Let's read in the mpg dataset and create a new binary column for automatic/manual transmission.

In [14]:
mpg = data('mpg')

# trans values look like "auto(l5)" or "manual(m5)", so a leading "a"
# identifies an automatic transmission.
is_automatic = mpg['trans'].str.startswith('a')
mpg['auto_or_man'] = np.where(is_automatic, 'auto', 'manual')

mpg.head()

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class,auto_or_man
1,audi,a4,1.8,1999,4,auto(l5),f,18,29,p,compact,auto
2,audi,a4,1.8,1999,4,manual(m5),f,21,29,p,compact,manual
3,audi,a4,2.0,2008,4,manual(m6),f,20,31,p,compact,manual
4,audi,a4,2.0,2008,4,auto(av),f,21,30,p,compact,auto
5,audi,a4,2.8,1999,6,auto(l5),f,16,26,p,compact,auto


Let's do a crosstab of two categorical variables, the drive type and the transmission type.

In [15]:
# Count cars for every drive type / transmission type combination.
pd.crosstab(index=mpg['drv'], columns=mpg['auto_or_man'])

auto_or_man,auto,manual
drv,Unnamed: 1_level_1,Unnamed: 2_level_1
4,75,28
f,65,41
r,17,8


We can set normalize=True to return the proportion of all cars that each combination represents.

In [16]:
# Same table as a share of all cars: normalize='all' (equivalent to True)
# divides every cell by the grand total.
pd.crosstab(
    index=mpg['drv'],
    columns=mpg['auto_or_man'],
    normalize='all',
)

auto_or_man,auto,manual
drv,Unnamed: 1_level_1,Unnamed: 2_level_1
4,0.320513,0.119658
f,0.277778,0.175214
r,0.07265,0.034188


A pivot table will look similar to a crosstab.

We specify which two categorical variables will be along the axes.

The index argument specifies which variable's values become the row labels, and the columns argument specifies which variable's values become the column labels.

We will pass in a third argument, values, to return the mean of that column.

If we don't want the mean, we can specify a fourth argument (aggfunc) for a different aggregate function.

In [17]:
# Average highway mileage per drive type / transmission combination
# ('mean' is pivot_table's default aggregate, spelled out here).
pd.pivot_table(
    mpg,
    values='hwy',
    index='drv',
    columns='auto_or_man',
    aggfunc='mean',
)

auto_or_man,auto,manual
drv,Unnamed: 1_level_1,Unnamed: 2_level_1
4,18.586667,20.75
f,27.292308,29.536585
r,19.529412,24.125


In [18]:
# Same layout, but report the best highway mileage in each group.
pd.pivot_table(
    mpg,
    values='hwy',
    index='drv',
    columns='auto_or_man',
    aggfunc='max',
)

auto_or_man,auto,manual
drv,Unnamed: 1_level_1,Unnamed: 2_level_1
4,27,28
f,41,44
r,25,26


The map method will use a dictionary to map existing values to desired ones.

In [19]:
# Lookup from the one-character drv code to a readable label.
drv_dict = dict([
    ('4', 'four-wheel drive'),
    ('f', 'front-wheel drive'),
    ('r', 'rear-wheel drive'),
])

In [23]:
# Translate the drv codes to readable labels with .map().
# .map() returns NaN for any value that is not a key of drv_dict, so running
# this cell a second time (when drv already holds the long labels) would wipe
# the column. Falling back to the existing value keeps the cell safe to re-run.
mpg['drv'] = mpg['drv'].map(drv_dict).fillna(mpg['drv'])
mpg

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class,auto_or_man
1,audi,a4,1.8,1999,4,auto(l5),front-wheel drive,18,29,p,compact,auto
2,audi,a4,1.8,1999,4,manual(m5),front-wheel drive,21,29,p,compact,manual
3,audi,a4,2.0,2008,4,manual(m6),front-wheel drive,20,31,p,compact,manual
4,audi,a4,2.0,2008,4,auto(av),front-wheel drive,21,30,p,compact,auto
5,audi,a4,2.8,1999,6,auto(l5),front-wheel drive,16,26,p,compact,auto
...,...,...,...,...,...,...,...,...,...,...,...,...
230,volkswagen,passat,2.0,2008,4,auto(s6),front-wheel drive,19,28,p,midsize,auto
231,volkswagen,passat,2.0,2008,4,manual(m6),front-wheel drive,21,29,p,midsize,manual
232,volkswagen,passat,2.8,1999,6,auto(l5),front-wheel drive,16,26,p,midsize,auto
233,volkswagen,passat,2.8,1999,6,manual(m5),front-wheel drive,18,26,p,midsize,manual


We can transpose a dataframe (swap its rows and columns) with the .T attribute.

In [24]:
# Swap rows and columns: each car becomes a column, each variable a row.
mpg.transpose()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,225,226,227,228,229,230,231,232,233,234
manufacturer,audi,audi,audi,audi,audi,audi,audi,audi,audi,audi,...,volkswagen,volkswagen,volkswagen,volkswagen,volkswagen,volkswagen,volkswagen,volkswagen,volkswagen,volkswagen
model,a4,a4,a4,a4,a4,a4,a4,a4 quattro,a4 quattro,a4 quattro,...,new beetle,new beetle,new beetle,passat,passat,passat,passat,passat,passat,passat
displ,1.8,1.8,2.0,2.0,2.8,2.8,3.1,1.8,1.8,2.0,...,2.0,2.5,2.5,1.8,1.8,2.0,2.0,2.8,2.8,3.6
year,1999,1999,2008,2008,1999,1999,2008,1999,1999,2008,...,1999,2008,2008,1999,1999,2008,2008,1999,1999,2008
cyl,4,4,4,4,6,6,6,4,4,4,...,4,5,5,4,4,4,4,6,6,6
trans,auto(l5),manual(m5),manual(m6),auto(av),auto(l5),manual(m5),auto(av),manual(m5),auto(l5),manual(m6),...,auto(l4),manual(m5),auto(s6),manual(m5),auto(l5),auto(s6),manual(m6),auto(l5),manual(m5),auto(s6)
drv,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,four-wheel drive,four-wheel drive,four-wheel drive,...,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive,front-wheel drive
cty,18,21,20,21,16,18,18,18,16,20,...,19,20,20,21,18,19,21,16,18,17
hwy,29,29,31,30,26,26,27,26,25,28,...,26,28,29,29,29,28,29,26,26,26
fl,p,p,p,p,p,p,p,p,p,p,...,r,r,r,p,p,p,p,p,p,p
