In [1]:
import pandas as pd
import numpy as np

class display(object):
    """Display HTML representation of multiple objects.

    Each positional argument is a *string* containing a Python expression
    (for example ``'df1'`` or ``"pd.merge(df1, df2)"``).  The expression text
    is shown as a label and the evaluated object is rendered beneath it.

    Notes
    -----
    * Expressions are evaluated with ``eval`` against the globals of the
      module (or notebook) in which this class is defined.  ``eval`` runs
      arbitrary code, so only pass trusted, author-written strings.
    * The class name shadows IPython's built-in ``display`` function within
      this notebook.
    """
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""

    def __init__(self, *args):
        # args: expression strings, kept in call order.
        self.args = args

    def _evaluate(self, expr):
        """Evaluate one expression string against the module globals.

        Passing ``globals()`` explicitly keeps names local to this class
        (loop variables, ``self``) from shadowing a user variable that
        happens to have the same name.
        """
        return eval(expr, globals())

    def _repr_html_(self):
        """Return one floated HTML block per expression: label plus object HTML."""
        # Function-scope import so the cell does not need an extra top-level import.
        from html import escape

        # The label is escaped so expressions containing <, > or & (such as a
        # comparison inside a filter) cannot corrupt the surrounding markup.
        return '\n'.join(
            self.template.format(escape(expr, quote=False),
                                 self._evaluate(expr)._repr_html_())
            for expr in self.args)

    def __repr__(self):
        """Return the plain-text fallback: each expression followed by its repr."""
        return '\n\n'.join(expr + '\n' + repr(self._evaluate(expr))
                           for expr in self.args)

In [2]:
# Two small frames that share an 'employee' column, listed in different row order.
df1 = pd.DataFrame({
    'employee': ['hit', 'spidy', 'stark'],
    'group': ['guner', 'weber', 'robot'],
})
df2 = pd.DataFrame({
    'employee': ['stark', 'hit', 'spidy'],
    'date': [2008, 2005, 2002],
})

In [3]:
# Show both inputs side by side; display() takes expression strings, not objects.
display('df1', 'df2')

Unnamed: 0,employee,group
0,hit,guner
1,spidy,weber
2,stark,robot

Unnamed: 0,date,employee
0,2008,stark
1,2005,hit
2,2002,spidy


In [4]:
# No `on=` is given, so the merge keys on the one column both frames share ('employee').
df3 = df1.merge(df2)
df3

Unnamed: 0,employee,group,date
0,hit,guner,2005
1,spidy,weber,2002
2,stark,robot,2008


In [5]:
# One supervisor per group; merging onto df3 matches on the shared 'group' column.
df4 = pd.DataFrame({
    'group': ['weber', 'robot', 'guner'],
    'supervisor': ['spiderman', 'ironman', 'hitman'],
})
display('df1', 'df2', 'df3', 'pd.merge(df3,df4)')

Unnamed: 0,employee,group
0,hit,guner
1,spidy,weber
2,stark,robot

Unnamed: 0,date,employee
0,2008,stark
1,2005,hit
2,2002,spidy

Unnamed: 0,employee,group,date
0,hit,guner,2005
1,spidy,weber,2002
2,stark,robot,2008

Unnamed: 0,employee,group,date,supervisor
0,hit,guner,2005,hitman
1,spidy,weber,2002,spiderman
2,stark,robot,2008,ironman


In [6]:
# Skills lookup keyed by 'group'.
df5 = pd.DataFrame({
    'group': ['weber', 'robot', 'guner'],
    'skills': ['jumping from building', 'jarvis', 'sharp shootter'],
})

In [7]:
# Inputs for the next merge, shown side by side.
display('df1', 'df5')

Unnamed: 0,employee,group
0,hit,guner
1,spidy,weber
2,stark,robot

Unnamed: 0,group,skills
0,weber,jumping from building
1,robot,jarvis
2,guner,sharp shootter


In [8]:
# Implicit key again: 'group' is the only column common to df1 and df5.
df1.merge(df5)

Unnamed: 0,employee,group,skills
0,hit,guner,sharp shootter
1,spidy,weber,jumping from building
2,stark,robot,jarvis


In [9]:
# Same join as before, but with the key column named explicitly via `on=`.
display('df1', 'df2', "pd.merge(df1,df2,on='employee')")

Unnamed: 0,employee,group
0,hit,guner
1,spidy,weber
2,stark,robot

Unnamed: 0,date,employee
0,2008,stark
1,2005,hit
2,2002,spidy

Unnamed: 0,employee,group,date
0,hit,guner,2005
1,spidy,weber,2002
2,stark,robot,2008


In [10]:
# Same people as df1, but the key column is called 'name' instead of 'employee'.
df6 = pd.DataFrame({
    'name': ['hit', 'spidy', 'stark'],
    'fund': ['1k$', '0.5m$', '1.6b$'],
})

In [12]:
# The key columns are named differently, so each side's key is given separately.
# Both key columns ('employee' and 'name') are kept in the result.
df7 = df1.merge(df6, left_on='employee', right_on='name')
df7

Unnamed: 0,employee,group,fund,name
0,hit,guner,1k$,hit
1,spidy,weber,0.5m$,spidy
2,stark,robot,1.6b$,stark


In [13]:
# Drop the redundant second key column; this returns a new frame and leaves df7 unchanged.
df7.drop(labels='name', axis='columns')

Unnamed: 0,employee,group,fund
0,hit,guner,1k$
1,spidy,weber,0.5m$
2,stark,robot,1.6b$


In [14]:
# Move 'employee' from a column into the index so the frames can be joined on index.
df1a = df1.set_index(keys='employee')
df2a = df2.set_index(keys='employee')
display('df1a', 'df2a')

Unnamed: 0_level_0,group
employee,Unnamed: 1_level_1
hit,guner
spidy,weber
stark,robot

Unnamed: 0_level_0,date
employee,Unnamed: 1_level_1
stark,2008
hit,2005
spidy,2002


In [15]:
# Use each frame's index as its merge key.
df1a.merge(df2a, left_index=True, right_index=True)

Unnamed: 0_level_0,group,date
employee,Unnamed: 1_level_1,Unnamed: 2_level_1
hit,guner,2005
spidy,weber,2002
stark,robot,2008


In [17]:
# DataFrame.join works on the index by default.
df1a.join(other=df2a)

Unnamed: 0_level_0,group,date
employee,Unnamed: 1_level_1,Unnamed: 2_level_1
hit,guner,2005
spidy,weber,2002
stark,robot,2008


In [18]:
# Specifying set arithmetic for joins (the `how` keyword of pd.merge)

In [21]:
# Only 'mary' appears in both frames, which makes the join types easy to tell apart.
data1 = pd.DataFrame({
    'name': ['peter', 'paul', 'mary'],
    'drink': ['wine', 'vodaka', 'beer'],
})
data2 = pd.DataFrame({
    'name': ['mary', 'priaynshu'],
    'food': ['fish', 'chicken'],
})

In [22]:
# Default merge: only the name present in both inputs survives.
display('data1', 'data2', 'pd.merge(data1,data2)')

Unnamed: 0,drink,name
0,wine,peter
1,vodaka,paul
2,beer,mary

Unnamed: 0,food,name
0,fish,mary
1,chicken,priaynshu

Unnamed: 0,drink,name,food
0,beer,mary,fish


In [24]:
# Inner join: keep only keys found in both frames (same result as the default merge above).
data1.merge(data2, how='inner')

Unnamed: 0,drink,name,food
0,beer,mary,fish


In [25]:
# Outer join: keep keys from either frame; cells with no match are left missing.
data1.merge(data2, how='outer')

Unnamed: 0,drink,name,food
0,wine,peter,
1,vodaka,paul,
2,beer,mary,fish
3,,priaynshu,chicken


In [26]:
# Left join: keep every row of data1; 'food' is missing where data2 has no match.
data1.merge(data2, how='left')

Unnamed: 0,drink,name,food
0,wine,peter,
1,vodaka,paul,
2,beer,mary,fish


In [27]:
# Right join: keep every row of data2; 'drink' is missing where data1 has no match.
data1.merge(data2, how='right')

Unnamed: 0,drink,name,food
0,beer,mary,fish
1,,priaynshu,chicken


In [28]:
# Both frames have a non-key column called 'rank'; merging on 'name' forces
# pandas to disambiguate the two, which it does with the _x / _y suffixes.
data3 = pd.DataFrame({
    'name': ['bob', 'jake', 'lisa', 'sue'],
    'rank': [1, 2, 3, 4],
})
data4 = pd.DataFrame({
    'name': ['bob', 'jake', 'lisa', 'sue'],
    'rank': [3, 1, 4, 2],
})
display('data3', 'data4', "pd.merge(data3,data4,on='name')")

Unnamed: 0,name,rank
0,bob,1
1,jake,2
2,lisa,3
3,sue,4

Unnamed: 0,name,rank
0,bob,3
1,jake,1
2,lisa,4
3,sue,2

Unnamed: 0,name,rank_x,rank_y
0,bob,1,3
1,jake,2,1
2,lisa,3,4
3,sue,4,2


In [32]:
# Replace the default _x / _y suffixes with descriptive ones for the clashing 'rank' columns.
data3.merge(data4, on='name', suffixes=('_skill level', '_management'))

Unnamed: 0,name,rank_skill level,rank_management
0,bob,1,3
1,jake,2,1
2,lisa,3,4
3,sue,4,2


In [45]:
# US population data, read from a path relative to the notebook's working directory.
population = pd.read_csv(
    filepath_or_buffer='./data_csv/data-USstates-master/state-population.csv'
)

In [44]:
# State areas table, from the same relative data directory.
areas = pd.read_csv(
    filepath_or_buffer='./data_csv/data-USstates-master/state-areas.csv'
)

In [43]:
# State name to abbreviation lookup, from the same relative data directory.
abbrevs = pd.read_csv(
    filepath_or_buffer='./data_csv/data-USstates-master/state-abbrevs.csv'
)

In [40]:
# Preview the first rows of each loaded table rather than dumping whole frames.
display('population.head()', 'areas.head()', 'abbrevs.head()')

Unnamed: 0,state/region,ages,year,population
0,AL,under18,2012,1117489.0
1,AL,total,2012,4817528.0
2,AL,under18,2010,1130966.0
3,AL,total,2010,4785570.0
4,AL,under18,2011,1125763.0

Unnamed: 0,state,area (sq. mi)
0,Alabama,52423
1,Alaska,656425
2,Arizona,114006
3,Arkansas,53182
4,California,163707

Unnamed: 0,state,abbreviation
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA
