# Merging Dataframes

In [4]:
import pandas as pd

# Purchase records for three stores. Each record becomes one row, and the
# rows are labelled by store instead of by the default integer index.
df = pd.DataFrame(
    data=[
        dict(name='vivek', item='banana', cost=50),
        dict(name='murli', item='apple', cost=150),
        dict(name='ashish', item='beer', cost=200),
    ],
    index=['store1', 'store2', 'store3'],
)
df

Unnamed: 0,cost,item,name
store1,50,banana,vivek
store2,150,apple,murli
store3,200,beer,ashish


In [6]:
# Record when each purchase happened: one free-form date string per store,
# listed in the same order as the existing rows. Assigning to a column label
# that does not exist yet adds it as a new column.
df.loc[:, 'date'] = ['august 10', 'september 6', 'august 22']
df

Unnamed: 0,cost,item,name,date
store1,50,banana,vivek,august 10
store2,150,apple,murli,september 6
store3,200,beer,ashish,august 22


In [9]:
# Staff roster: one row per person, indexed by name, with their role.
staff_df = pd.DataFrame(
    {'name': ['kelly', 'sally', 'james'],
     'role': ['director of hr', 'course liaison', 'grader']}
).set_index('name')

# Student roster: one row per person, indexed by name, with their school.
# 'kelly' and 'james' appear in both rosters; 'sally' is staff only and
# 'mike' is a student only.
student_df = pd.DataFrame(
    {'name': ['kelly', 'mike', 'james'],
     'school': ['business', 'law', 'engineering']}
).set_index('name')

# Plain-text dump of both frames with a single blank line between them.
print(staff_df, student_df, sep='\n\n')

                 role
name                 
kelly  director of hr
sally  course liaison
james          grader

            school
name              
kelly     business
mike           law
james  engineering


In [10]:
# Outer join on the shared name index: keep everyone who appears in either
# frame, leaving the missing side empty for people found in only one of them.
staff_df.merge(student_df, how='outer', left_index=True, right_index=True)

Unnamed: 0_level_0,role,school
name,Unnamed: 1_level_1,Unnamed: 2_level_1
james,grader,engineering
kelly,director of hr,business
mike,,law
sally,course liaison,


In [11]:
# Inner join on the shared name index: keep only the people who appear in
# both the staff frame and the student frame.
staff_df.merge(student_df, how='inner', left_index=True, right_index=True)

Unnamed: 0_level_0,role,school
name,Unnamed: 1_level_1,Unnamed: 2_level_1
kelly,director of hr,business
james,grader,engineering


# Scales

In [15]:
# Eleven letter grades in a single 'grades' column. The index carries a coarse
# quality band for each grade (band labels are kept exactly as originally
# spelled, since they are data).
df = pd.DataFrame(
    ['+A', 'A', '-A', '+B', 'B', '-B', '+C', 'C', '-C', '+D', 'D'],
    index=['excelent'] * 3 + ['good'] * 3 + ['ok'] * 3 + ['poor'] * 2,
    columns=['grades'],
)
df

Unnamed: 0,grades
excelent,+A
excelent,A
excelent,-A
good,+B
good,B
good,-B
ok,+C
ok,C
ok,-C
poor,+D


In [17]:
# Convert the grades to an *ordered* categorical so that comparisons such as
# `grade > 'C'` follow the grading scale rather than plain string ordering.
#
# The scale is passed as an explicit CategoricalDtype: the older
# `astype('category', categories=..., ordered=...)` call signature was
# deprecated and later removed from pandas.
#
# Categories are listed from lowest to highest. Every grade present in the
# data must appear here, because values missing from the category list are
# silently converted to NaN (the earlier list omitted '+D' and ranked 'D'
# below '-D').
grade_scale = pd.api.types.CategoricalDtype(
    categories=['-D', 'D', '+D',
                '-C', 'C', '+C',
                '-B', 'B', '+B',
                '-A', 'A', '+A'],
    ordered=True,
)
grade = df['grades'].astype(grade_scale)

# astype returns a new Series; df itself still holds the plain string grades.
df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,grades
excelent,+A
excelent,A
excelent,-A
good,+B
good,B


In [19]:
# Element-wise "strictly better than C", evaluated with the ordered category
# scale of `grade`; yields a boolean Series with the same index.
grade.gt('C')

excelent     True
excelent     True
excelent     True
good         True
good         True
good         True
ok           True
ok          False
ok          False
poor        False
poor        False
Name: grades, dtype: bool

## Date Functionality in Pandas

## Timestamp

In [31]:
# A Timestamp is a single point in time parsed from a date-time string.
# As the result shows, '9-1-2018' is read month-first (1 September 2018).
pd.Timestamp('9-1-2018 10:05AM')

Timestamp('2018-09-01 10:05:00')

## Period

In [32]:
# A Period is a span of time rather than an instant. As the result shows,
# '1-2018' is interpreted as the whole month of January 2018 (frequency 'M').
pd.Period('1-2018')

Period('2018-01', 'M')

## Timedeltas

In [33]:
# Subtracting one Timestamp from another gives a Timedelta. The result is
# negative here because the later date (11 Sept) is subtracted from the
# earlier one (1 Sept).
pd.Timestamp('9/1/2018')-pd.Timestamp('9/11/2018')

Timedelta('-10 days +00:00:00')

## Working with Dates in a DataFrame

In [34]:
# Nine dates spaced two weeks apart, all falling on a Sunday. The start date
# (1 Oct 2018) is not itself a Sunday, so the range begins on the first
# Sunday after it.
dates = pd.date_range(
    start='10-01-2018',
    periods=9,
    freq='2W-SUN',
)
dates

DatetimeIndex(['2018-10-07', '2018-10-21', '2018-11-04', '2018-11-18',
               '2018-12-02', '2018-12-16', '2018-12-30', '2019-01-13',
               '2019-01-27'],
              dtype='datetime64[ns]', freq='2W-SUN')

In [36]:
import numpy as np
# Synthetic example data indexed by `dates`.
#
# A dedicated, fixed-seed generator is used so that the cell produces the same
# numbers on every run (and does not disturb NumPy's global random state).
rng = np.random.RandomState(0)

# One observation per date, so the sample size follows the index length
# instead of being hard-coded.
n_obs = len(dates)

# randint(-5, 10, n) draws integers from -5 up to and including 9.
#   'count 1': running total of the draws, offset by 100 (a random walk).
#   'count 2': independent draws, each offset by 120.
df = pd.DataFrame(
    {'count 1': 100 + rng.randint(-5, 10, n_obs).cumsum(),
     'count 2': 120 + rng.randint(-5, 10, n_obs)},
    index=dates,
)
df

Unnamed: 0,count 1,count 2
2018-10-07,95,117
2018-10-21,101,121
2018-11-04,96,116
2018-11-18,105,123
2018-12-02,107,123
2018-12-16,110,127
2018-12-30,117,119
2019-01-13,125,123
2019-01-27,122,123
