In [None]:
import pandas as pd
import numpy as np

### 1. Merging, Joining, Concatenation

In [None]:
# First of three frames with identical columns A-D; rows are labelled 0-3.
df1 = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    },
    index=[0, 1, 2, 3],
)

In [None]:
# Second frame: same columns A-D, rows labelled 4-7.
df2 = pd.DataFrame(
    {
        'A': ['A4', 'A5', 'A6', 'A7'],
        'B': ['B4', 'B5', 'B6', 'B7'],
        'C': ['C4', 'C5', 'C6', 'C7'],
        'D': ['D4', 'D5', 'D6', 'D7'],
    },
    index=[4, 5, 6, 7],
)

In [None]:
# Third frame: same columns A-D, rows labelled 8-11.
df3 = pd.DataFrame(
    {
        'A': ['A8', 'A9', 'A10', 'A11'],
        'B': ['B8', 'B9', 'B10', 'B11'],
        'C': ['C8', 'C9', 'C10', 'C11'],
        'D': ['D8', 'D9', 'D10', 'D11'],
    },
    index=[8, 9, 10, 11],
)

In [None]:
# Show all three input frames at once; the cell output is the tuple of frames.
df1, df2, df3

In [None]:
# Stack the three frames on top of each other (row-wise is the default axis).
pd.concat([df1, df2, df3], axis='index')

In [None]:
# Place the frames side by side. Their row labels do not overlap, so each
# row is only populated in the columns that came from its own frame.
pd.concat([df1, df2, df3], axis='columns')

#### Merging

In [None]:
# Two frames that share a single 'key' column with the same four values.
left = pd.DataFrame(
    {
        'key': ['K0', 'K1', 'K2', 'K3'],
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    }
)

right = pd.DataFrame(
    {
        'key': ['K0', 'K1', 'K2', 'K3'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)

In [None]:
# Display the left-hand frame.
left

In [None]:
# Display the right-hand frame.
right

In [None]:
# Inner merge on 'key': keep only rows whose key appears in both frames.
left.merge(right, on='key', how='inner')

In [None]:
# Rebuild left/right with a composite key (key1, key2) for multi-key merges.
left = pd.DataFrame(
    {
        'key1': ['K0', 'K0', 'K1', 'K2'],
        'key2': ['K0', 'K1', 'K0', 'K1'],
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    }
)

right = pd.DataFrame(
    {
        'key1': ['K0', 'K1', 'K1', 'K2'],
        'key2': ['K0', 'K0', 'K0', 'K0'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)

In [None]:
# Display the right-hand frame with the composite key.
right

In [None]:
# Display the left-hand frame with the composite key.
left

In [None]:
# Merge on both key columns; 'inner' is the default join type.
left.merge(right, how='inner', on=['key1', 'key2'])

In [None]:
# Outer merge: keep every key pair found in either frame.
left.merge(right, how='outer', on=['key1', 'key2'])

In [None]:
# Left merge: keep every row of `left`, matched against `right` where possible.
left.merge(right, how='left', on=['key1', 'key2'])

In [None]:
# Right merge: keep every row of `right`, matched against `left` where possible.
left.merge(right, how='right', on=['key1', 'key2'])

#### Joining

In [None]:
# Rebuild left/right so the keys live in the index rather than in a column.
left = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2'],
        'B': ['B0', 'B1', 'B2'],
    },
    index=['K0', 'K1', 'K2'],
)

right = pd.DataFrame(
    {
        'C': ['C0', 'C2', 'C3'],
        'D': ['D0', 'D2', 'D3'],
    },
    index=['K0', 'K2', 'K3'],
)

In [None]:
# Display the index-keyed left frame.
left

In [None]:
# Display the index-keyed right frame.
right

In [None]:
# Join on the index; 'left' is the default, so every row of `left` is kept.
left.join(right, how='left')

In [None]:
# Outer join on the index: keep the labels that appear in either frame.
left.join(right, how='outer')

### 2. Grouping

In [None]:
# Raw sales records: two people per company.
data = {
    'Company': ['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    'Person': ['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    'Sales': [200, 120, 340, 124, 243, 350],
}


In [None]:
# Build a frame from the sales dict defined above and display it.
df = pd.DataFrame(data)
df

In [None]:
# groupby() on its own only returns a GroupBy object; an aggregation
# method (sum, mean, ...) has to be called on it to get a result table.
df.groupby('Company')

In [None]:
# Total sales per company. `numeric_only=True` keeps the string column
# 'Person' out of the aggregation; without it, recent pandas versions
# "sum" the names by concatenating them.
df.groupby('Company').sum(numeric_only=True)

In [None]:
# Average sales per company. 'Person' holds strings, which cannot be
# averaged, so restrict the mean to numeric columns; pandas >= 2.0 raises
# a TypeError here otherwise.
df.groupby('Company').mean(numeric_only=True)

In [None]:
# Per-company maximum of every column; the string column 'Person' is
# compared alphabetically.
df.groupby('Company').max()

In [None]:
# Per-company minimum of every column; the string column 'Person' is
# compared alphabetically.
df.groupby('Company').min()

In [None]:
# Number of non-null entries per column within each company.
df.groupby('Company').count()

### 3. Handling Missing Values

In [None]:
# Small frame with gaps: A has one NaN, B has two, C is complete.
df = pd.DataFrame(
    {
        'A': [1, 2, np.nan],
        'B': [5, np.nan, np.nan],
        'C': [1, 2, 3],
    }
)

In [None]:
# Display the frame with missing values.
df

In [None]:
# Drop every row that contains at least one NaN (rows are the default axis).
df.dropna(axis='index')

In [None]:
# Drop every column that contains at least one NaN.
df.dropna(axis='columns')

In [None]:
# Replace every NaN with the constant 50.
df.fillna(50)

In [None]:
# Mean imputation: fill each column's gaps with that column's own mean.
# Passing a Series to fillna() matches its labels to the column names, so
# column A's mean is no longer used to fill unrelated columns such as B.
df.fillna(value=df.mean())

In [None]:
# Keep only the rows that have at least two non-NaN values.
df.dropna(axis='index', thresh=2)

### 4. Information on unique values

In [None]:
# Demo frame: col2 contains one repeated value (444), used in the cells below.
df = pd.DataFrame(
    {
        'col1': [1, 2, 3, 4],
        'col2': [444, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)
df.head()

In [None]:
# Distinct values that occur in col2.
df['col2'].unique()

In [None]:
# Number of distinct values in col2.
df['col2'].nunique()

In [None]:
# How many times each distinct value occurs in col2.
df['col2'].value_counts()

### 5. Applying a function

In [None]:
# Display the frame used for the apply() examples.
df

In [None]:
def times2(x):
    """Return ``x`` multiplied by two.

    Intended for element-wise use, e.g. ``series.apply(times2)``.
    """
    # The previous body computed x**2 (a square), which contradicted the name.
    return x * 2

In [None]:
# Call times2 on every element of col1 and collect the results.
df['col1'].apply(times2)

In [None]:
# Built-in functions work with apply() too: length of each string in col3.
df['col3'].apply(len)

In [None]:
# Sum of all values in col1.
df['col1'].sum()

### 6. Sorting values

In [None]:
# Order the rows by col2, smallest first (ascending is the default).
df.sort_values('col2', ascending=True)

### 7. Checking null values

In [None]:
# Boolean mask with the frame's shape: True wherever a value is missing.
# isna() is the same method as isnull() under its other name.
df.isna()

### 8. Pivoting

In [None]:
# Long-format records for the pivot examples: A, B and C are category
# labels, D holds the numeric values.
data = {
    'A': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'],
    'B': ['one', 'one', 'two', 'two', 'one', 'one'],
    'C': ['x', 'y', 'x', 'y', 'x', 'y'],
    'D': [1, 3, 2, 5, 4, 1],
}
data

In [None]:
# Build a frame from the pivot records defined above and display it.
df = pd.DataFrame(data)
df

In [None]:
# Rows are the (A, B) pairs, columns are the values of C, cells come from D.
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])

In [None]:
# The transposed layout: rows are the values of C, columns are (A, B) pairs.
pd.pivot_table(df, values='D', index=['C'], columns=['A', 'B'])