In [1]:
import pandas as pd
import numpy as np

In [2]:
# Small all-integer frame used by the apply / map / applymap examples below.
df = pd.DataFrame(
    data=dict(
        A=[1, 2, 3, 4],
        B=[10, 20, 30, 40],
        C=[100, 200, 300, 400],
    )
)

# Applying functions across the data

In [7]:
# `apply` runs a function over a Series value by value, or over a DataFrame one column (the default) or one row (axis=1) at a time

In [3]:
# Series.apply: take the square root of every value in column 'C'.
df['C'].apply(np.sqrt)

0    10.000000
1    14.142136
2    17.320508
3    20.000000
Name: C, dtype: float64

In [4]:
def multiply_by_three(n):
    """Return ``n`` multiplied by three.

    Relies only on ``n * 3``, so it works for anything that supports
    multiplication by an integer.
    """
    tripled = n * 3
    return tripled

In [5]:
# Apply a user-defined function to column 'A': every value is tripled.
df['A'].apply(multiply_by_three)

0     3
1     6
2     9
3    12
Name: A, dtype: int64

In [6]:
# DataFrame.apply calls the function once per column (default axis=0),
# so both 'A' and 'B' come back tripled.
df[['A', 'B']].apply(multiply_by_three)

Unnamed: 0,A,B
0,3,30
1,6,60
2,9,90
3,12,120


In [8]:
# Series.map with a dict replaces each value by the entry stored under that key.
mapping = {
    1: 'ONE',
    2: 'TWO',
    3: 'THREE',
    4: 'FOUR',
}
df['A'].map(mapping)

0      ONE
1      TWO
2    THREE
3     FOUR
Name: A, dtype: object

In [9]:
# Element-wise application over every cell of the DataFrame.
# `DataFrame.applymap` was deprecated in pandas 2.1 in favour of `DataFrame.map`,
# so prefer `map` when it exists and fall back to `applymap` on older pandas.
elementwise = df.map if hasattr(df, 'map') else df.applymap
elementwise(multiply_by_three)

Unnamed: 0,A,B,C
0,3,30,300
1,6,60,600
2,9,90,900
3,12,120,1200


# Groupby

In [10]:
# Two observations per animal so that a groupby on 'Animal' has something to aggregate.
# NOTE: this rebinds `df`; the numeric A/B/C frame defined earlier is no longer
# reachable under that name after this cell runs.
df = pd.DataFrame(
    data={
        'Animal': ['Falcon'] * 2 + ['Parrot'] * 2,
        'Max Speed': [380.0, 370.0, 24.0, 26.0],
        'Weight': [1.0, 1.5, 0.3, 0.5],
    },
)

In [12]:
# Split the rows by 'Animal' and average the remaining columns within each group.
df.groupby('Animal').mean()

Unnamed: 0_level_0,Max Speed,Weight
Animal,Unnamed: 1_level_1,Unnamed: 2_level_1
Falcon,375.0,1.25
Parrot,25.0,0.4


In [13]:
# Two frames with identical columns A-D and disjoint index labels (0-3 and 4-7).
# Every cell holds "<column letter><row label>", e.g. 'B2'.
df1 = pd.DataFrame(
    {col: [f'{col}{row}' for row in range(0, 4)] for col in 'ABCD'},
    index=[0, 1, 2, 3],
)

df2 = pd.DataFrame(
    {col: [f'{col}{row}' for row in range(4, 8)] for col in 'ABCD'},
    index=[4, 5, 6, 7],
)

In [14]:
# Stack df2 underneath df1 (row-wise); each frame keeps its own index labels.
df_concat = pd.concat(objs=[df1, df2], axis=0)

In [15]:
# Display the stacked result: rows 0-3 come from df1, rows 4-7 from df2.
df_concat

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [16]:
# Two frames that share a 'key' column (K0-K3) but carry different payload columns.
df3 = pd.DataFrame(
    dict(
        key=['K0', 'K1', 'K2', 'K3'],
        A=['A0', 'A1', 'A2', 'A3'],
        B=['B0', 'B1', 'B2', 'B3'],
    )
)

df4 = pd.DataFrame(
    dict(
        key=['K0', 'K1', 'K2', 'K3'],
        C=['C0', 'C1', 'C2', 'C3'],
        D=['D0', 'D1', 'D2', 'D3'],
    )
)

In [17]:
# Inner join of df3 and df4 on their shared 'key' column.
df_merge = df3.merge(df4, how='inner', on='key')

In [18]:
# Display the joined frame: one row per key, with A/B from df3 and C/D from df4.
df_merge

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3
