In [1]:
import pandas as pd

# Merging

In [2]:
# Two small tables that share an 'id' column; frame2 lists 'pencil' twice
# and has no rows for 'mug' or 'ashtray'.
frame1 = pd.DataFrame(
    {
        'id': ['ball', 'pencil', 'pen', 'mug', 'ashtray'],
        'price': [12.33, 11.44, 33.21, 13.23, 33.62],
    }
)
frame2 = pd.DataFrame(
    {
        'id': ['pencil', 'pencil', 'ball', 'pen'],
        'color': ['white', 'red', 'red', 'black'],
    }
)

In [4]:
frame1

Unnamed: 0,id,price
0,ball,12.33
1,pencil,11.44
2,pen,33.21
3,mug,13.23
4,ashtray,33.62


In [5]:
frame2

Unnamed: 0,id,color
0,pencil,white
1,pencil,red
2,ball,red
3,pen,black


In [6]:
# No `on=` given: merge joins on the columns the two frames have in common
# (only 'id' here) and keeps just the rows whose key appears in both.
pd.merge(frame1, frame2)

Unnamed: 0,id,price,color
0,ball,12.33,red
1,pencil,11.44,white
2,pencil,11.44,red
3,pen,33.21,black


In [7]:
# Redefine frame1 with 'id', 'color' and 'brand' columns for the next merge examples.
frame1 = pd.DataFrame(
    {
        'id': ['ball', 'pencil', 'pen', 'mug', 'ashtray'],
        'color': ['white', 'red', 'red', 'black', 'green'],
        'brand': ['OMG', 'ABC', 'ABC', 'POD', 'POD'],
    }
)
frame1

Unnamed: 0,id,color,brand
0,ball,white,OMG
1,pencil,red,ABC
2,pen,red,ABC
3,mug,black,POD
4,ashtray,green,POD


In [8]:
# Redefine frame2 with 'id' and 'brand' columns ('pencil' appears twice).
frame2 = pd.DataFrame(
    {
        'id': ['pencil', 'pencil', 'ball', 'pen'],
        'brand': ['OMG', 'POD', 'ABC', 'POD'],
    }
)
frame2

Unnamed: 0,id,brand
0,pencil,OMG
1,pencil,POD
2,ball,ABC
3,pen,POD


In [10]:
# Both frames now share 'id' AND 'brand', so the default merge joins on the pair;
# no row agrees on both values, hence the empty result.
pd.merge(frame1, frame2)

Unnamed: 0,id,color,brand


In [9]:
# Join on 'id' only; the overlapping 'brand' columns come back as brand_x / brand_y.
pd.merge(frame1, frame2, on='id')

Unnamed: 0,id,color,brand_x,brand_y
0,ball,white,OMG,ABC
1,pencil,red,ABC,OMG
2,pencil,red,ABC,POD
3,pen,red,ABC,POD


## Merging on columns with different names

In [14]:
# Rename frame2's columns in place so its key column is called 'sid' instead of 'id'.
frame2.columns = ['sid', 'brand']
frame2

Unnamed: 0,sid,brand
0,pencil,OMG
1,pencil,POD
2,ball,ABC
3,pen,POD


In [15]:
# The key is named differently on each side: 'id' in frame1, 'sid' in frame2.
# Both key columns are kept in the result.
pd.merge(frame1, frame2, left_on='id', right_on='sid')

Unnamed: 0,id,color,brand_x,sid,brand_y
0,ball,white,OMG,ball,ABC
1,pencil,red,ABC,pencil,OMG
2,pencil,red,ABC,pencil,POD
3,pen,red,ABC,pen,POD


## outer, left, right joins

In [16]:
# Put frame2's column names back to 'id' / 'brand' (in place) for the join examples that follow.
frame2.columns = ['id', 'brand']
frame1

Unnamed: 0,id,color,brand
0,ball,white,OMG
1,pencil,red,ABC
2,pen,red,ABC
3,mug,black,POD
4,ashtray,green,POD


In [20]:
frame2

Unnamed: 0,id,brand
0,pencil,OMG
1,pencil,POD
2,ball,ABC
3,pen,POD


In [17]:
# Default how='inner': only ids present in both frames are kept.
pd.merge(frame1, frame2, on='id')

Unnamed: 0,id,color,brand_x,brand_y
0,ball,white,OMG,ABC
1,pencil,red,ABC,OMG
2,pencil,red,ABC,POD
3,pen,red,ABC,POD


In [18]:
# how='outer': keep the ids from both frames; cells with no match are left missing.
pd.merge(frame1, frame2, on='id', how='outer')

Unnamed: 0,id,color,brand_x,brand_y
0,ashtray,green,POD,
1,ball,white,OMG,ABC
2,mug,black,POD,
3,pen,red,ABC,POD
4,pencil,red,ABC,OMG
5,pencil,red,ABC,POD


In [19]:
# how='left': keep every row of frame1, whether or not frame2 has a matching id.
pd.merge(frame1, frame2, on='id', how='left')

Unnamed: 0,id,color,brand_x,brand_y
0,ball,white,OMG,ABC
1,pencil,red,ABC,OMG
2,pencil,red,ABC,POD
3,pen,red,ABC,POD
4,mug,black,POD,
5,ashtray,green,POD,


In [21]:
# how='right': keep every row of frame2, whether or not frame1 has a matching id.
pd.merge(frame1, frame2, on='id', how='right')

Unnamed: 0,id,color,brand_x,brand_y
0,pencil,red,ABC,OMG
1,pencil,red,ABC,POD
2,ball,white,OMG,ABC
3,pen,red,ABC,POD


In [22]:
# Outer join on the ('id', 'brand') pair: rows that exist only in frame2
# come back with no 'color'.
frame1.merge(frame2, on=['id', 'brand'], how='outer')

Unnamed: 0,id,color,brand
0,ashtray,green,POD
1,ball,,ABC
2,ball,white,OMG
3,mug,black,POD
4,pen,red,ABC
5,pen,,POD
6,pencil,red,ABC
7,pencil,,OMG
8,pencil,,POD


## merge by index

In [24]:
# Align the two frames on their row index instead of on a key column.
frame1.merge(frame2, left_index=True, right_index=True)


Unnamed: 0,id_x,color,brand_x,id_y,brand_y
0,ball,white,OMG,pencil,OMG
1,pencil,red,ABC,pencil,POD
2,pen,red,ABC,ball,ABC
3,mug,black,POD,pen,POD


In [25]:
# join() aligns on the index and refuses overlapping column names unless
# suffixes are supplied. The error is the point of this cell, so catch it and
# show the message instead of letting it abort a Restart & Run All.
try:
    frame1.join(frame2)
except ValueError as err:
    print(f'{type(err).__name__}: {err}')

ValueError: columns overlap but no suffix specified: Index(['id', 'brand'], dtype='object')

In [26]:
# New frame with frame2's data under column names that no longer collide with frame1's.
frame2_renamed = frame2.rename({'id': 'id2', 'brand': 'brand2'}, axis='columns')
frame2_renamed

Unnamed: 0,id2,brand2
0,pencil,OMG
1,pencil,POD
2,ball,ABC
3,pen,POD


In [28]:
# With no overlapping column names, join() aligns on the index and keeps
# every row of frame1; the unmatched row 4 gets missing values.
frame1.join(frame2_renamed)

Unnamed: 0,id,color,brand,id2,brand2
0,ball,white,OMG,pencil,OMG
1,pencil,red,ABC,pencil,POD
2,pen,red,ABC,ball,ABC
3,mug,black,POD,pen,POD
4,ashtray,green,POD,,


# Concatenation

In [30]:
import numpy as np, pandas as pd

# Two 3x3 integer grids; the second is the first with 6 added to every element.
array1 = np.arange(9).reshape((3, 3))
array2 = array1 + 6

In [31]:
array1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [32]:
array2

array([[ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [36]:
# axis=1: place the arrays side by side (columns are appended).
np.concatenate([array1, array2], axis=1)

array([[ 0,  1,  2,  6,  7,  8],
       [ 3,  4,  5,  9, 10, 11],
       [ 6,  7,  8, 12, 13, 14]])

In [35]:
# axis=0: stack the arrays on top of each other (rows are appended).
np.concatenate([array1, array2], axis=0)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [37]:
# Draw from a seeded local generator so the example values are reproducible
# on every Restart & Run All (the unseeded global RNG gave new numbers each run).
rng = np.random.default_rng(42)
ser1 = pd.Series(rng.random(4), index=[1, 2, 3, 4])
ser2 = pd.Series(rng.random(4), index=[5, 6, 7, 8])

In [38]:
ser1

1    0.922436
2    0.967342
3    0.016317
4    0.513251
dtype: float64

In [39]:
ser2

5    0.750735
6    0.689510
7    0.677939
8    0.015256
dtype: float64

In [40]:
# Vertical concatenation (the indexes are chained one after the other)
pd.concat([ser1, ser2])

1    0.922436
2    0.967342
3    0.016317
4    0.513251
5    0.750735
6    0.689510
7    0.677939
8    0.015256
dtype: float64

In [41]:
# Horizontal concatenation – each Series becomes a column, labelled 0 and 1
pd.concat([ser1, ser2], axis=1)

Unnamed: 0,0,1
1,0.922436,
2,0.967342,
3,0.016317,
4,0.513251,
5,,0.750735
6,,0.68951
7,,0.677939
8,,0.015256


In [42]:
# Vertical concatenation with a multi-level index: the keys label each source Series
pd.concat([ser1, ser2], keys=['A', 'B'])

A  1    0.922436
   2    0.967342
   3    0.016317
   4    0.513251
B  5    0.750735
   6    0.689510
   7    0.677939
   8    0.015256
dtype: float64

In [43]:
# Horizontal concatenation with column labels taken from the keys
pd.concat([ser1, ser2], axis=1, keys=['A', 'B'])

Unnamed: 0,A,B
1,0.922436,
2,0.967342,
3,0.016317,
4,0.513251,
5,,0.750735
6,,0.68951
7,,0.677939
8,,0.015256


In [44]:
# Two 3x3 frames with the same columns but disjoint row labels.
# A seeded local generator keeps the random cell values reproducible across reruns.
rng = np.random.default_rng(42)
frame1 = pd.DataFrame(rng.random((3, 3)),
                      index=[1, 2, 3], columns=['A', 'B', 'C'])
frame2 = pd.DataFrame(rng.random((3, 3)),
                      index=[4, 5, 6], columns=['A', 'B', 'C'])

In [45]:
frame1

Unnamed: 0,A,B,C
1,0.432452,0.236868,0.533964
2,0.404811,0.13747,0.558981
3,0.412519,0.094956,0.962092


In [46]:
frame2

Unnamed: 0,A,B,C
4,0.769756,0.625399,0.489272
5,0.791935,0.930005,0.192615
6,0.518723,0.75427,0.100885


In [47]:
# Append rows (axis=0, the default)
pd.concat([frame1, frame2])

Unnamed: 0,A,B,C
1,0.432452,0.236868,0.533964
2,0.404811,0.13747,0.558981
3,0.412519,0.094956,0.962092
4,0.769756,0.625399,0.489272
5,0.791935,0.930005,0.192615
6,0.518723,0.75427,0.100885


In [48]:
# Append columns (axis=1); rows are aligned on the index, so non-shared labels give missing cells
pd.concat([frame1, frame2], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1
1,0.432452,0.236868,0.533964,,,
2,0.404811,0.13747,0.558981,,,
3,0.412519,0.094956,0.962092,,,
4,,,,0.769756,0.625399,0.489272
5,,,,0.791935,0.930005,0.192615
6,,,,0.518723,0.75427,0.100885


# Combining

In [49]:
import numpy as np, pandas as pd

# Two Series with partially overlapping labels (2, 4 and 5 appear in both).
# A seeded local generator keeps the values reproducible across reruns.
rng = np.random.default_rng(42)
ser1 = pd.Series(rng.random(5), index=[1, 2, 3, 4, 5])
ser2 = pd.Series(rng.random(4), index=[2, 4, 5, 6])

In [50]:
ser1

1    0.937909
2    0.188290
3    0.300720
4    0.774750
5    0.784752
dtype: float64

In [52]:
ser2

2    0.990589
4    0.150656
5    0.021576
6    0.122410
dtype: float64

In [51]:
# Keep ser1's values and take from ser2 only the labels ser1 lacks (here: 6).
ser1.combine_first(ser2)

1    0.937909
2    0.188290
3    0.300720
4    0.774750
5    0.784752
6    0.122410
dtype: float64

In [54]:
# Same operation on the first three elements of each Series. .iloc makes it
# explicit that the slice is positional and not a selection by the integer
# labels stored in the index.
partial = ser1.iloc[:3].combine_first(ser2.iloc[:3])
partial

1    0.937909
2    0.188290
3    0.300720
4    0.150656
5    0.021576
dtype: float64

# Pivoting

In [55]:
import numpy as np, pandas as pd

# 3x3 table of the integers 0..8: colours as row labels, items as column labels.
frame1 = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=['white', 'black', 'red'],
    columns=['ball', 'pen', 'pencil'],
)

frame1


Unnamed: 0,ball,pen,pencil
white,0,1,2
black,3,4,5
red,6,7,8


In [56]:
# Turn the columns into an (inner) index level, giving a Series
ser5 = frame1.stack()
ser5

white  ball      0
       pen       1
       pencil    2
black  ball      3
       pen       4
       pencil    5
red    ball      6
       pen       7
       pencil    8
dtype: int64

In [57]:
# Bring it back to the original shape
original = ser5.unstack()
original

Unnamed: 0,ball,pen,pencil
white,0,1,2
black,3,4,5
red,6,7,8


In [59]:
# Unstack a different level
pivoted = ser5.unstack(0)  # columns <- first index level
pivoted

Unnamed: 0,white,black,red
ball,0,3,6
pen,1,4,7
pencil,2,5,8


In [63]:
# Long-format table: one row per (color, item) pair with a random value.
# A seeded local generator keeps the 'value' column reproducible across reruns.
rng = np.random.default_rng(42)
longframe = pd.DataFrame({
    'color': ['white'] * 3 + ['red'] * 3 + ['black'] * 3,
    'item': ['ball', 'pen', 'mug'] * 3,
    'value': rng.random(9),
})
longframe


Unnamed: 0,color,item,value
0,white,ball,0.819844
1,white,pen,0.916285
2,white,mug,0.248783
3,red,ball,0.487458
4,red,pen,0.481999
5,red,mug,0.879743
6,black,ball,0.09763
7,black,pen,0.943116
8,black,mug,0.299076


In [67]:
# 'color' becomes the row index and 'item' the column labels. `values` is not
# passed, so the remaining column ('value') supplies the cells and shows up as
# the top level of the resulting column labels.
wideframe = longframe.pivot(index='color', columns='item')
# End on the bare name so the notebook renders the frame richly;
# print() would only give the plain-text repr.
wideframe

          value                    
item       ball       mug       pen
color                              
black  0.097630  0.299076  0.943116
red    0.487458  0.879743  0.481999
white  0.819844  0.248783  0.916285


# Removing

In [68]:
import numpy as np, pandas as pd

# Fresh 3x3 frame (values 0..8) to demonstrate removing columns and rows.
frame1 = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=['white', 'black', 'red'],
    columns=['ball', 'pen', 'pencil'],
)

frame1

Unnamed: 0,ball,pen,pencil
white,0,1,2
black,3,4,5
red,6,7,8


In [69]:
# Method 1 — del: removes the column from frame1 in place.
# The cell is not idempotent: once 'ball' is gone, running it again fails.
del frame1['ball']

frame1

Unnamed: 0,pen,pencil
white,1,2
black,4,5
red,7,8


In [70]:
# Method 2 — drop: returns a copy without the column; frame1 itself is left unchanged
frame2 = frame1.drop('pen', axis=1)
frame2

Unnamed: 0,pencil
white,2
black,5
red,8


In [71]:
# Starting from frame1 as it is now (column 'ball' has already been removed)
cleaned = frame1.drop('white')        # drop a single row by its label
cleaned

Unnamed: 0,pen,pencil
black,4,5
red,7,8


In [72]:
# Drop several rows at once by passing a list of labels
cleaned2 = frame1.drop(['white', 'red'])
cleaned2

Unnamed: 0,pen,pencil
black,4,5


In [73]:
# In-place row removal: frame1 itself is modified and nothing is returned.
# Like the `del` cell, this fails if re-run, because 'black' is already gone.
frame1.drop('black', inplace=True)

frame1

Unnamed: 0,pen,pencil
white,1,2
red,7,8
