In [49]:
import numpy as np
import pandas as pd

## numpy arrays

In [50]:
arr = np.arange(1, 11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [51]:
arr ** 2

array([  1,   4,   9,  16,  25,  36,  49,  64,  81, 100], dtype=int32)

In [52]:
2 ** arr

array([   2,    4,    8,   16,   32,   64,  128,  256,  512, 1024],
      dtype=int32)

## Series

In [53]:
s1 = pd.Series(2**arr)
s1

0       2
1       4
2       8
3      16
4      32
5      64
6     128
7     256
8     512
9    1024
dtype: int32

In [54]:
s1.where([True, True, True, False, False, False, False, False, False, False])

0    2.0
1    4.0
2    8.0
3    NaN
4    NaN
5    NaN
6    NaN
7    NaN
8    NaN
9    NaN
dtype: float64

In [55]:
s1.where([True, True, True, False, False, False, False, False, False, False]).dropna()

0    2.0
1    4.0
2    8.0
dtype: float64

In [56]:
s1.filter(like='2')

2    8
dtype: int32

In [57]:
s2 = s1.filter(items=[2,3,5])
s2

2     8
3    16
5    64
dtype: int32

In [58]:
dict(s2)

{2: 8, 3: 16, 5: 64}

In [59]:
2 in s2, 10 in s2

(True, False)

In [60]:
s1.loc[2]

8

In [61]:
s1.iloc[2]

8

In [62]:
s1.index = pd.RangeIndex(start=1, stop=11, step=1)

In [63]:
s1.index

RangeIndex(start=1, stop=11, step=1)

In [64]:
s1

1        2
2        4
3        8
4       16
5       32
6       64
7      128
8      256
9      512
10    1024
dtype: int32

In [65]:
s1.loc[2], s1.iloc[2]

(4, 8)

In [66]:
s1[2]  # filter calls by idx name; like loc

4

In [67]:
s1[2:4]  # slice calls by idx position; like iloc

3     8
4    16
dtype: int32

In [68]:
s1.loc[2:4]  # by name with last item

2     4
3     8
4    16
dtype: int32

In [69]:
s1.iloc[2:4]  # by position without last item

3     8
4    16
dtype: int32

In [70]:
s1[[7,5,1]]

7    128
5     32
1      2
dtype: int32

In [71]:
s1[-1:]

10    1024
dtype: int32

In [72]:
s1[5:-2]

6     64
7    128
8    256
dtype: int32

In [73]:
s1[2:10:3]

3      8
6     64
9    512
dtype: int32

In [74]:
s1[2:-1:3]

3      8
6     64
9    512
dtype: int32

In [75]:
s1[-1::-1]

10    1024
9      512
8      256
7      128
6       64
5       32
4       16
3        8
2        4
1        2
dtype: int32

In [76]:
s1[::-1]

10    1024
9      512
8      256
7      128
6       64
5       32
4       16
3        8
2        4
1        2
dtype: int32

In [77]:
s1.idxmin(), s1.idxmax()

(1, 10)

In [78]:
s1.is_monotonic

True

In [79]:
s1.is_monotonic_decreasing

False

In [80]:
s1.nbytes

40

In [81]:
s1.values

array([   2,    4,    8,   16,   32,   64,  128,  256,  512, 1024],
      dtype=int32)

In [82]:
s1.sum()

2046

In [83]:
s1.index.intersection(s2.index)

Int64Index([2, 3, 5], dtype='int64')

In [84]:
d1 = {
    'bułka': 2,
    'chleb': 4,
    'masło': 10,
    'szynka': 20
}
s3 = pd.Series(d1)
s3

bułka      2
chleb      4
masło     10
szynka    20
dtype: int64

In [85]:
d2 = dict(
    Janek='chleb',
    Asia='bułka',
    Jarek='szynka',
    Bartek='szynka'
)
s4 = pd.Series(d2)
s4

Janek      chleb
Asia       bułka
Jarek     szynka
Bartek    szynka
dtype: object

In [86]:
s4.map(s3)

Janek      4
Asia       2
Jarek     20
Bartek    20
dtype: int64

## DataFrame

In [87]:
df1 = pd.DataFrame(s4)
df1

Unnamed: 0,0
Janek,chleb
Asia,bułka
Jarek,szynka
Bartek,szynka


In [88]:
df2 = pd.DataFrame(s4.map(s3), columns=['price'])
df2

Unnamed: 0,price
Janek,4
Asia,2
Jarek,20
Bartek,20


In [89]:
df1['price'] = df2['price']
df1

Unnamed: 0,0,price
Janek,chleb,4
Asia,bułka,2
Jarek,szynka,20
Bartek,szynka,20


In [90]:
# df1['Grzesiek'] = ['obiad', 50]
df1.loc['Janek']

0        chleb
price        4
Name: Janek, dtype: object

In [91]:
df1['price_rank'] = df1['price'].rank(method='max')
df1

Unnamed: 0,0,price,price_rank
Janek,chleb,4,2.0
Asia,bułka,2,1.0
Jarek,szynka,20,4.0
Bartek,szynka,20,4.0


In [92]:
df1.nlargest(1, 'price')

Unnamed: 0,0,price,price_rank
Jarek,szynka,20,4.0


In [93]:
df1.nsmallest(3, 'price')

Unnamed: 0,0,price,price_rank
Asia,bułka,2,1.0
Janek,chleb,4,2.0
Jarek,szynka,20,4.0


In [94]:
df1.rename(columns={0: 'prod'}, inplace=True)

In [95]:
df1[~ (df1['prod'] == 'szynka')]

Unnamed: 0,prod,price,price_rank
Janek,chleb,4,2.0
Asia,bułka,2,1.0


In [96]:
df1['price'].is_unique

False

In [97]:
df1['price'].unique()

array([ 4,  2, 20], dtype=int64)

In [98]:
df1[df1['price'].duplicated(keep='first')]

Unnamed: 0,prod,price,price_rank
Bartek,szynka,20,4.0


In [99]:
df1[df1['price'].duplicated(keep='last')]

Unnamed: 0,prod,price,price_rank
Jarek,szynka,20,4.0


In [100]:
df1[~ (df1['price'].duplicated(keep='last'))]

Unnamed: 0,prod,price,price_rank
Janek,chleb,4,2.0
Asia,bułka,2,1.0
Bartek,szynka,20,4.0


## data modification

In [101]:
df1

Unnamed: 0,prod,price,price_rank
Janek,chleb,4,2.0
Asia,bułka,2,1.0
Jarek,szynka,20,4.0
Bartek,szynka,20,4.0


In [102]:
df1.rename(index={'Janek': 'Czarek'})

Unnamed: 0,prod,price,price_rank
Czarek,chleb,4,2.0
Asia,bułka,2,1.0
Jarek,szynka,20,4.0
Bartek,szynka,20,4.0


In [103]:
df1.drop(labels=['Janek', 'Bartek'])

Unnamed: 0,prod,price,price_rank
Asia,bułka,2,1.0
Jarek,szynka,20,4.0


In [104]:
df1.drop(labels=['prod', 'price_rank'], axis=1)

Unnamed: 0,price
Janek,4
Asia,2
Jarek,20
Bartek,20


### adding rows

In [105]:
df3 = df1.iloc[-2:]
df3

Unnamed: 0,prod,price,price_rank
Jarek,szynka,20,4.0
Bartek,szynka,20,4.0


In [106]:
df3.loc['Bartek'] = ['jagody', 50, 5]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, v, pi)


In [107]:
df3.rename(index={'Bartek': 'Zenek'}, inplace=True)
df3

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,prod,price,price_rank
Jarek,szynka,20,4.0
Zenek,jagody,50,5.0


In [108]:
df1

Unnamed: 0,prod,price,price_rank
Janek,chleb,4,2.0
Asia,bułka,2,1.0
Jarek,szynka,20,4.0
Bartek,jagody,50,5.0


In [109]:
df4 = df1.append(df3)
df4

Unnamed: 0,prod,price,price_rank
Janek,chleb,4,2.0
Asia,bułka,2,1.0
Jarek,szynka,20,4.0
Bartek,jagody,50,5.0
Jarek,szynka,20,4.0
Zenek,jagody,50,5.0


In [110]:
df4.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, Janek to Zenek
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   prod        6 non-null      object 
 1   price       6 non-null      int64  
 2   price_rank  6 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 192.0+ bytes


## change index

In [111]:
df4.set_index(pd.RangeIndex(6))  # lost index data

Unnamed: 0,prod,price,price_rank
0,chleb,4,2.0
1,bułka,2,1.0
2,szynka,20,4.0
3,jagody,50,5.0
4,szynka,20,4.0
5,jagody,50,5.0


In [112]:
df4.reset_index()

Unnamed: 0,index,prod,price,price_rank
0,Janek,chleb,4,2.0
1,Asia,bułka,2,1.0
2,Jarek,szynka,20,4.0
3,Bartek,jagody,50,5.0
4,Jarek,szynka,20,4.0
5,Zenek,jagody,50,5.0


In [113]:
df4.reset_index().rename(columns={'index': 'name'}).set_index('price_rank')

Unnamed: 0_level_0,name,prod,price
price_rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2.0,Janek,chleb,4
1.0,Asia,bułka,2
4.0,Jarek,szynka,20
5.0,Bartek,jagody,50
4.0,Jarek,szynka,20
5.0,Zenek,jagody,50


In [114]:
ptrans = pd.read_csv(
    './course-files/course-sources/PublicTransitExpenses.csv',
    usecols=['Agency', 'Reporter Type', 'Total Operating Expenses']
)
ptrans.head()

Unnamed: 0,Agency,Reporter Type,Total Operating Expenses
0,Washington County Commissioners,Reduced Reporter,$122524.00
1,Washington County Commissioners,Reduced Reporter,$272715.00
2,"Texoma Area Paratransit System, Inc",Full Reporter,$7295.00
3,Kalispel Tribe of Indians,Reduced Reporter,$37416.00
4,Kalispel Tribe of Indians,Reduced Reporter,$345789.00


In [115]:
ptrans['Agency'].str.split(' ')

0              [Washington, County, Commissioners]
1              [Washington, County, Commissioners]
2        [Texoma, Area, Paratransit, System,, Inc]
3                   [Kalispel, Tribe, of, Indians]
4                   [Kalispel, Tribe, of, Indians]
                           ...                    
17839       [Cedar, Area, Transportation, Service]
17840                             [Tehama, County]
17841                          [City, of, Dixon, ]
17842         [Morongo, Basin, Transit, Authority]
17843                            [City, of, Arvin]
Name: Agency, Length: 17844, dtype: object

In [116]:
ptrans['Agency'].str.split(' ', expand=True, n=5)

Unnamed: 0,0,1,2,3,4,5
0,Washington,County,Commissioners,,,
1,Washington,County,Commissioners,,,
2,Texoma,Area,Paratransit,"System,",Inc,
3,Kalispel,Tribe,of,Indians,,
4,Kalispel,Tribe,of,Indians,,
...,...,...,...,...,...,...
17839,Cedar,Area,Transportation,Service,,
17840,Tehama,County,,,,
17841,City,of,Dixon,,,
17842,Morongo,Basin,Transit,Authority,,


## Multiindex

In [2]:
incidents = pd.read_csv(
    './course-files/course-sources/Canadian Railway Crossing Incidents.csv'
)
incidents

Unnamed: 0,Region,EventType,Public automated,Public passive,Private,Farm
0,Newfoundland,Accidents,0,0,0,0
1,Newfoundland,Fatalities,0,0,0,0
2,Newfoundland,Serious injuries,0,0,0,0
3,Nova Scotia,Accidents,1,0,0,0
4,Nova Scotia,Fatalities,0,0,0,0
5,Nova Scotia,Serious injuries,0,0,0,0
6,New Brunswick,Accidents,0,0,0,0
7,New Brunswick,Fatalities,0,0,0,0
8,New Brunswick,Serious injuries,0,0,0,0
9,Quebec,Accidents,4,0,1,0


In [9]:
incidents.set_index(['Region', 'EventType'], inplace=True)

In [11]:
incidents.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
Region,EventType,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Newfoundland,Accidents,0,0,0,0
Newfoundland,Fatalities,0,0,0,0
Newfoundland,Serious injuries,0,0,0,0
Nova Scotia,Accidents,1,0,0,0
Nova Scotia,Fatalities,0,0,0,0


In [17]:
incidents.loc[('Newfoundland', 'Fatalities')]

Public automated    0
Public passive      0
Private             0
Farm                0
Name: (Newfoundland, Fatalities), dtype: int64

In [19]:
incidents.loc[:, 'Private']

Region                 EventType       
Newfoundland           Accidents           0
                       Fatalities          0
                       Serious injuries    0
Nova Scotia            Accidents           0
                       Fatalities          0
                       Serious injuries    0
New Brunswick          Accidents           0
                       Fatalities          0
                       Serious injuries    0
Quebec                 Accidents           1
                       Fatalities          0
                       Serious injuries    0
Ontario                Accidents           2
                       Fatalities          0
                       Serious injuries    0
Manitoba               Accidents           0
                       Fatalities          0
                       Serious injuries    0
Saskatchewan           Accidents           0
                       Fatalities          0
                       Serious injuries    0
Alberta        

In [21]:
incidents.loc['Newfoundland', 'Private']

EventType
Accidents           0
Fatalities          0
Serious injuries    0
Name: Private, dtype: int64

In [22]:
            #  x,                           y
incidents.loc[('Newfoundland', 'Fatalities'), 'Private']

0

In [23]:
incidents.transpose()

Region,Newfoundland,Newfoundland,Newfoundland,Nova Scotia,Nova Scotia,Nova Scotia,New Brunswick,New Brunswick,New Brunswick,Quebec,...,Saskatchewan,Alberta,Alberta,Alberta,British Columbia,British Columbia,British Columbia,Northwest Territories,Northwest Territories,Northwest Territories
EventType,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,...,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries
Public automated,0,0,0,1,0,0,0,0,0,4,...,0,3,0,0,4,0,3,0,0,0
Public passive,0,0,0,0,0,0,0,0,0,0,...,0,7,0,1,1,0,0,0,0,0
Private,0,0,0,0,0,0,0,0,0,1,...,0,2,0,2,0,0,0,0,0,0
Farm,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
incidents.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
Region,EventType,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Newfoundland,Accidents,0,0,0,0
Newfoundland,Fatalities,0,0,0,0
Newfoundland,Serious injuries,0,0,0,0
Nova Scotia,Accidents,1,0,0,0
Nova Scotia,Fatalities,0,0,0,0


In [25]:
incidents.swaplevel().sort_index().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
EventType,Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accidents,Alberta,3,7,2,0
Accidents,British Columbia,4,1,0,0
Accidents,Manitoba,1,3,0,2
Accidents,New Brunswick,0,0,0,0
Accidents,Newfoundland,0,0,0,0


In [31]:
incidents.shape

(30, 4)

### work with stacked DF

In [172]:
stacked_inc = incidents.stack().to_frame()
print(stacked_inc.shape)
stacked_inc

(120, 1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
Region,EventType,Unnamed: 2_level_1,Unnamed: 3_level_1
Newfoundland,Accidents,Public automated,0
Newfoundland,Accidents,Public passive,0
Newfoundland,Accidents,Private,0
Newfoundland,Accidents,Farm,0
Newfoundland,Fatalities,Public automated,0
...,...,...,...
Northwest Territories,Fatalities,Farm,0
Northwest Territories,Serious injuries,Public automated,0
Northwest Territories,Serious injuries,Public passive,0
Northwest Territories,Serious injuries,Private,0


In [233]:
stacked_inc.index

MultiIndex([(         'Newfoundland',        'Accidents', 'Public automated'),
            (         'Newfoundland',        'Accidents',   'Public passive'),
            (         'Newfoundland',        'Accidents',          'Private'),
            (         'Newfoundland',        'Accidents',             'Farm'),
            (         'Newfoundland',       'Fatalities', 'Public automated'),
            (         'Newfoundland',       'Fatalities',   'Public passive'),
            (         'Newfoundland',       'Fatalities',          'Private'),
            (         'Newfoundland',       'Fatalities',             'Farm'),
            (         'Newfoundland', 'Serious injuries', 'Public automated'),
            (         'Newfoundland', 'Serious injuries',   'Public passive'),
            ...
            ('Northwest Territories',        'Accidents',          'Private'),
            ('Northwest Territories',        'Accidents',             'Farm'),
            ('Northwest Territories'

In [234]:
stacked_inc.index.get_level_values(0)

Index(['Newfoundland', 'Newfoundland', 'Newfoundland', 'Newfoundland',
       'Newfoundland', 'Newfoundland', 'Newfoundland', 'Newfoundland',
       'Newfoundland', 'Newfoundland',
       ...
       'Northwest Territories', 'Northwest Territories',
       'Northwest Territories', 'Northwest Territories',
       'Northwest Territories', 'Northwest Territories',
       'Northwest Territories', 'Northwest Territories',
       'Northwest Territories', 'Northwest Territories'],
      dtype='object', name='Region', length=120)

In [235]:
stacked_inc.index.get_level_values(1)

Index(['Accidents', 'Accidents', 'Accidents', 'Accidents', 'Fatalities',
       'Fatalities', 'Fatalities', 'Fatalities', 'Serious injuries',
       'Serious injuries',
       ...
       'Accidents', 'Accidents', 'Fatalities', 'Fatalities', 'Fatalities',
       'Fatalities', 'Serious injuries', 'Serious injuries',
       'Serious injuries', 'Serious injuries'],
      dtype='object', name='EventType', length=120)

In [236]:
stacked_inc.index.get_level_values(2)

Index(['Public automated', 'Public passive', 'Private', 'Farm',
       'Public automated', 'Public passive', 'Private', 'Farm',
       'Public automated', 'Public passive',
       ...
       'Private', 'Farm', 'Public automated', 'Public passive', 'Private',
       'Farm', 'Public automated', 'Public passive', 'Private', 'Farm'],
      dtype='object', name='CrossType', length=120)

In [173]:
print(stacked_inc.index.names)
stacked_inc.index.names = ['Region', 'EventType', 'CrossType']
stacked_inc.index.names

['Region', 'EventType', None]


FrozenList(['Region', 'EventType', 'CrossType'])

In [177]:
print(stacked_inc.columns)
stacked_inc.columns = ['counted']
stacked_inc.columns

RangeIndex(start=0, stop=1, step=1)


Index(['counted'], dtype='object')

In [178]:
stacked_inc.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 120 entries, ('Newfoundland', 'Accidents', 'Public automated') to ('Northwest Territories', 'Serious injuries', 'Farm')
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   counted  120 non-null    int64
dtypes: int64(1)
memory usage: 2.2+ KB


In [179]:
stacked_inc.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,counted,counted,counted,counted
Unnamed: 0_level_1,CrossType,Public automated,Public passive,Private,Farm
Region,EventType,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Alberta,Accidents,3,7,2,0
Alberta,Fatalities,0,0,0,0
Alberta,Serious injuries,0,1,2,0
British Columbia,Accidents,4,1,0,0
British Columbia,Fatalities,0,0,0,0
British Columbia,Serious injuries,3,0,0,0
Manitoba,Accidents,1,3,0,2
Manitoba,Fatalities,0,1,0,0
Manitoba,Serious injuries,0,0,0,1
New Brunswick,Accidents,0,0,0,0


In [180]:
stacked_inc.unstack().unstack()

Unnamed: 0_level_0,counted,counted,counted,counted,counted,counted,counted,counted,counted,counted,counted,counted
CrossType,Public automated,Public automated,Public automated,Public passive,Public passive,Public passive,Private,Private,Private,Farm,Farm,Farm
EventType,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries
Region,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
Alberta,3,0,0,7,0,1,2,0,2,0,0,0
British Columbia,4,0,3,1,0,0,0,0,0,0,0,0
Manitoba,1,0,0,3,1,0,0,0,0,2,0,1
New Brunswick,0,0,0,0,0,0,0,0,0,0,0,0
Newfoundland,0,0,0,0,0,0,0,0,0,0,0,0
Northwest Territories,0,0,0,0,0,0,0,0,0,0,0,0
Nova Scotia,1,0,0,0,0,0,0,0,0,0,0,0
Ontario,7,1,1,0,0,0,2,0,0,0,0,0
Quebec,4,1,0,0,0,0,1,0,0,0,0,0
Saskatchewan,1,0,0,3,0,0,0,0,0,0,0,0


In [181]:
incidents.unstack()

Unnamed: 0_level_0,Public automated,Public automated,Public automated,Public passive,Public passive,Public passive,Private,Private,Private,Farm,Farm,Farm
EventType,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Alberta,3,0,0,7,0,1,2,0,2,0,0,0
British Columbia,4,0,3,1,0,0,0,0,0,0,0,0
Manitoba,1,0,0,3,1,0,0,0,0,2,0,1
New Brunswick,0,0,0,0,0,0,0,0,0,0,0,0
Newfoundland,0,0,0,0,0,0,0,0,0,0,0,0
Northwest Territories,0,0,0,0,0,0,0,0,0,0,0,0
Nova Scotia,1,0,0,0,0,0,0,0,0,0,0,0
Ontario,7,1,1,0,0,0,2,0,0,0,0,0
Quebec,4,1,0,0,0,0,1,0,0,0,0,0
Saskatchewan,1,0,0,3,0,0,0,0,0,0,0,0


In [182]:
incidents.reset_index().stack().to_frame().head(30)  # it doesn't have a sense

Unnamed: 0,Unnamed: 1,0
0,Region,Newfoundland
0,EventType,Accidents
0,Public automated,0
0,Public passive,0
0,Private,0
0,Farm,0
1,Region,Newfoundland
1,EventType,Fatalities
1,Public automated,0
1,Public passive,0


In [183]:
stacked_inc

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,counted
Region,EventType,CrossType,Unnamed: 3_level_1
Newfoundland,Accidents,Public automated,0
Newfoundland,Accidents,Public passive,0
Newfoundland,Accidents,Private,0
Newfoundland,Accidents,Farm,0
Newfoundland,Fatalities,Public automated,0
...,...,...,...
Northwest Territories,Fatalities,Farm,0
Northwest Territories,Serious injuries,Public automated,0
Northwest Territories,Serious injuries,Public passive,0
Northwest Territories,Serious injuries,Private,0


In [184]:
stacked_inc.unstack('EventType')

Unnamed: 0_level_0,Unnamed: 1_level_0,counted,counted,counted
Unnamed: 0_level_1,EventType,Accidents,Fatalities,Serious injuries
Region,CrossType,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Alberta,Public automated,3,0,0
Alberta,Public passive,7,0,1
Alberta,Private,2,0,2
Alberta,Farm,0,0,0
British Columbia,Public automated,4,0,3
British Columbia,Public passive,1,0,0
British Columbia,Private,0,0,0
British Columbia,Farm,0,0,0
Manitoba,Public automated,1,0,0
Manitoba,Public passive,3,1,0


In [185]:
stacked_inc.unstack('Region')

Unnamed: 0_level_0,Unnamed: 1_level_0,counted,counted,counted,counted,counted,counted,counted,counted,counted,counted
Unnamed: 0_level_1,Region,Alberta,British Columbia,Manitoba,New Brunswick,Newfoundland,Northwest Territories,Nova Scotia,Ontario,Quebec,Saskatchewan
EventType,CrossType,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Accidents,Public automated,3,4,1,0,0,0,1,7,4,1
Accidents,Public passive,7,1,3,0,0,0,0,0,0,3
Accidents,Private,2,0,0,0,0,0,0,2,1,0
Accidents,Farm,0,0,2,0,0,0,0,0,0,0
Fatalities,Public automated,0,0,0,0,0,0,0,1,1,0
Fatalities,Public passive,0,0,1,0,0,0,0,0,0,0
Fatalities,Private,0,0,0,0,0,0,0,0,0,0
Fatalities,Farm,0,0,0,0,0,0,0,0,0,0
Serious injuries,Public automated,0,3,0,0,0,0,0,1,0,0
Serious injuries,Public passive,1,0,0,0,0,0,0,0,0,0


In [164]:
stacked_inc

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
Region,EventType,CrossType,Unnamed: 3_level_1
Newfoundland,Accidents,Public automated,0
Newfoundland,Accidents,Public passive,0
Newfoundland,Accidents,Private,0
Newfoundland,Accidents,Farm,0
Newfoundland,Fatalities,Public automated,0
...,...,...,...
Northwest Territories,Fatalities,Farm,0
Northwest Territories,Serious injuries,Public automated,0
Northwest Territories,Serious injuries,Public passive,0
Northwest Territories,Serious injuries,Private,0


## Pivot table

In [186]:
sales = pd.read_csv('./course-files/course-sources/WA_Sales_Products_2012-14.csv')
sales

Unnamed: 0,Retailer country,Order method type,Retailer type,Product line,Product type,Product,Year,Quarter,Revenue,Quantity,Gross margin
0,United States,Fax,Outdoors Shop,Camping Equipment,Cooking Gear,TrailChef Deluxe Cook Set,2012,Q1 2012,59628.66,489,0.347548
1,United States,Fax,Outdoors Shop,Camping Equipment,Cooking Gear,TrailChef Double Flame,2012,Q1 2012,35950.32,252,0.474274
2,United States,Fax,Outdoors Shop,Camping Equipment,Tents,Star Dome,2012,Q1 2012,89940.48,147,0.352772
3,United States,Fax,Outdoors Shop,Camping Equipment,Tents,Star Gazer 2,2012,Q1 2012,165883.41,303,0.282938
4,United States,Fax,Outdoors Shop,Camping Equipment,Sleeping Bags,Hibernator Lite,2012,Q1 2012,119822.20,1415,0.291450
...,...,...,...,...,...,...,...,...,...,...,...
88470,Spain,Sales visit,Outdoors Shop,Mountaineering Equipment,Rope,Husky Rope 60,2014,Q3 2014,30865.50,171,0.299114
88471,Spain,Sales visit,Outdoors Shop,Mountaineering Equipment,Climbing Accessories,Firefly Climbing Lamp,2014,Q3 2014,7485.29,191,0.446287
88472,Spain,Sales visit,Outdoors Shop,Mountaineering Equipment,Climbing Accessories,Firefly Charger,2014,Q3 2014,12255.48,236,0.569420
88473,Spain,Sales visit,Outdoors Shop,Mountaineering Equipment,Tools,Granite Axe,2014,Q3 2014,56448.00,1470,0.491667


In [192]:
# e.g. How many orders they got by order's method in next years?
ps = sales.pivot_table(
    values='Quantity',
    index='Order method type',
    columns=['Year']
)
ps

Year,2012,2013,2014
Order method type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E-mail,557.543466,463.249151,590.597458
Fax,469.68688,453.152727,560.328502
Mail,467.173315,433.402174,241.238095
Sales visit,552.981349,551.139399,707.14964
Special,541.693133,378.388889,1024.333333
Telephone,583.695697,499.756934,574.02901
Web,754.105918,843.022306,958.535879


In [191]:
sales.pivot_table(
    values='Quantity',
    index='Order method type',
    columns=['Year'],
    aggfunc=[np.min, np.max, np.median]
)

Unnamed: 0_level_0,amin,amin,amin,amax,amax,amax,median,median,median
Year,2012,2013,2014,2012,2013,2014,2012,2013,2014
Order method type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
E-mail,4,6,3,14967,10815,18684,237.0,248.0,287.0
Fax,1,8,4,8136,8377,5597,216.0,238.5,291.0
Mail,10,18,33,9076,8505,803,209.0,227.5,203.0
Sales visit,4,5,4,13756,15555,18727,257.0,273.0,343.5
Special,5,29,29,8777,3368,7241,198.0,241.5,638.5
Telephone,2,5,3,17492,9915,14286,239.0,249.5,313.0
Web,1,1,1,35122,42431,67875,315.0,375.0,403.0


In [194]:
ps

Year,2012,2013,2014
Order method type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E-mail,557.543466,463.249151,590.597458
Fax,469.68688,453.152727,560.328502
Mail,467.173315,433.402174,241.238095
Sales visit,552.981349,551.139399,707.14964
Special,541.693133,378.388889,1024.333333
Telephone,583.695697,499.756934,574.02901
Web,754.105918,843.022306,958.535879


### unpivot: melt

In [199]:
ps.melt()

Unnamed: 0,Year,value
0,2012,557.543466
1,2012,469.68688
2,2012,467.173315
3,2012,552.981349
4,2012,541.693133
5,2012,583.695697
6,2012,754.105918
7,2013,463.249151
8,2013,453.152727
9,2013,433.402174


In [232]:
ps.melt(ignore_index=False)

Unnamed: 0_level_0,Year,value
Order method type,Unnamed: 1_level_1,Unnamed: 2_level_1
E-mail,2012,557.543466
Fax,2012,469.68688
Mail,2012,467.173315
Sales visit,2012,552.981349
Special,2012,541.693133
Telephone,2012,583.695697
Web,2012,754.105918
E-mail,2013,463.249151
Fax,2013,453.152727
Mail,2013,433.402174


In [204]:
ps.reset_index()

Year,Order method type,2012,2013,2014
0,E-mail,557.543466,463.249151,590.597458
1,Fax,469.68688,453.152727,560.328502
2,Mail,467.173315,433.402174,241.238095
3,Sales visit,552.981349,551.139399,707.14964
4,Special,541.693133,378.388889,1024.333333
5,Telephone,583.695697,499.756934,574.02901
6,Web,754.105918,843.022306,958.535879


In [205]:
ps.reset_index().melt('Order method type')

Unnamed: 0,Order method type,Year,value
0,E-mail,2012,557.543466
1,Fax,2012,469.68688
2,Mail,2012,467.173315
3,Sales visit,2012,552.981349
4,Special,2012,541.693133
5,Telephone,2012,583.695697
6,Web,2012,754.105918
7,E-mail,2013,463.249151
8,Fax,2013,453.152727
9,Mail,2013,433.402174


In [207]:
ps.reset_index().melt('Order method type').set_index('Order method type').sort_index()

Unnamed: 0_level_0,Year,value
Order method type,Unnamed: 1_level_1,Unnamed: 2_level_1
E-mail,2012,557.543466
E-mail,2014,590.597458
E-mail,2013,463.249151
Fax,2012,469.68688
Fax,2014,560.328502
Fax,2013,453.152727
Mail,2012,467.173315
Mail,2014,241.238095
Mail,2013,433.402174
Sales visit,2014,707.14964


In [209]:
ps.reset_index().melt('Order method type').set_index('Order method type').sort_index().stack().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Order method type,Unnamed: 1_level_1,Unnamed: 2_level_1
E-mail,Year,2012.0
E-mail,value,557.543466
E-mail,Year,2014.0
E-mail,value,590.597458
E-mail,Year,2013.0
E-mail,value,463.249151
Fax,Year,2012.0
Fax,value,469.68688
Fax,Year,2014.0
Fax,value,560.328502


In [216]:
ps.reset_index()

Year,Order method type,2012,2013,2014
0,E-mail,557.543466,463.249151,590.597458
1,Fax,469.68688,453.152727,560.328502
2,Mail,467.173315,433.402174,241.238095
3,Sales visit,552.981349,551.139399,707.14964
4,Special,541.693133,378.388889,1024.333333
5,Telephone,583.695697,499.756934,574.02901
6,Web,754.105918,843.022306,958.535879


In [217]:
ps.reset_index().melt(
    id_vars='Order method type'
)

Unnamed: 0,Order method type,Year,value
0,E-mail,2012,557.543466
1,Fax,2012,469.68688
2,Mail,2012,467.173315
3,Sales visit,2012,552.981349
4,Special,2012,541.693133
5,Telephone,2012,583.695697
6,Web,2012,754.105918
7,E-mail,2013,463.249151
8,Fax,2013,453.152727
9,Mail,2013,433.402174


In [218]:
ps.reset_index().melt(
    id_vars='Order method type',
    value_vars=[2012, 2013]
)

Unnamed: 0,Order method type,Year,value
0,E-mail,2012,557.543466
1,Fax,2012,469.68688
2,Mail,2012,467.173315
3,Sales visit,2012,552.981349
4,Special,2012,541.693133
5,Telephone,2012,583.695697
6,Web,2012,754.105918
7,E-mail,2013,463.249151
8,Fax,2013,453.152727
9,Mail,2013,433.402174


In [221]:
ps.reset_index().melt(
    id_vars='Order method type',
    value_vars=[2012, 2013],
    var_name='Y'
)

Unnamed: 0,Order method type,Y,value
0,E-mail,2012,557.543466
1,Fax,2012,469.68688
2,Mail,2012,467.173315
3,Sales visit,2012,552.981349
4,Special,2012,541.693133
5,Telephone,2012,583.695697
6,Web,2012,754.105918
7,E-mail,2013,463.249151
8,Fax,2013,453.152727
9,Mail,2013,433.402174


In [220]:
ps.reset_index().melt(
    id_vars='Order method type',
    value_vars=[2012, 2013],
    var_name='Y',
    value_name='V'
)

Unnamed: 0,Order method type,Y,V
0,E-mail,2012,557.543466
1,Fax,2012,469.68688
2,Mail,2012,467.173315
3,Sales visit,2012,552.981349
4,Special,2012,541.693133
5,Telephone,2012,583.695697
6,Web,2012,754.105918
7,E-mail,2013,463.249151
8,Fax,2013,453.152727
9,Mail,2013,433.402174


In [229]:
ps.reset_index().melt(
    id_vars='Order method type',
    value_vars=[2012, 2013],
    var_name='Y',
    value_name='V'
)

Unnamed: 0,Order method type,Y,V
0,E-mail,2012,557.543466
1,Fax,2012,469.68688
2,Mail,2012,467.173315
3,Sales visit,2012,552.981349
4,Special,2012,541.693133
5,Telephone,2012,583.695697
6,Web,2012,754.105918
7,E-mail,2013,463.249151
8,Fax,2013,453.152727
9,Mail,2013,433.402174


In [231]:
ps.reset_index().melt(
    id_vars='Order method type',
    value_vars=[2012, 2013],
    var_name='Y',
    value_name='V',
    ignore_index=False
)

Unnamed: 0,Order method type,Y,V
0,E-mail,2012,557.543466
1,Fax,2012,469.68688
2,Mail,2012,467.173315
3,Sales visit,2012,552.981349
4,Special,2012,541.693133
5,Telephone,2012,583.695697
6,Web,2012,754.105918
0,E-mail,2013,463.249151
1,Fax,2013,453.152727
2,Mail,2013,433.402174


# Joins

In [288]:
customers = pd.read_csv(
    './course-files/course-sources/northwind-mongo-master/customers.csv',
    usecols=['CustomerID', 'CompanyName', 'Country']
)
customers

Unnamed: 0,CustomerID,CompanyName,Country
0,ALFKI,Alfreds Futterkiste,Germany
1,ANATR,Ana Trujillo Emparedados y helados,Mexico
2,ANTON,Antonio Moreno Taquer?­a,Mexico
3,AROUT,Around the Horn,UK
4,BERGS,Berglunds snabbk?¶p,Sweden
...,...,...,...
86,WARTH,Wartian Herkku,Finland
87,WELLI,Wellington Importadora,Brazil
88,WHITC,White Clover Markets,USA
89,WILMK,Wilman Kala,Finland


In [300]:
prods = pd.read_csv(
    './course-files/course-sources/northwind-mongo-master/products.csv',
#     usecols=['SupplierID', 'CompanyName', 'Country']
)
prods

Unnamed: 0,ProductID,ProductName,SupplierID,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,1,Chai,1,1,10 boxes x 20 bags,18.00,39,0,10,0
1,2,Chang,1,1,24 - 12 oz bottles,19.00,17,40,25,0
2,3,Aniseed Syrup,1,2,12 - 550 ml bottles,10.00,13,70,25,0
3,4,Chef Anton's Cajun Seasoning,2,2,48 - 6 oz jars,22.00,53,0,0,0
4,5,Chef Anton's Gumbo Mix,2,2,36 boxes,21.35,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
72,73,Röd Kaviar,17,8,24 - 150 g jars,15.00,101,0,5,0
73,74,Longlife Tofu,4,7,5 kg pkg.,10.00,4,20,5,0
74,75,Rhönbräu Klosterbier,12,1,24 - 0.5 l bottles,7.75,125,0,25,0
75,76,Lakkalikööri,23,1,500 ml,18.00,57,0,20,0


### append

In [246]:
suppliers.append(prods)

Unnamed: 0,SupplierID,CompanyName,Country,ProductID,ProductName,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,1,Exotic Liquids,UK,,,,,,,,,
1,2,New Orleans Cajun Delights,USA,,,,,,,,,
2,3,Grandma Kelly's Homestead,USA,,,,,,,,,
3,4,Tokyo Traders,Japan,,,,,,,,,
4,5,Cooperativa de Quesos 'Las Cabras',Spain,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
72,17,,,73.0,Röd Kaviar,8.0,24 - 150 g jars,15.00,101.0,0.0,5.0,0.0
73,4,,,74.0,Longlife Tofu,7.0,5 kg pkg.,10.00,4.0,20.0,5.0,0.0
74,12,,,75.0,Rhönbräu Klosterbier,1.0,24 - 0.5 l bottles,7.75,125.0,0.0,25.0,0.0
75,23,,,76.0,Lakkalikööri,1.0,500 ml,18.00,57.0,0.0,20.0,0.0


### concat

In [301]:
pd.concat(objs=[suppliers, prods])

Unnamed: 0,SupplierID,CompanyName,Country,ProductID,ProductName,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,1,Exotic Liquids,UK,,,,,,,,,
1,2,New Orleans Cajun Delights,USA,,,,,,,,,
2,3,Grandma Kelly's Homestead,USA,,,,,,,,,
3,4,Tokyo Traders,Japan,,,,,,,,,
4,5,Cooperativa de Quesos 'Las Cabras',Spain,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
72,17,,,73.0,Röd Kaviar,8.0,24 - 150 g jars,15.00,101.0,0.0,5.0,0.0
73,4,,,74.0,Longlife Tofu,7.0,5 kg pkg.,10.00,4.0,20.0,5.0,0.0
74,12,,,75.0,Rhönbräu Klosterbier,1.0,24 - 0.5 l bottles,7.75,125.0,0.0,25.0,0.0
75,23,,,76.0,Lakkalikööri,1.0,500 ml,18.00,57.0,0.0,20.0,0.0


In [302]:
pd.concat(
    objs=[suppliers, prods],
    keys='SupplierID'
)

Unnamed: 0,Unnamed: 1,SupplierID,CompanyName,Country,ProductID,ProductName,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
S,0,1,Exotic Liquids,UK,,,,,,,,,
S,1,2,New Orleans Cajun Delights,USA,,,,,,,,,
S,2,3,Grandma Kelly's Homestead,USA,,,,,,,,,
S,3,4,Tokyo Traders,Japan,,,,,,,,,
S,4,5,Cooperativa de Quesos 'Las Cabras',Spain,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
u,72,17,,,73.0,Röd Kaviar,8.0,24 - 150 g jars,15.00,101.0,0.0,5.0,0.0
u,73,4,,,74.0,Longlife Tofu,7.0,5 kg pkg.,10.00,4.0,20.0,5.0,0.0
u,74,12,,,75.0,Rhönbräu Klosterbier,1.0,24 - 0.5 l bottles,7.75,125.0,0.0,25.0,0.0
u,75,23,,,76.0,Lakkalikööri,1.0,500 ml,18.00,57.0,0.0,20.0,0.0


In [303]:
pd.concat(
    objs=[suppliers, prods],
    axis=1,
    join='inner'
)

Unnamed: 0,SupplierID,CompanyName,Country,ProductID,ProductName,SupplierID.1,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,1,Exotic Liquids,UK,1,Chai,1,1,10 boxes x 20 bags,18.0,39,0,10,0
1,2,New Orleans Cajun Delights,USA,2,Chang,1,1,24 - 12 oz bottles,19.0,17,40,25,0
2,3,Grandma Kelly's Homestead,USA,3,Aniseed Syrup,1,2,12 - 550 ml bottles,10.0,13,70,25,0
3,4,Tokyo Traders,Japan,4,Chef Anton's Cajun Seasoning,2,2,48 - 6 oz jars,22.0,53,0,0,0
4,5,Cooperativa de Quesos 'Las Cabras',Spain,5,Chef Anton's Gumbo Mix,2,2,36 boxes,21.35,0,0,0,1
5,6,Mayumi's,Japan,6,Grandma's Boysenberry Spread,3,2,12 - 8 oz jars,25.0,120,0,25,0
6,7,Pavlova Ltd.,Australia,7,Uncle Bob's Organic Dried Pears,3,7,12 - 1 lb pkgs.,30.0,15,0,10,0
7,8,Specialty Biscuits Ltd.,UK,8,Northwoods Cranberry Sauce,3,2,12 - 12 oz jars,40.0,6,0,0,0
8,9,PB Knäckebröd AB,Sweden,9,Mishi Kobe Niku,4,6,18 - 500 g pkgs.,97.0,29,0,0,1
9,10,Refrescos Americanas LTDA,Brazil,10,Ikura,4,8,12 - 200 ml jars,31.0,31,0,0,0


### join

In [304]:
# suppliers.join(prods)  # ValueError
suppliers.set_index('SupplierID').join(prods)

Unnamed: 0_level_0,CompanyName,Country,ProductID,ProductName,SupplierID,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
SupplierID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Exotic Liquids,UK,2,Chang,1,1,24 - 12 oz bottles,19.0,17,40,25,0
2,New Orleans Cajun Delights,USA,3,Aniseed Syrup,1,2,12 - 550 ml bottles,10.0,13,70,25,0
3,Grandma Kelly's Homestead,USA,4,Chef Anton's Cajun Seasoning,2,2,48 - 6 oz jars,22.0,53,0,0,0
4,Tokyo Traders,Japan,5,Chef Anton's Gumbo Mix,2,2,36 boxes,21.35,0,0,0,1
5,Cooperativa de Quesos 'Las Cabras',Spain,6,Grandma's Boysenberry Spread,3,2,12 - 8 oz jars,25.0,120,0,25,0
6,Mayumi's,Japan,7,Uncle Bob's Organic Dried Pears,3,7,12 - 1 lb pkgs.,30.0,15,0,10,0
7,Pavlova Ltd.,Australia,8,Northwoods Cranberry Sauce,3,2,12 - 12 oz jars,40.0,6,0,0,0
8,Specialty Biscuits Ltd.,UK,9,Mishi Kobe Niku,4,6,18 - 500 g pkgs.,97.0,29,0,0,1
9,PB Knäckebröd AB,Sweden,10,Ikura,4,8,12 - 200 ml jars,31.0,31,0,0,0
10,Refrescos Americanas LTDA,Brazil,11,Queso Cabrales,5,4,1 kg pkg.,21.0,22,30,30,0


In [305]:
suppliers.set_index('SupplierID').join(prods, how='outer')

Unnamed: 0,CompanyName,Country,ProductID,ProductName,SupplierID,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,,,1,Chai,1,1,10 boxes x 20 bags,18.00,39,0,10,0
1,Exotic Liquids,UK,2,Chang,1,1,24 - 12 oz bottles,19.00,17,40,25,0
2,New Orleans Cajun Delights,USA,3,Aniseed Syrup,1,2,12 - 550 ml bottles,10.00,13,70,25,0
3,Grandma Kelly's Homestead,USA,4,Chef Anton's Cajun Seasoning,2,2,48 - 6 oz jars,22.00,53,0,0,0
4,Tokyo Traders,Japan,5,Chef Anton's Gumbo Mix,2,2,36 boxes,21.35,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
72,,,73,Röd Kaviar,17,8,24 - 150 g jars,15.00,101,0,5,0
73,,,74,Longlife Tofu,4,7,5 kg pkg.,10.00,4,20,5,0
74,,,75,Rhönbräu Klosterbier,12,1,24 - 0.5 l bottles,7.75,125,0,25,0
75,,,76,Lakkalikööri,23,1,500 ml,18.00,57,0,20,0


### merge

In [307]:
suppliers.merge(prods)

Unnamed: 0,SupplierID,CompanyName,Country,ProductID,ProductName,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,1,Exotic Liquids,UK,1,Chai,1,10 boxes x 20 bags,18.00,39,0,10,0
1,1,Exotic Liquids,UK,2,Chang,1,24 - 12 oz bottles,19.00,17,40,25,0
2,1,Exotic Liquids,UK,3,Aniseed Syrup,2,12 - 550 ml bottles,10.00,13,70,25,0
3,2,New Orleans Cajun Delights,USA,4,Chef Anton's Cajun Seasoning,2,48 - 6 oz jars,22.00,53,0,0,0
4,2,New Orleans Cajun Delights,USA,5,Chef Anton's Gumbo Mix,2,36 boxes,21.35,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
72,27,Escargots Nouveaux,France,58,Escargots de Bourgogne,8,24 pieces,13.25,62,0,20,0
73,28,Gai pâturage,France,59,Raclette Courdavault,4,5 kg pkg.,55.00,79,0,0,0
74,28,Gai pâturage,France,60,Camembert Pierrot,4,15 - 300 g rounds,34.00,19,0,0,0
75,29,Forêts d'érables,Canada,61,Sirop d'érable,2,24 - 500 ml bottles,28.50,113,0,25,0


In [271]:
suppliers.merge(prods, how='outer', left_on='SupplierID', right_on='SupplierID')

Unnamed: 0,SupplierID,CompanyName,Country,ProductID,ProductName,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,1,Exotic Liquids,UK,1,Chai,1,10 boxes x 20 bags,18.00,39,0,10,0
1,1,Exotic Liquids,UK,2,Chang,1,24 - 12 oz bottles,19.00,17,40,25,0
2,1,Exotic Liquids,UK,3,Aniseed Syrup,2,12 - 550 ml bottles,10.00,13,70,25,0
3,2,New Orleans Cajun Delights,USA,4,Chef Anton's Cajun Seasoning,2,48 - 6 oz jars,22.00,53,0,0,0
4,2,New Orleans Cajun Delights,USA,5,Chef Anton's Gumbo Mix,2,36 boxes,21.35,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
72,27,Escargots Nouveaux,France,58,Escargots de Bourgogne,8,24 pieces,13.25,62,0,20,0
73,28,Gai pâturage,France,59,Raclette Courdavault,4,5 kg pkg.,55.00,79,0,0,0
74,28,Gai pâturage,France,60,Camembert Pierrot,4,15 - 300 g rounds,34.00,19,0,0,0
75,29,Forêts d'érables,Canada,61,Sirop d'érable,2,24 - 500 ml bottles,28.50,113,0,25,0


In [276]:
suppliers.merge(prods, how='outer', validate='1:m')

Unnamed: 0,SupplierID,CompanyName,Country,ProductID,ProductName,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,1,Exotic Liquids,UK,1,Chai,1,10 boxes x 20 bags,18.00,39,0,10,0
1,1,Exotic Liquids,UK,2,Chang,1,24 - 12 oz bottles,19.00,17,40,25,0
2,1,Exotic Liquids,UK,3,Aniseed Syrup,2,12 - 550 ml bottles,10.00,13,70,25,0
3,2,New Orleans Cajun Delights,USA,4,Chef Anton's Cajun Seasoning,2,48 - 6 oz jars,22.00,53,0,0,0
4,2,New Orleans Cajun Delights,USA,5,Chef Anton's Gumbo Mix,2,36 boxes,21.35,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
72,27,Escargots Nouveaux,France,58,Escargots de Bourgogne,8,24 pieces,13.25,62,0,20,0
73,28,Gai pâturage,France,59,Raclette Courdavault,4,5 kg pkg.,55.00,79,0,0,0
74,28,Gai pâturage,France,60,Camembert Pierrot,4,15 - 300 g rounds,34.00,19,0,0,0
75,29,Forêts d'érables,Canada,61,Sirop d'érable,2,24 - 500 ml bottles,28.50,113,0,25,0


In [279]:
merged_sp = suppliers.merge(prods, how='outer', validate='1:m', indicator=True)
merged_sp

Unnamed: 0,SupplierID,CompanyName,Country,ProductID,ProductName,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued,_merge
0,1,Exotic Liquids,UK,1,Chai,1,10 boxes x 20 bags,18.00,39,0,10,0,both
1,1,Exotic Liquids,UK,2,Chang,1,24 - 12 oz bottles,19.00,17,40,25,0,both
2,1,Exotic Liquids,UK,3,Aniseed Syrup,2,12 - 550 ml bottles,10.00,13,70,25,0,both
3,2,New Orleans Cajun Delights,USA,4,Chef Anton's Cajun Seasoning,2,48 - 6 oz jars,22.00,53,0,0,0,both
4,2,New Orleans Cajun Delights,USA,5,Chef Anton's Gumbo Mix,2,36 boxes,21.35,0,0,0,1,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,27,Escargots Nouveaux,France,58,Escargots de Bourgogne,8,24 pieces,13.25,62,0,20,0,both
73,28,Gai pâturage,France,59,Raclette Courdavault,4,5 kg pkg.,55.00,79,0,0,0,both
74,28,Gai pâturage,France,60,Camembert Pierrot,4,15 - 300 g rounds,34.00,19,0,0,0,both
75,29,Forêts d'érables,Canada,61,Sirop d'érable,2,24 - 500 ml bottles,28.50,113,0,25,0,both


In [283]:
merged_sp[merged_sp['_merge'] != 'both']

Unnamed: 0,SupplierID,CompanyName,Country,ProductID,ProductName,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued,_merge
