In [76]:
import pandas as pd

In [157]:
# Read raw data
train = pd.read_csv('data/contraceptive_logistics_data.csv')
location = pd.read_csv('data/service_delivery_site_data.csv')
product = pd.read_csv('data/product.csv')

# Expand data frame
month_year = train[['year','month']].drop_duplicates().reset_index(drop=True)
product_site = train[['region','district','site_code','product_code']].drop_duplicates().reset_index(drop=True)
train_base = pd.merge(month_year.assign(j=1), product_site.assign(j=1)).drop(columns = 'j')
train = pd.merge(train_base, train, how='left')

# Add date and index
train['day'] = 1
train['ds'] = pd.to_datetime(train[['year','month','day']])
train = train.sort_values(by=['site_code','product_code','ds']).reset_index(drop=True)
train['isna'] = train['stock_distributed'].isna()
train['idx'] = train.groupby(['site_code','product_code'])['ds'].rank(method='first', ascending=True)
train = train.drop(columns = ['year','month','day'])

# Join with location
train = pd.merge(train, location.drop(columns=['site_region','site_district']))

# Join with product
train = pd.merge(train, product)

# Rearrange columns
train = train[['site_code','product_code'] + train.drop(columns=['site_code','product_code']).columns.tolist()]

# Change category
train = train.sort_values(by=['site_code','product_code','ds']).reset_index(drop=True)
train['idx'] = train['idx'].astype(int)
train['ds'] = train['ds'].dt.date.astype(str)

In [147]:
train

Unnamed: 0,site_code,product_code,region,district,stock_initial,stock_received,stock_distributed,stock_adjustment,stock_end,average_monthly_consumption,stock_stockout_days,stock_ordered,ds,isna,idx,site_type,site_latitude,site_longitude,product_type,product_name
0,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-01-01,True,1,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
1,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-02-01,True,2,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
2,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-03-01,True,3,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
3,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-04-01,True,4,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
4,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-05-01,True,5,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61060,C5076,AS27139,KABADOUGOU-BAFING-FOLON,KORO,,,,,,,,,2019-05-01,True,41,Hospital,8.55294,-7.46151,Emergency Contraceptive (Pill),LEVONORGESTREL 1.5 MG CP BOITE
61061,C5076,AS27139,KABADOUGOU-BAFING-FOLON,KORO,,,,,,,,,2019-06-01,True,42,Hospital,8.55294,-7.46151,Emergency Contraceptive (Pill),LEVONORGESTREL 1.5 MG CP BOITE
61062,C5076,AS27139,KABADOUGOU-BAFING-FOLON,KORO,,,,,,,,,2019-07-01,True,43,Hospital,8.55294,-7.46151,Emergency Contraceptive (Pill),LEVONORGESTREL 1.5 MG CP BOITE
61063,C5076,AS27139,KABADOUGOU-BAFING-FOLON,KORO,,,,,,,,,2019-08-01,True,44,Hospital,8.55294,-7.46151,Emergency Contraceptive (Pill),LEVONORGESTREL 1.5 MG CP BOITE


In [148]:
df = pd.read_csv('data/ifc_clean.csv')
df

Unnamed: 0,site_code,product_code,region,district,stock_initial,stock_received,stock_distributed,stock_adjustment,stock_end,average_monthly_consumption,stock_stockout_days,stock_ordered,ds,isna,idx,site_type,site_latitude,site_longitude,product_type,product_name
0,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-01-01,True,1,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
1,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-02-01,True,2,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
2,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-03-01,True,3,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
3,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-04-01,True,4,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
4,C1004,AS21126,AGNEBY-TIASSA-ME,AGBOVILLE,,,,,,,,,2016-05-01,True,5,Hospital,5.92834,-4.21145,Injectable Contraceptive,MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SA...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61060,C5076,AS27139,KABADOUGOU-BAFING-FOLON,KORO,,,,,,,,,2019-05-01,True,41,Hospital,8.55294,-7.46151,Emergency Contraceptive (Pill),LEVONORGESTREL 1.5 MG CP BOITE
61061,C5076,AS27139,KABADOUGOU-BAFING-FOLON,KORO,,,,,,,,,2019-06-01,True,42,Hospital,8.55294,-7.46151,Emergency Contraceptive (Pill),LEVONORGESTREL 1.5 MG CP BOITE
61062,C5076,AS27139,KABADOUGOU-BAFING-FOLON,KORO,,,,,,,,,2019-07-01,True,43,Hospital,8.55294,-7.46151,Emergency Contraceptive (Pill),LEVONORGESTREL 1.5 MG CP BOITE
61063,C5076,AS27139,KABADOUGOU-BAFING-FOLON,KORO,,,,,,,,,2019-08-01,True,44,Hospital,8.55294,-7.46151,Emergency Contraceptive (Pill),LEVONORGESTREL 1.5 MG CP BOITE


In [149]:
(train.product_type == df.product_type).sum()

61065

In [163]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61065 entries, 0 to 61064
Data columns (total 20 columns):
site_code                      61065 non-null object
product_code                   61065 non-null object
region                         61065 non-null object
district                       61065 non-null object
stock_initial                  38842 non-null float64
stock_received                 38842 non-null float64
stock_distributed              38842 non-null float64
stock_adjustment               38842 non-null float64
stock_end                      38842 non-null float64
average_monthly_consumption    38842 non-null float64
stock_stockout_days            38842 non-null float64
stock_ordered                  38072 non-null float64
ds                             61065 non-null object
isna                           61065 non-null bool
idx                            61065 non-null int64
site_type                      61065 non-null object
site_latitude                  61065 n

In [166]:
for col in df.columns.tolist():
    print(col, sum(df[col] == train[col]))

site_code 61065
product_code 61065
region 61065
district 61065
stock_initial 38842
stock_received 38842
stock_distributed 38842
stock_adjustment 38842
stock_end 38842
average_monthly_consumption 38842
stock_stockout_days 38842
stock_ordered 38072
ds 61065
isna 61065
idx 61065
site_type 61065
site_latitude 61065
site_longitude 61065
product_type 61065
product_name 0


In [175]:
train.product_name[0].strip

<function str.strip>

In [169]:
df.product_name[0]

'MEDROXYPROGESTERONE 104MG/0.65ML INJ UNITE (SAYANA PRESS) UNITE'