In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Open the Excel workbook (an .xlsx file, not a CSV). Individual sheets are
# parsed from this handle later via big_data.parse(<sheet name>).
big_data = pd.ExcelFile("MM_for_nov17_aug19.xlsx")

In [3]:
# Sheet names of the workbook. Each name is later used both to parse the
# sheet and as the value of the "Month" column.
months = big_data.sheet_names

In [4]:
# Columns extracted from every monthly sheet; they are expected to exist in
# all sheets. The list mixes item prices, location codes and q_xrate_* columns.
# Order matters: it becomes the column order of the merged table.
_price_cols = [
    "q_bread_price_per_8pieces",
    "q_bulgur_price_per_kilo",
    "q_chicken_price_per_kilo",
    "q_eggs_price_per_30eggs",
    "q_potatoes_price_per_kilo",
    "q_tomatoes_price_per_kilo",
    "q_cucumbers_price",
    "q_onions_price",
    "q_ghee_price_per_kilo",
    "q_oil_price_per_litre",
    "q_rlentils_price_per_kilo",
    "q_rice_price_per_kilo",
    "q_salt_price_per_500g",
    "q_sugar_price_per_kilo",
    "q_tomatop_price",
    "q_isoap_price_per_piece",
    "q_lsoap_price_per_kilo",
    "q_dsoap_price_per_litre",
    "q_spads_price_per_10pads",
    "q_toothp_price_per_100g",
    "q_sgas_price",
    "q_mrkaz_price",
    "q_water_price_per_litre",
    "q_data_price_per_gb",
]

_location_cols = ["region", "q_gov", "q_district", "q_sbd", "q_town"]

_xrate_cols = [
    "q_xrate_usdsyp_buy",
    "q_xrate_usdsyp_sell",
    "q_xrate_trysyp_buy",
    "q_xrate_trysyp_sell",
    "q_xrate_jodsyp_buy",
    "q_xrate_jodsyp_sell",
]

# Further price columns that sit after the q_xrate_* block in the final order.
_more_price_cols = [
    "q_tomatop_price_per_kilo",
    "q_onions_price_per_kilo",
    "q_cucumbers_price_per_kilo",
    "q_rgpetrol_price",
    "q_mrpetrol_price",
    "q_rgdiesel_price",
    "q_mrdiesel_price",
]

columns_required = _price_cols + _location_cols + _xrate_cols + _more_price_cols

In [5]:
# Inspect the sheet names. Note that 'Aug18' is the only label written
# without a space between month and year.
months

['Nov 17',
 'Dec 17',
 'Jan 18',
 'Feb 18',
 'March 18',
 'April 18',
 'May 18',
 'June 18',
 'July 18',
 'Aug18',
 'Sep 18',
 'Oct 18',
 'Nov 18',
 'Dec 18',
 'Jan 19',
 'Feb 19',
 'March 19',
 'April 19',
 'May 19',
 'June 19',
 'July 19',
 'Aug 19']

In [5]:
# Creating an empty dataframe with all the required columns.
# NOTE(review): `df` is not referenced by any other cell visible in this
# notebook - confirm whether it is still needed.
df = pd.DataFrame(columns = columns_required)

In [6]:
# Build one dataframe per month, restricted to the required columns and
# tagged with the name of the sheet it came from.
df_months = []
for mon in months:
    # Parse each sheet exactly once; reading the sheet is the expensive step,
    # and the row count is not needed separately.
    df_mon = big_data.parse(mon)
    # Select the columns in `columns_required` order, then add "Month" via
    # .assign, which returns a new frame. This avoids writing a column onto a
    # slice of the parsed sheet (SettingWithCopyWarning). The scalar label is
    # broadcast to every row and is the sheet name verbatim (so 'Aug18' keeps
    # its missing space).
    df_mon = df_mon[columns_required].assign(Month=mon)
    df_months.append(df_mon)

In [7]:
# Merging all months into one table. Each month's own row index is kept at
# this point, so index labels repeat across months until the index is reset.
all_prices_df = pd.concat(df_months)

In [8]:
# Replace the repeated per-month index labels with one continuous 0..n-1
# index (the old labels are discarded); later cells address rows through .loc.
all_prices_df = all_prices_df.reset_index(drop=True)

## Correcting the mapping of towns to sub-districts, districts and governorates

In [10]:
# Load the 'admin4' sheet of the p-code reference workbook. Its admin4Pcode
# values are compared against q_town in the next cell, and the admin3/2/1
# Pcode columns supply the corrected q_sbd / q_district / q_gov values.
locations = pd.read_excel('UNOCHA_pcodes_nocamps.xlsx', sheet_name = 'admin4')
# Plain Python list of every admin4Pcode value in the reference table.
all_towns = list(locations['admin4Pcode'])

In [11]:
# Overwrite q_sbd / q_district / q_gov with the codes the reference table
# holds for each row's town (q_town matched against admin4Pcode).
#
# A single lookup table replaces the per-town loop, which scanned the whole
# price table and the whole reference table once for every reference town.

# Rows with a missing town code are dropped so that NaN never matches.
# Duplicated town codes keep their first occurrence, matching the previous
# "first matching row" behaviour.
town_lookup = (
    locations
    .dropna(subset=['admin4Pcode'])
    .drop_duplicates(subset='admin4Pcode', keep='first')
    .set_index('admin4Pcode')
)

# Only rows whose town exists in the reference table are corrected; all other
# rows keep the codes they came with.
known_town = all_prices_df['q_town'].isin(town_lookup.index)
towns_to_fix = all_prices_df.loc[known_town, 'q_town']

for price_col, pcode_col in [
    ('q_sbd', 'admin3Pcode'),
    ('q_district', 'admin2Pcode'),
    ('q_gov', 'admin1Pcode'),
]:
    # The mapped Series carries the same index labels as the masked rows, so
    # the assignment aligns row by row.
    all_prices_df.loc[known_town, price_col] = towns_to_fix.map(town_lookup[pcode_col])

In [12]:
# Visual check of the merged table after the location codes were corrected.
all_prices_df

Unnamed: 0,q_bread_price_per_8pieces,q_bulgur_price_per_kilo,q_chicken_price_per_kilo,q_eggs_price_per_30eggs,q_potatoes_price_per_kilo,q_tomatoes_price_per_kilo,q_cucumbers_price,q_onions_price,q_ghee_price_per_kilo,q_oil_price_per_litre,...,q_xrate_jodsyp_buy,q_xrate_jodsyp_sell,q_tomatop_price_per_kilo,q_onions_price_per_kilo,q_cucumbers_price_per_kilo,q_rgpetrol_price,q_mrpetrol_price,q_rgdiesel_price,q_mrdiesel_price,Month
0,1644.44,,,,,,,,,,...,,,,,,,,,,Nov 17
1,1644.44,,,,,,,,,,...,,,,,,,,,,Nov 17
2,2044.44,,,,,,,,,,...,,,,,,,,,,Nov 17
3,,,,,,,,,,,...,,,,,,,,,,Nov 17
4,,,,,,,,,,,...,,,,,,,,,,Nov 17
5,,,,,,,,,,,...,,,,,,,,,,Nov 17
6,,,,,,425,,,,,...,,,,3000,,,,,,Nov 17
7,,,,,,400,,,,,...,,,,3200,,,,,,Nov 17
8,,,,,,450,,,,,...,,,,2900,,,,,,Nov 17
9,1644.44,,,,,,,,,,...,,,,,,,,,,Nov 17


In [13]:
# Save the merged table. With to_csv's default index=True the row index is
# written as an unnamed first column.
# NOTE(review): pass index=False if downstream readers do not expect that
# column - confirm before changing.
all_prices_df.to_csv("aggregated_monthly.csv")