# Imports

In [1]:
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

# Load Datasets

## Raw Data

In [2]:
# Both raw tables live in the same workbook: open it once and parse each sheet
# from the shared handle instead of re-reading the file from disk per sheet.
with pd.ExcelFile("./Raw_Data/Media data-Sale Calendar-NPS Scores_Data.xlsx") as raw_workbook:
    media = pd.read_excel(raw_workbook)
    stock = pd.read_excel(raw_workbook, sheet_name=2)

# Create the marketing dataset
# The usable column labels sit in the row at position 1 of the parsed sheet, so
# promote that row to the header and discard the two leading rows.
media = media.drop(columns='Unnamed: 0')
media.columns = media.iloc[1]
media = media.drop(media.index[:2])
# Missing values are replaced with 0. The columns held header text until the
# rows above were dropped, so they are object dtype here; `infer_objects()`
# requests proper numeric dtypes explicitly instead of relying on the silent
# downcasting inside `fillna`, which pandas has deprecated.
media = media.fillna(0).infer_objects()

# Create monthly dataset of NPS and Stock Price
# Transpose so that each month becomes a row, take the column labels from the
# first transposed row, and use the first column as the index.
stock = stock.T
stock.columns = stock.iloc[0]
stock = stock.drop(stock.index[0])
stock = stock.set_index(stock.columns[0])
stock.columns.name = None
stock.index.name = None

# Rewrite each index label into "%b%y" form: the first four characters are
# looked up in `month_mapping` ("July" -> "Jul", "Sept" -> "Sep"); any other
# label keeps its first three characters. Everything after the fourth
# character is appended with apostrophes removed.
# NOTE(review): presumably the labels look like "July'23" / "Aug'23" — confirm
# against the sheet; a longer spelled-out month name would not parse.
month_mapping = {
    "July": "Jul",
    "Sept": "Sep"
}
stock.index = [month_mapping.get(month[:4], month[:3]) + month[4:].replace("'", "") for month in stock.index]
# Parse the labels and roll every date forward to the last day of its month.
stock.index = pd.to_datetime(stock.index, format="%b%y") + pd.offsets.MonthEnd(0)
stock.head(5)

Unnamed: 0,NPS,Stock Index
2023-07-31,54.599588,1177
2023-08-31,59.987101,1206
2023-09-30,46.925419,1101
2023-10-31,44.398389,1210
2023-11-30,47.0,1233


## Cleaned Data

In [3]:
# Order data: index by `order_date` (parsed to timestamps) and discard the
# `Unnamed: 0` column.
df = (
    pd.read_csv('./Data/daily_data.csv')
    .set_index('order_date')
    .drop(columns='Unnamed: 0')
)
df.index = pd.DatetimeIndex(df.index)

# Weather data: here the `Unnamed: 0` column holds the timestamps, so it
# becomes the index under the name 'Date/Time'.
weather = (
    pd.read_csv('./Data/weather_combined.csv')
    .set_index('Unnamed: 0')
    .rename_axis('Date/Time')
)
weather.index = pd.DatetimeIndex(weather.index)

# Average every weather measurement within each month-end ('ME') bin.
monthly_weather = weather.resample('ME').mean()
monthly_weather.head(5)

Unnamed: 0_level_0,Max Temp (°C),Min Temp (°C),Mean Temp (°C),Heat Deg Days (°C),Cool Deg Days (°C),Total Rain (mm),Total Snow (cm),Total Precip (mm),Snow on Grnd (cm)
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-07-31,25.145161,14.217742,19.932258,1.866129,3.798387,1.087097,0.0,1.087097,0.0
2023-08-31,24.983871,15.024194,20.020968,1.4,3.420968,2.094174,0.009285,2.172875,2.129032
2023-09-30,21.308333,11.908333,16.476667,3.382222,1.858889,2.001667,0.0,2.001667,0.266667
2023-10-31,17.440323,10.08871,14.609355,4.12371,0.733065,1.524206,0.040928,1.698943,0.391517
2023-11-30,13.040833,4.653571,9.219619,9.110381,0.33,2.346432,0.567121,2.958163,1.24937


# Create Monthly Dataset

In [4]:
# Pre-Processing
# Select the order-level columns used for the monthly roll-up. `.copy()` makes
# `monthly_df` an independent frame, so the column assignments below write to
# it directly rather than to a slice of `df` (chained assignment, which pandas
# reports with SettingWithCopyWarning).
monthly_df = df[['gmv', 'units', 'deliverybdays',
       'deliverycdays', 'order_payment_type', 'sla','product_mrp',
       'product_procurement_sla','product_analytic_category']].copy()
# Store the two label columns as categoricals.
monthly_df['order_payment_type'] = pd.Categorical(monthly_df['order_payment_type'])
monthly_df['product_analytic_category'] = pd.Categorical(monthly_df['product_analytic_category'])
# Resampling in the next step needs a DatetimeIndex.
monthly_df.index = pd.DatetimeIndex(monthly_df.index)

In [5]:
# Roll the order-level rows up to month-end ('ME') bins, choosing the
# aggregation per column group.
sum_cols = ['gmv', 'units', 'product_mrp']
mean_cols = ['deliverybdays', 'deliverycdays', 'sla', 'product_procurement_sla']
category_cols = ['order_payment_type', 'product_analytic_category']

# Totals per month.
summed_columns = monthly_df[sum_cols].resample('ME').sum()
# Means per month.
averaged_columns = monthly_df[mean_cols].resample('ME').mean()

# One indicator column per category level; summing the indicators per month
# gives the number of rows that carried each level.
categorical_sums = pd.get_dummies(df[category_cols])
categorical_sums.index = pd.DatetimeIndex(categorical_sums.index)
categorical_sums = categorical_sums.resample('ME').sum()

# Place the three monthly tables side by side, aligned on the month-end index.
monthly_dataset = pd.concat(
    [summed_columns, averaged_columns, categorical_sums],
    axis='columns',
)
monthly_dataset.head(5)

Unnamed: 0_level_0,gmv,units,product_mrp,deliverybdays,deliverycdays,sla,product_procurement_sla,order_payment_type_COD,order_payment_type_Prepaid,product_analytic_category_Camera,product_analytic_category_CameraAccessory,product_analytic_category_EntertainmentSmall,product_analytic_category_GameCDDVD,product_analytic_category_GamingHardware
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-07-31,169742400.0,87617,288693773,0.003124,0.003323,6.162169,4.269408,62188,23266,3302,11750,54535,4737,11130
2023-08-31,223858.0,152,363947,0.033333,0.04,5.706667,2.666667,110,40,2,19,97,3,29
2023-09-30,204286100.0,103191,338010322,0.001566,0.001804,5.746338,6.065683,77075,23819,4487,16808,60909,5725,12965
2023-10-31,497444100.0,207216,915132391,0.001228,0.001455,5.583446,3.755622,151188,51547,15495,25346,129088,10021,22785
2023-11-30,325808500.0,129876,535682458,0.000623,0.000741,5.682916,4.200128,100132,26727,8751,17627,75513,7736,17232


In [6]:
# Build a first-of-month timestamp from the Year/Month columns, use it as the
# index (named 'Date'), and drop the two source columns.
media = (
    media
    .assign(Date=pd.to_datetime(media[['Year', 'Month']].assign(Day=1)))
    .set_index('Date')
    .drop(columns=['Year', 'Month'])
)
# Move every timestamp to the last day of its month so the index lines up with
# the month-end indexes of the other monthly tables.
media.index = media.index + pd.offsets.MonthEnd(0)
# Clear the name left on the column axis by the header-row promotion.
media.columns.name = None
media.head(5)

Unnamed: 0_level_0,Total Investment,TV,Digital,Sponsorship,Content Marketing,Online marketing,Affiliates,SEM,Radio,Other
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-07-31,17.061775,0.21533,2.533014,7.41427,0.000933,1.327278,0.547254,5.023697,0.0,0.0
2023-08-31,5.064306,0.006438,1.278074,1.063332,6e-06,0.129244,0.073684,2.513528,0.0,0.0
2023-09-30,96.25438,3.879504,1.356528,62.787651,0.610292,16.37999,5.038266,6.202149,0.0,0.0
2023-10-31,170.156297,6.144711,12.62248,84.672532,3.444075,24.371778,6.973711,31.927011,0.0,0.0
2023-11-30,51.21622,4.22063,1.275469,14.172116,0.168633,19.561574,6.595767,5.222032,0.0,0.0


In [7]:
# Combine the monthly order aggregates with media spend, NPS / stock index and
# weather, aligned on the shared month-end index.
# The table is rebuilt from its component frames rather than concatenated onto
# `monthly_dataset` itself, so re-running this cell cannot append the
# media / stock / weather columns a second time.
monthly_dataset = pd.concat(
    [summed_columns, averaged_columns, categorical_sums, media, stock, monthly_weather],
    axis=1,
)
monthly_dataset

gmv                                             0
units                                           0
product_mrp                                     0
deliverybdays                                   0
deliverycdays                                   0
sla                                             0
product_procurement_sla                         0
order_payment_type_COD                          0
order_payment_type_Prepaid                      0
product_analytic_category_Camera                0
product_analytic_category_CameraAccessory       0
product_analytic_category_EntertainmentSmall    0
product_analytic_category_GameCDDVD             0
product_analytic_category_GamingHardware        0
Total Investment                                0
TV                                              0
Digital                                         0
Sponsorship                                     0
Content Marketing                               0
Online marketing                                0


# Export

In [None]:
# Write the combined monthly table to disk; the month-end index is written as
# the first CSV column (pandas' default for `to_csv`).
monthly_export_path = './Data/monthly_dataset.csv'
monthly_dataset.to_csv(monthly_export_path)