# Start

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Open the workbook once and parse all three sheets from the same handle
# instead of re-opening and re-parsing the file for every sheet.
with pd.ExcelFile("./Analyst_dataset.xlsx") as workbook:
    purchase_data = pd.read_excel(workbook, sheet_name='Purchase Exit Survey Data')
    airings_data = pd.read_excel(workbook, sheet_name='Airings')
    # The first row of the Lookup sheet is skipped; the row after it becomes the header.
    lookup_data = pd.read_excel(workbook, sheet_name='Lookup', skiprows=1)

# Preprocessing

In [3]:
# Row/column count of the Lookup sheet as loaded, before all-empty rows are dropped.
lookup_data.shape

(44, 3)

In [4]:
# Remove rows in which every column is missing; partially filled rows are kept.
lookup_data = lookup_data.dropna(how='all')

In [5]:
# Re-check the shape to see how many all-empty rows were removed.
lookup_data.shape

(43, 3)

In [6]:
# Normalise the letter case of both join keys: survey names to lower case
# (purchase totals are keyed by lower-cased source names) and airing codes to
# upper case (the airings 'Network' column is upper-cased as well).
lookup_data = lookup_data.assign(**{
    'Exit Survey': lookup_data['Exit Survey'].str.lower(),
    'Airings': lookup_data['Airings'].str.upper(),
})

In [7]:
# Upper-case the network codes so they compare equal to the upper-cased
# 'Airings' column of the lookup table when the two are joined.
airings_data['Network'] = airings_data['Network'].str.upper()

# Transposing Purchase Exit Survey Data - Converting dates from columns to rows in Purchase Exit Survey Data

In [8]:
# The first data row is expected to contain exactly one non-null cell: the reporting year.
year_cells = purchase_data.iloc[0, :].dropna()
if len(year_cells) != 1:
    raise ValueError(
        f'Expected exactly one non-null cell in the year row, found {len(year_cells)}'
    )
# Extract the scalar explicitly: calling int() on a whole Series is deprecated
# in recent pandas releases.
current_year = int(year_cells.iloc[0])
current_year

2017

In [9]:
# Month names are stored in the row at position 2, from the third column on;
# keep the non-empty cells in their original left-to-right order.
months = purchase_data.iloc[2, 2:].dropna().tolist()
months

['September', 'October']

In [10]:
def _parse_day_numbers(day_nums, month_names, year):
    """Convert consecutive day-of-month numbers into ``datetime.date`` objects.

    The day numbers are assumed to be in chronological order. Whenever a day
    number is smaller than the one before it (e.g. 30 followed by 1), the
    following days are assigned to the next entry of ``month_names``.

    Parameters
    ----------
    day_nums : iterable of int
        Day-of-month numbers in column order.
    month_names : sequence of str
        Full English month names (parsed with ``%B``), in order of appearance.
    year : int
        Year applied to every date. The year is NOT advanced when the months
        wrap from December to January.

    Returns
    -------
    list of datetime.date
        One date per entry of ``day_nums``.

    Raises
    ------
    ValueError
        If the day sequence needs more months than ``month_names`` provides,
        or if a day number is not valid for its month.
    """
    dates = []
    month_idx = 0
    previous_day = None
    for day in day_nums:
        # A drop in the day number means a new month has started.
        if previous_day is not None and day < previous_day:
            month_idx += 1
        if month_idx >= len(month_names):
            raise ValueError(
                f'Day sequence spans more months than the {len(month_names)} month name(s) provided'
            )
        stamp = f'{year}-{month_names[month_idx]}-{day}'
        dates.append(datetime.strptime(stamp, '%Y-%B-%d').date())
        previous_day = day
    return dates


# Day-of-month numbers live in the row at position 3, from the third column on.
day_nums = np.array(purchase_data.iloc[3, 2:], dtype=int)
parsed_dates = _parse_day_numbers(day_nums, months, current_year)

In [11]:
# Overwrite the day-number cells (row position 3, third column onward) with the parsed dates.
purchase_data.iloc[3,2:] = parsed_dates

In [12]:
# Keep the date row and everything below it, then flip the frame so that each
# original column becomes a row.
purchase_data_transpose = purchase_data.iloc[3:].T

In [13]:
# Use the first column of the transposed frame as the row index. That column is
# the former row 3 of purchase_data, i.e. the row holding the parsed dates.
purchase_data_transpose.index = purchase_data_transpose.iloc[:, 0]

In [14]:
# Discard the first row, which comes from the sheet's first column (not a date column).
purchase_data_transpose = purchase_data_transpose.iloc[1:]

In [15]:
# Column 3 was copied into the index earlier, so the column itself is redundant.
purchase_data_transpose = purchase_data_transpose.drop(labels=3, axis=1)

In [16]:
# Promote the first remaining row to column headers
# (presumably the row labelled 'Source', which is dropped in a later cell; verify).
purchase_data_transpose.columns = purchase_data_transpose.iloc[0]

In [17]:
# Remove the row labelled 'Source' so that only per-date rows remain.
purchase_data_transpose = purchase_data_transpose.drop(labels='Source')

In [18]:
# Convert the index labels to a DatetimeIndex so the frame can later be grouped by month.
purchase_data_transpose.index = pd.to_datetime(purchase_data_transpose.index)

In [19]:
# Label the row index 'date'; later joins refer to it by that name after reset_index().
purchase_data_transpose = purchase_data_transpose.rename_axis('date')

In [20]:
# Shape of the reshaped purchase table: rows are dates, columns are survey sources.
purchase_data_transpose.shape

(56, 30)

# Overall metrics by Network

## Sum of Purchases by Network

In [26]:
# Total purchases per source over all dates, as a one-column frame.
sum_of_purchases = purchase_data_transpose.sum(axis=0).to_frame(name='Purchases')
# Lower-case the source names so they match the lower-cased lookup keys.
sum_of_purchases.index = sum_of_purchases.index.str.lower()

In [33]:
# One row per survey source, one 'Purchases' column.
sum_of_purchases.shape

(30, 1)

## Joining Purchases to Lookup Data

In [34]:
# Inspect column names, dtypes and non-null counts of the lookup table before joining.
lookup_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 43 entries, 0 to 43
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Exit Survey    43 non-null     object
 1   Airings        38 non-null     object
 2   Exit Survey.1  43 non-null     object
dtypes: object(3)
memory usage: 1.3+ KB


In [35]:
# Left-join the purchase totals onto the lookup table so every lookup row is
# kept, then drop the second survey-name column and index by survey name.
overall_tbl = (
    lookup_data
    .merge(sum_of_purchases, how='left', left_on='Exit Survey', right_on='Source')
    .drop(columns='Exit Survey.1')
    .set_index('Exit Survey')
)
overall_tbl.shape

(43, 2)

## Spend and Lift by Network

In [37]:
# Total spend and lift per network across all airings.
spend_lift_columns = ['Spend', 'Lift']
airings_spend_and_lift = airings_data.groupby('Network')[spend_lift_columns].sum()
airings_spend_and_lift.shape

(19, 2)

## Joining Purchases/Lookup to Spend and Lift

In [38]:
# Attach per-network spend and lift by matching the 'Airings' code against the
# network index; rows without a matching network keep NaN in both columns.
overall_tbl = overall_tbl.join(airings_spend_and_lift, on='Airings', how='left')
overall_tbl.shape

(43, 4)

## Computing Metrics by Network

In [39]:
# Denominators for the percentage columns, computed once with the vectorised
# Series.sum(). Missing values are treated as zero.
# NOTE(review): if several lookup rows map to the same 'Airings' code, that
# network's spend is counted once per row in total_spend. Confirm against the data.
total_purchases = overall_tbl['Purchases'].fillna(0).sum()
total_spend = overall_tbl['Spend'].fillna(0).sum()

# Ratios are NaN wherever either operand is missing.
overall_tbl['Conversion Rate'] = overall_tbl['Purchases'] / overall_tbl['Lift'] * 100
overall_tbl['Cost Per Acquisition'] = overall_tbl['Spend'] / overall_tbl['Purchases']
overall_tbl['Cost Per Visitor'] = overall_tbl['Spend'] / overall_tbl['Lift']
overall_tbl['Percent of Purchases'] = overall_tbl['Purchases'] / total_purchases * 100
overall_tbl['Percent of Spend'] = overall_tbl['Spend'] / total_spend * 100
# Comparisons against NaN evaluate to False, so rows without spend or purchase
# data show False here rather than "unknown".
overall_tbl['Percent Pur > Percent Spend'] = overall_tbl['Percent of Purchases'] > overall_tbl['Percent of Spend']
overall_tbl

Unnamed: 0_level_0,Airings,Purchases,Spend,Lift,Conversion Rate,Cost Per Acquisition,Cost Per Visitor,Percent of Purchases,Percent of Spend,Percent Pur > Percent Spend
Exit Survey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
(blank),,3.0,,,,,,1.271186,,False
aapka_colors,,6.0,,,,,,2.542373,,False
baby_first,BABY,1.0,,,,,,0.423729,,False
bloomberg,BLOM,2.0,4966.72,199.0,1.005025,2483.36,24.958392,0.847458,2.242951,False
cbs_sports,CBSS,1.0,,,,,,0.423729,,False
cnbc,CNBC,15.0,6481.42,406.0,3.694581,432.094667,15.964089,6.355932,2.926984,True
cnn,CNN,23.0,18114.35,1081.0,2.12766,787.580435,16.757031,9.745763,8.180369,True
comedy_central,COM,2.0,7501.25,108.0,1.851852,3750.625,69.456019,0.847458,3.387535,False
cozi,COZI,,,,,,,,,False
dateline,DATELINE,10.0,15288.11,862.0,1.160093,1528.811,17.735626,4.237288,6.90405,False


In [40]:
# Row count should be unchanged by the left joins; columns now include the derived metrics.
overall_tbl.shape

(43, 10)

## Done

# Grouped Metrics by Network and Month

## Purchase Data by Network and Month

In [None]:
# Bucket the date index into calendar months and total each source's purchases per bucket.
month_buckets = pd.Grouper(freq='M')
purchase_data_by_month = purchase_data_transpose.groupby(month_buckets).sum()
purchase_data_by_month

In [None]:
# Reshape from wide (one column per source) to long form: one row per
# (source, month) pair with the total in a 'Purchases' column.
purchase_data_by_month = (
    purchase_data_by_month
    .transpose()
    .stack()
    .to_frame(name='Purchases')
    .reset_index()
)
purchase_data_by_month

In [None]:
# Shape of the long-form monthly purchase table.
purchase_data_by_month.shape

## Airings Sheet

In [None]:
# Inspect column names, dtypes and non-null counts of the airings sheet.
airings_data.info()

In [None]:
# airings_data.groupby([pd.Grouper(key='Date/Time ET', freq='M'), 'Network'])[['Spend', 'Lift']].agg('sum')

## Preparing Lookup Data for Join

In [None]:
# Drop the second survey-name column, which is not used as a join key.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because the column is already gone.
lookup_data = lookup_data.drop(columns='Exit Survey.1', errors='ignore')
lookup_data

In [None]:
# Shape of the lookup table after removing the unused column.
lookup_data.shape

## Joining Purchases by network and month to Lookup Data

In [None]:
# joined_tbl = lookup_data[['Exit Survey', 'Airings']].merge(right=purchase_data_by_date, left_on='Exit Survey', right_on='Source', how='left')
# #joined_tbl.drop(labels='Exit Survey', axis=1, inplace=True)
# joined_tbl

In [None]:
# purchase_grouped = purchase_data_by_month.join(lookup_data, how='right')
# purchase_grouped

In [None]:
# Left-join the monthly purchases onto the lookup table so every lookup row is
# kept, then index the result by survey name and month.
test = (
    lookup_data
    .merge(purchase_data_by_month, how='left', left_on='Exit Survey', right_on='Source')
    .set_index(['Exit Survey', 'date'])
)
test

In [None]:
# Print the whole joined table as plain text instead of the truncated notebook display.
print(test.to_string())

In [None]:
# Shape of the lookup/monthly-purchase join.
test.shape

## Spend and Lift by Network and Month

In [None]:
# Total spend and lift per network and calendar month.
# TODO: the result is indexed by (Network, month); reset the index so both
# become ordinary columns before joining with the purchase table.
network_month_keys = ['Network', pd.Grouper(key='Date/Time ET', freq='M')]
airings_spend_lift_grouped = airings_data.groupby(network_month_keys)[['Spend', 'Lift']].sum()
airings_spend_lift_grouped

In [None]:
# airings_spend_lift_grouped.reset_index()

In [None]:
# purchase_grouped.reset_index()

## Joining Purchases/Lookup to Spend and Lift by Network and Month

In [None]:
# Join monthly purchases (already matched to airing codes in `test`) with
# monthly spend and lift, matching on airing code and month.
# `purchase_grouped` is only ever assigned in commented-out code, so using it
# here raises NameError on a fresh kernel; `test` holds the lookup/purchase join.
month_and_network_grouped = test.reset_index().merge(
    right=airings_spend_lift_grouped.reset_index(),
    left_on=['Airings', 'date'],
    right_on=['Network', 'Date/Time ET'],
    how='left',
)
month_and_network_grouped

In [None]:
# Index by source and month, then discard the join-key columns that are no longer needed.
join_key_columns = ['Airings', 'Network', 'Date/Time ET']
month_and_network_grouped = (
    month_and_network_grouped
    .set_index(['Source', 'date'])
    .drop(columns=join_key_columns)
)
month_and_network_grouped

In [None]:
# Shape of the per-source, per-month table.
month_and_network_grouped.shape

In [None]:
# Spot-check the raw airings rows for the FOOD network.
airings_data.query('Network == "FOOD"')

# What networks have purchases but no spend?

In [None]:
# Count airings with zero spend per network.
# NOTE(review): this only looks at airings_data; it does not check whether those
# networks have purchases, which is what the section heading asks.
airings_data.query('Spend == 0')['Network'].value_counts()

In [None]:
# Total spend and lift per network (the same aggregation that produced airings_spend_and_lift).
airings_data.groupby('Network')[['Spend', 'Lift']].agg('sum')