# Looking at TravelPony Facebook Ad Data Using Statistics
## Kenneth R. Miller
#### This is a short exercise in basic ad analysis using data from the hotel travel website TravelPony. 

#### Reading in the data. 

In [3]:
import pandas as pd
import arrow
import numpy as np
import statsmodels.api as stat

# Load the Facebook ad export; the CSV can be found in the github repository
travelponydf = pd.read_csv("Travel Pony Facebook.csv")
# Column headers of the frame (evaluated only; nothing is echoed because this
# is not the last expression of the cell)
travelponydf.columns.tolist()

# Derived column: amount spent divided by the number of impressions
travelponydf["Cost Per Impression"] = travelponydf["Amount Spent (USD)"].div(travelponydf["Impressions"])

# Weekday number of each ad's start date (numbering starts on Monday).
# Every start date is converted to text and parsed by Arrow as M/D/YY.
daylist = [
    arrow.get(str(start_date), 'M/D/YY').weekday()
    for start_date in travelponydf['Start Date']
]

travelponydf["Day of Week"] = daylist

#### Finding the cheapest bid day based on cost per impression.

In [13]:
# One bucket of cost-per-impression values per weekday. The seven lists keep
# their own names; the mapping below only routes each value to the right one.
mon, tues, wed, thurs, fri, sat, sun = [], [], [], [], [], [], []
bucket_for_day = {0: mon, 1: tues, 2: wed, 3: thurs, 4: fri, 5: sat, 6: sun}

# Walk the rows in order and drop each cost into the bucket of its weekday number
day_numbers = travelponydf["Day of Week"].values
impression_costs = travelponydf["Cost Per Impression"].values
for day_number, cost in zip(day_numbers, impression_costs):
    bucket = bucket_for_day.get(day_number)
    if bucket is None:
        # A weekday number outside 0-6 is unexpected: report it and stop bucketing
        print("error")
        break
    bucket.append(cost)

# Assemble the per-day averages (rounded to 5 decimals) into one readable block
labelled_buckets = [
    ("Monday", mon),
    ("Tuesday", tues),
    ("Wednesday", wed),
    ("Thursday", thurs),
    ("Friday", fri),
    ("Saturday", sat),
    ("Sunday", sun),
]
report = ""
for label, costs in labelled_buckets:
    report += label + ": " + str(round(np.mean(costs), 5)) + "\n"
print(report)
# Reading the averages: Friday is the most expensive bid day and Saturday is the cheapest

Monday: 0.00291
Tuesday: 0.00304
Wednesday: 0.00298
Thursday: 0.0034
Friday: 0.0041
Saturday: 0.00263
Sunday: 0.00369



#### Computing the correlation between amount spent and various advertising categories.

In [11]:
# Column headers (evaluated only; nothing is echoed because the cell ends with a loop)
travelponydf.columns.tolist()
# The advertising categories to compare against spend
cats = ['Reach', 'Frequency', 'Unique Clicks', 'Page Likes']
# Correlate the amount spent with each category in turn and print one line per category
spend = travelponydf['Amount Spent (USD)']
for category in cats:
    coefficient = spend.corr(travelponydf[category])
    print(category + ": " + str(coefficient))

Reach: 0.7031238065113846
Frequency: 0.13020086992866337
Unique Clicks: 0.8829931774784137
Page Likes: 0.7576119292180449


#### Creating a simple regression to predict unique clicks. I like statsmodels because it creates nice, readable results tables.

In [12]:
# Independent variables: the two predictor columns
predictor_columns = ['Reach', 'Frequency']
X = travelponydf[predictor_columns]
# Dependent variable: the column the regression tries to explain
target_column = 'Unique Clicks'
y = travelponydf[target_column]

# Ordinary least squares on exactly these columns (no constant column is added to X)
ols_spec = stat.OLS(endog=y, exog=X)
model = ols_spec.fit()
# Predictions of the fitted model for the same rows it was fitted on
clickpredict = model.predict(X)
# Last expression of the cell, so the results table is rendered as its output
model.summary()

0,1,2,3
Dep. Variable:,Unique Clicks,R-squared:,0.557
Model:,OLS,Adj. R-squared:,0.556
Method:,Least Squares,F-statistic:,2325.0
Date:,"Sat, 13 Oct 2018",Prob (F-statistic):,0.0
Time:,17:17:26,Log-Likelihood:,-15973.0
No. Observations:,3705,AIC:,31950.0
Df Residuals:,3703,BIC:,31960.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Reach,0.0019,3.12e-05,62.490,0.000,0.002,0.002
Frequency,3.6139,0.298,12.109,0.000,3.029,4.199

0,1,2,3
Omnibus:,5107.616,Durbin-Watson:,0.803
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5128392.747
Skew:,7.331,Prob(JB):,0.0
Kurtosis:,184.674,Cond. No.,9840.0
