#### Check correlation between spend and sales, week-by-week

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

PATH = '/home/tbrownex/data/Hackett/JLP/UseCase2/'
FILE = 'BranchSalesDriver'

In [2]:
df = pd.read_csv(PATH+FILE)

df7 = df.loc[df["Year"]==2017]
df6 = df.loc[df["Year"]==2016]

assert (df7.shape[0]+df6.shape[0]==df.shape[0]), "Row counts are wrong"

In [33]:
# 2016 first
parts = []
parts.append(df6[:17])
parts.append(df6[17:34])
parts.append(df6[34:36])
parts.append(df6[36:])

print("Weeks:")
for x in parts:
    print("{}-{}".format(x["Week"].min(), x["Week"].max()))

diffs = []
for x in parts:
    tmp = x["PPC Spend"]
    diffs.append(tmp.diff())

sales = []
for x in parts:
    sales.append(x["Online Sales"][1:])

tom = []
for x in diffs:
    tmp = x[1:]
    tom.append(tmp)

ppc = pd.Series()
for x in tom:
    ppc = ppc.append(x)

sal = pd.Series()
for x in sales:
    sal = sal.append(x)

print("Correlation: ", ppc.corr(sal))

Weeks:
1-17
19-35
37-38
50-52
Correlation:  0.06770166716833695


In [167]:
print("Correlation Method - no Lag")
for x in ["pearson", "kendall", "spearman"]:
    print("{:<12}{:.2f}".format(x,ppc.corr(sales, method=x)))

Correlation Method - no Lag
pearson     0.53
kendall     0.35
spearman    0.51


##### Correlation, lag of 1-5 weeks

In [150]:
size = len(ppc)
for l in [1,2,3,4,5]:
    print("Lag Value: ", l)
    for x in ["pearson", "kendall", "spearman"]:
        p = ppc[0:(size-l)]
        s = sales[l:]
        assert (len(p)==len(s)),   "Length of series mismatch"
        c = p.corr(s, method=x)
        print("  {:<12}: {:.2f}".format(x,c))

Lag Value:  1
  pearson     : 0.54
  kendall     : 0.36
  spearman    : 0.52
Lag Value:  2
  pearson     : 0.54
  kendall     : 0.36
  spearman    : 0.52
Lag Value:  3
  pearson     : 0.54
  kendall     : 0.35
  spearman    : 0.50
Lag Value:  4
  pearson     : 0.56
  kendall     : 0.37
  spearman    : 0.53
Lag Value:  5
  pearson     : 0.57
  kendall     : 0.38
  spearman    : 0.54


In [25]:
for x in [1,2,3,4,5,6]:
    diff  = ppc.diff(x)
    diff  = diff[x:]
    s     = sales[x:]
    c = round(diff.corr(s), 2)
    plt.scatter(diff, s)
    T = "Lag = " + str(x) +"\n" + "Correlation = " + str(c)
    plt.title(T)
    plt.savefig("/home/tbrownex/fig"+str(x)+'.png', bbox_inches='tight')
    plt.close()