### Exploring traffic, impressions and interactions

In [172]:
#Can skip these imports if you alreaady have them:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import matplotlib.dates as mdates

In [162]:
#Can skip these since you already have them as Pandas dataframes:
traffic = pd.read_csv("traffic.csv", index_col=0, parse_dates=True)
impressions = pd.read_csv("impression.csv", index_col=0, parse_dates=True)
interactions = pd.read_csv("interaction.csv", index_col=0, parse_dates=True)

In [163]:
#Calculate the rates, if no data set rate to 0:
def calculate_rate(num, denom):
    rate = num/denom * 100
    rate = rate.fillna(0)
    return rate
    
impression_rate = calculate_rate(impressions, traffic)
interaction_rate = calculate_rate(interactions, impressions)

In [164]:
#Function searches for outliers in a dataframe and stores them in a boolean dataframe
#Then prints the index row and column, i.e. date and unit of the outliers
#Prints the outliers on most recent date

def print_outliers(data, rate):
    Q1 = data.quantile(0.25)
    Q3 = data.quantile(0.75)
    IQR = Q3 - Q1
    IQR_adjusted = IQR
    IQR_adjusted[IQR_adjusted == 0] = np.nan
    IQR_median = IQR_adjusted.median()
    #Set some conditions for defining outliers:
    outliers = (data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR)) | (data > 100) | (data < (Q1 - 1.5 * IQR_median)) | (data > (Q3 + 1.5 * IQR)) 
    no_outliers = outliers[outliers].count().sum()/outliers.count().sum()
    #Just do the most recent date:
    outliers_last = outliers.iloc[[-1],:]
    no_outliers_last = outliers_last[outliers_last].count().sum()
    s = outliers_last[outliers_last].stack()
    date = s.index.get_level_values(0)[0]
    res1 = str(no_outliers_last) + " outlier " + str(rate) + "s on " + str(date) + " :"
    res2 = str(list(s.index.get_level_values(1)))
    result = res1 + res2
    return outliers, outliers_last, result

#### Impression rate - outliers

In [165]:
outlier_impressions, outlier_impressions_last, outlier_impressions_result = print_outliers(impression_rate, "impression")

In [166]:
outlier_impressions_result

"8 outlier impressions on 2018-12-12 00:00:00 :['Calvin Klein Bangkok T1 West', 'Calvin Klein Manila Fiestamall Obsessed', 'Calvin Klein Manila Fiestamall One Gold', 'Calvin Klein Moscow Airport', 'Coty Gondola Shanghai Explorium 2.0', 'Guerlain Rouge G Champs Elysées Paris', 'Marc Jacobs London Heathrow', 'Mayb 15ft Wollongong NSW']"

#### Interaction rate - outliers

In [167]:
outlier_interactions, outlier_interactions_last, outlier_interactions_result = print_outliers(interaction_rate, "interaction")

In [168]:
outlier_interactions_result

"5 outlier interactions on 2018-12-12 00:00:00 :['Calvin Klein Bangkok T1 West', 'Calvin Klein Manila Fiestamall Obsessed', 'Calvin Klein Manila Fiestamall One Gold', 'Hugo Boss London Heathrow', 'Hugo Boss Tel Aviv Airport']"

#### Chart outlier units - traffic and impressions

In [197]:
#Saves the charts of most recent problem units for impressions as impression_problem01.png and so on:
impressions_to_chart = outlier_impressions.columns.values[outlier_impressions_last.sum() > 0]
traffic2 = traffic[impressions_to_chart]
impressions2 = impressions[impressions_to_chart]

for i in np.arange(impressions_to_chart.size):
    fig, ax = plt.subplots(figsize=(8,5))
    ax.plot(traffic2.iloc[:,i])
    ax.plot(impressions2.iloc[:,i])
    ax.set_title(traffic2.columns.values[i])
    ax.legend(labels=['traffic', 'impressions'],loc='best')
    fig.savefig('impression_problem'+str(i+1)+'.png')
    fig.clf()

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

#### Chart outlier units - impressions and interactions

In [200]:
#Saves the charts of most recent problem units for interactions as interaction_problem01.png and so on:
interactions_to_chart = outlier_interactions.columns.values[outlier_interactions_last.sum() > 0]
impressions3 = impressions[interactions_to_chart]
interactions3 = interactions[interactions_to_chart]
    
for i in np.arange(interactions_to_chart.size):
    fig, ax = plt.subplots(figsize=(8,5))
    ax.plot(impressions3.iloc[:,i])
    ax.plot(interactions3.iloc[:,i])
    ax.set_title(impressions3.columns.values[i])
    ax.legend(labels=['impressions', 'interactionss'],loc='best')
    fig.savefig('interaction_problem'+str(i+1)+'.png')
    fig.clf()

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>

<Figure size 576x360 with 0 Axes>