# Visualisation de la série temporelle des taux de conversion: corrélation, fonctions d'autocorrélation et QQ plot

In [None]:
# required modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import seaborn as sns

In [None]:
def corr(data, lags=9, ncols=3):
    """Draw lag scatter plots of a time series against its own past values.

    One panel is drawn per lag ``k`` in ``1..lags``. Each panel plots the
    series (y axis, labelled 'Original') against the series delayed by ``k``
    steps (x axis, labelled ``t-k``) and reports the correlation between the
    two columns in the panel title.

    Parameters
    ----------
    data : pandas.Series
        The time series to inspect.
    lags : int, optional
        Number of lags to plot, starting at 1. Defaults to 9.
    ncols : int, optional
        Number of panels per row. Defaults to 3.

    Raises
    ------
    ValueError
        If ``lags`` or ``ncols`` is smaller than 1.
    """
    if lags < 1 or ncols < 1:
        raise ValueError('lags and ncols must both be >= 1')

    # Ceiling division: enough rows to hold every requested lag.
    nrows = -(-lags // ncols)

    fig, axes = plt.subplots(ncols=ncols, nrows=nrows,
                             figsize=(6 * ncols, 6 * nrows), squeeze=False)
    panels = axes.ravel()

    for ax, lag in zip(panels, range(1, lags + 1)):
        lag_str = 't-{}'.format(lag)
        # shift(lag) moves values forward in time, so row t holds y[t - lag],
        # which is what the 't-<lag>' label promises.
        pairs = pd.concat([data, data.shift(lag)], axis=1,
                          keys=['y', lag_str]).dropna()

        # plot data
        pairs.plot(ax=ax, kind='scatter', y='y', x=lag_str)
        # Off-diagonal entry of the 2x2 correlation matrix.
        lag_corr = pairs.corr().iloc[0, 1]
        ax.set_ylabel('Original')
        ax.set_title('Lag: {} (corr={:.2f})'.format(lag_str, lag_corr))
        ax.set_aspect('equal')

    # Hide grid cells that have no lag assigned to them.
    for ax in panels[lags:]:
        ax.set_visible(False)

    # Remove top and right spines on every panel of this figure.
    sns.despine(fig=fig)

    fig.tight_layout()
    plt.show()


In [None]:
def ts_plot(y, lags=None, title='', alpha=0.5):
    """Plot a time series with its ACF, PACF, normal QQ plot and histogram.

    The figure has three rows: the series itself across the full width, the
    autocorrelation and partial autocorrelation functions side by side, and
    the QQ plot next to a 25-bin histogram.

    Parameters
    ----------
    y : pandas.Series or array-like
        The series to plot; anything that is not a Series is wrapped in one.
    lags : int or array-like, optional
        Passed through to ``plot_acf`` / ``plot_pacf``.
    title : str, optional
        Title of the time-series panel.
    alpha : float, optional
        Passed through to ``plot_acf`` / ``plot_pacf`` as the significance
        level of the confidence band. NOTE(review): the default 0.5 gives a
        50 % band; 0.05 is the conventional 95 % band - confirm intent.
    """
    # if time series is not a Series object, make it so
    if not isinstance(y, pd.Series):
        y = pd.Series(y)

    # initialize figure and axes
    fig = plt.figure(figsize=(14, 12))
    layout = (3, 2)
    ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
    acf_ax = plt.subplot2grid(layout, (1, 0))
    pacf_ax = plt.subplot2grid(layout, (1, 1))
    qq_ax = plt.subplot2grid(layout, (2, 0))
    hist_ax = plt.subplot2grid(layout, (2, 1))

    # time series plot
    y.plot(ax=ts_ax)
    # Attach the legend to the time-series axes explicitly: plt.legend() would
    # target the current axes, which is the histogram panel created last.
    handles, _ = ts_ax.get_legend_handles_labels()
    if handles:
        ts_ax.legend(loc='best')
    ts_ax.set_title(title)

    # acf and pacf
    smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=alpha)
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=alpha)

    # qq plot
    sm.qqplot(y, line='s', ax=qq_ax)
    qq_ax.set_title('Normal QQ Plot')

    # hist plot
    y.plot(ax=hist_ax, kind='hist', bins=25)
    hist_ax.set_title('Histogram')

    fig.tight_layout()
    plt.show()

# Visualisation des moyennes mobiles et effets journaliers du taux de conversion (saisonnalité)

In [None]:
# load required modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.tsa 

In [None]:
# simple line plot
def rolling_mean(y):
    """Plot a series, then a 2x2 grid comparing it with four rolling means.

    The first figure is a plain line plot of ``y``. The second figure holds
    one panel per window size (4, 6, 8 and 12 observations); each panel
    overlays the original series with its rolling mean for that window.

    Parameters
    ----------
    y : pandas.Series
        The series to plot; its index is used as the x axis of the grid.
    """
    # simple line plot of the raw series
    plt.plot(y)
    plt.title('Taux de conversion', fontsize=24)
    plt.ylabel('taux moyen journalier')
    plt.xlabel('Date')
    plt.show()

    # Window sizes, laid out row by row: upper left, upper right,
    # lower left, lower right.
    windows = (4, 6, 8, 12)

    fig, axes = plt.subplots(2, 2, sharey=False, sharex=False)
    fig.set_figwidth(14)
    fig.set_figheight(8)

    # NOTE(review): the labels say "Months" but the window is a number of
    # observations, and the y label describes a daily rate - confirm wording.
    for ax, window in zip(axes.flat, windows):
        ax.plot(y.index, y, label='Original')
        ax.plot(y.index, y.rolling(window=window).mean(),
                label='{}-Months Rolling Mean'.format(window), color='crimson')
        ax.set_xlabel("Date")
        ax.set_ylabel("taux moyen journalier")
        ax.set_title("{}-Months Moving Average".format(window))
        ax.legend(loc='best')

    fig.tight_layout()
    plt.show()

In [None]:
def plot_rolling_average(y, window=12):
    """Overlay a series with its rolling mean and rolling standard deviation.

    Parameters
    ----------
    y : pandas.Series
        The series to plot.
    window : int, optional
        Number of observations in each rolling window. Defaults to 12.
    """
    # Both statistics share the same rolling window.
    roller = y.rolling(window=window)
    moving_avg = roller.mean()
    moving_sd = roller.std()

    # Draw order is kept fixed so the legend entries stay in the same order.
    curves = (
        (y, {'label': 'Original'}),
        (moving_avg, {'color': 'crimson', 'label': 'Moving average mean'}),
        (moving_sd, {'color': 'darkslateblue',
                     'label': 'Moving average standard deviation'}),
    )
    for values, style in curves:
        plt.plot(values, **style)

    plt.title('Rolling Mean & Standard Deviation')
    plt.legend(loc='best')
    # Non-blocking show, so execution continues past the figure.
    plt.show(block=False)

In [None]:
def effet_journalier(df):
    """Plot the per-day effect on the 'is_conv' column, month by month.

    Two figures are produced: a line plot with one line per month over the
    day of the month, and a box plot with one box per day of the month.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an 'is_conv' column and an index exposing ``.day`` and
        ``.strftime`` (presumably a DatetimeIndex - confirm with the caller).

    Raises
    ------
    ValueError
        Raised by ``DataFrame.pivot`` when the same (Day, Month) pair occurs
        more than once, e.g. if the data spans more than one year.
    """
    # Work on a copy so the caller's frame does not gain 'Day'/'Month' columns.
    daily = df.assign(Day=df.index.day, Month=df.index.strftime('%b'))

    # reshape data for the line plot: one row per day, one column per month
    df_piv_line = daily.pivot(index='Day', columns='Month', values='is_conv')

    # create line plot
    df_piv_line.plot(colormap='jet')
    plt.title('Seasonal Effect per Day', fontsize=24)
    plt.ylabel('Taux moyen journalier')
    plt.legend(loc='best', bbox_to_anchor=(1.0, 0.5))
    plt.show()

    # reshape data for the box plot: one row per month, one column per day
    df_piv_box = daily.pivot(index='Month', columns='Day', values='is_conv')

    # create a box plot
    fig, ax = plt.subplots()
    df_piv_box.plot(ax=ax, kind='box')
    ax.set_title('Seasonal Effect per Day', fontsize=24)
    ax.set_xlabel('Day')
    ax.set_ylabel('Taux moyen journalier')
    ax.xaxis.set_ticks_position('bottom')
    fig.tight_layout()
    plt.show()


# Test de Dickey-Fuller augmenté (stationnarité de la série temporelle des taux de conversion)

In [None]:
def adf_test(y):
    """Run the Augmented Dickey-Fuller test on a series and print the results.

    Prints the test statistic, p-value, number of lags used, number of
    observations used and the critical values returned by ``adfuller``.

    Parameters
    ----------
    y : array-like
        The series to test; passed straight to ``adfuller``.
    """
    # Imported here because no cell of the file brings `adfuller` into scope.
    from statsmodels.tsa.stattools import adfuller

    # perform Augmented Dickey Fuller test
    print('Results of Augmented Dickey-Fuller test:')
    dftest = adfuller(y, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['test statistic', 'p-value', '# of lags', '# of observations'],
    )
    # dftest[4] maps significance levels to critical values.
    for key, value in dftest[4].items():
        dfoutput['Critical Value ({})'.format(key)] = value
    print(dfoutput)

In [None]:
def ts_diagnostics(y, lags=None, title='Taux de conversion', window=12, alpha=0.5):
    """Plot diagnostics for a time series and print an ADF test summary.

    The figure shows the series with its rolling mean and rolling standard
    deviation, the ACF and PACF, a QQ plot and a 25-bin histogram. The
    Augmented Dickey-Fuller test results are then printed.

    Parameters
    ----------
    y : pandas.Series or array-like
        The series to analyse; anything that is not a Series is wrapped in one.
    lags : int or array-like, optional
        Passed through to ``plot_acf`` / ``plot_pacf``.
    title : str, optional
        Title of the time-series panel.
    window : int, optional
        Number of observations in the rolling mean / std window. Defaults to 12.
    alpha : float, optional
        Passed through to ``plot_acf`` / ``plot_pacf`` as the significance
        level of the confidence band. NOTE(review): the default 0.5 gives a
        50 % band; 0.05 is the conventional 95 % band - confirm intent.
    """
    # Imported here because no cell of the file brings `adfuller` into scope.
    from statsmodels.tsa.stattools import adfuller

    if not isinstance(y, pd.Series):
        y = pd.Series(y)

    # rolling statistics over `window` observations
    rolling_mean = y.rolling(window=window).mean()
    rolling_std = y.rolling(window=window).std()

    fig = plt.figure(figsize=(14, 12))
    layout = (3, 2)
    ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
    acf_ax = plt.subplot2grid(layout, (1, 0))
    pacf_ax = plt.subplot2grid(layout, (1, 1))
    qq_ax = plt.subplot2grid(layout, (2, 0))
    hist_ax = plt.subplot2grid(layout, (2, 1))

    # time series plot; explicit labels so the legend can tell the lines apart
    y.plot(ax=ts_ax, label='Original')
    rolling_mean.plot(ax=ts_ax, color='crimson', label='Moving average mean')
    rolling_std.plot(ax=ts_ax, color='darkslateblue',
                     label='Moving average standard deviation')
    # Legend goes on the time-series axes: plt.legend() would target the
    # current axes, which is the histogram panel created last.
    ts_ax.legend(loc='best')
    ts_ax.set_title(title, fontsize=24)

    # acf and pacf
    smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=alpha)
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=alpha)

    # qq plot
    sm.qqplot(y, line='s', ax=qq_ax)
    qq_ax.set_title('QQ Plot')

    # hist plot
    y.plot(ax=hist_ax, kind='hist', bins=25)
    hist_ax.set_title('Histogram')

    fig.tight_layout()
    plt.show()

    # perform Augmented Dickey Fuller test
    print('Results of Dickey-Fuller test:')
    dftest = adfuller(y, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['test statistic', 'p-value', '# of lags', '# of observations'],
    )
    # dftest[4] maps significance levels to critical values.
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)