Resources:

https://www.analyticsvidhya.com/blog/2016/02/time-series-forecasting-codes-python/

http://michaelpaulschramm.com/simple-time-series-trend-analysis/

In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# import modules
import pandas as pd
import numpy as np
import matplotlib.pylab as plt

# Load and preprocess data

In [None]:
# Load the Excel export as a dataframe. Passing the path lets pandas open and
# close the file itself; the previous bare open() handle was never closed.
df = pd.read_excel('iiko.xlsx')

# Drop the 'Unnamed: 6' column.
# NOTE(review): presumably an empty artifact of the export -- confirm.
del df['Unnamed: 6']

# Rename the remaining nine columns.
df.columns = ['Date_Time', 'Check', 'Product_group', 'Product', 'Units', 'Sum_before_discount', 'Sum_after_discount', 'Net_cost', 'Markup']

# Delete rows whose Check cell contains "всего" ("total").
# The previous filter, ~(contains == False), kept exactly those rows and
# dropped string cells that did not contain the marker. na=False makes
# NaN / non-string cells count as "does not contain", so they are kept.
is_total_row = df['Check'].str.contains("всего", na=False)
df = df[~is_total_row]

# Forward-fill NaNs: each gap takes the last value seen above it.
df = df.ffill()

# Convert Date_Time to datetime and derive the calendar columns from it.
df['Date_Time'] = pd.to_datetime(df['Date_Time'])
df['Date'] = df['Date_Time'].dt.date
df['Weekday'] = df['Date_Time'].dt.weekday
df['Year'] = df['Date_Time'].dt.year
df['Month'] = df['Date_Time'].dt.month
df['Day'] = df['Date_Time'].dt.day
df['Hour'] = df['Date_Time'].dt.hour

# Derived money columns.
# Price is per unit; rows with Units == 0 yield inf/NaN here.
df['Discount'] = df['Sum_before_discount'] - df['Sum_after_discount']
df['Profit'] = df['Sum_after_discount'] - df['Net_cost']
df['Price'] = df['Sum_after_discount'] / df['Units']

# Rearrange columns: time fields first, then identifiers, then amounts.
df = df[['Date_Time', 'Date', 'Weekday', 'Year', 'Month', 'Day', 'Hour', 'Check', 'Product_group', 'Product', 'Units', 'Sum_before_discount', 'Sum_after_discount', 'Net_cost', 'Price', 'Markup', 'Discount', 'Profit']]

# Data cleaning: keep only rows with non-negative units, cost and sums.
df = df[(df['Units'] >= 0) & (df['Net_cost'] >= 0) & (df['Sum_before_discount'] >= 0) & (df['Sum_after_discount'] >= 0)]
# Drop March (not enough data). The filter keeps months 4..12 only, so any
# January/February rows are dropped as well.
df = df[df['Month'] > 3]

# Revenue as Time Series

Visualize revenues

In [None]:
# Total revenue (sum after discount) per distinct timestamp
grouping = df.groupby(['Date_Time'])
revenue_by_timestamp = grouping['Sum_after_discount'].sum()

# Aggregate into weekly totals; weeks without a value become 0
ts = revenue_by_timestamp.resample('W').sum().fillna(0)

# Plot weekly revenue
ts.plot(figsize=(12, 6))

# Trend and monthly pattern analysis

In [None]:
# Check stationarity of a series by rolling statistics and the Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller


def test_stationarity(timeseries, window=2):
    """
    Plot rolling mean/std of a series and print the Dickey-Fuller test results.

    Input:
        timeseries: series to inspect; it must provide .rolling() (e.g. a
                    pandas Series)
        window:     size of the rolling window (2 default)

    Output:
        None. Shows a plot of the original series with its rolling mean and
        rolling standard deviation, then prints a pandas Series holding the
        test statistic, p-value, number of lags used, number of observations
        used and the critical values returned by adfuller.
    """
    # Determine rolling statistics
    rolmean = timeseries.rolling(window=window).mean()
    rolstd = timeseries.rolling(window=window).std()

    # Plot rolling statistics
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Perform Dickey-Fuller test.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    # dftest[4] maps a significance level label to its critical value
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)

# Analyze trend by Mann-Kendall test

from scipy.stats import norm, mstats

def mk_test(x, alpha = 0.05):
    """
    Mann-Kendall test for a monotonic trend.

    Input:
        x:     a vector of data (list, numpy array or pandas Series of numbers)
        alpha: significance level (0.05 default)

    Output:
        trend: tells the trend ('increasing', 'decreasing' or 'no trend')
        h:     True (if a trend is present) or False (if a trend is absent)
        p:     p value of the two-tailed significance test
        z:     normalized test statistic

    Examples
    --------
      >>> x = np.random.rand(100)
      >>> trend,h,p,z = mk_test(x,0.05)
    """
    # Work on a plain float array so indexing is positional even when the
    # caller passes a pandas Series with a non-integer index.
    x = np.asarray(x, dtype=float)
    n = len(x)

    # calculate S: the sum of sign(x[j] - x[k]) over all pairs k < j
    s = 0.0
    for k in range(n - 1):
        s += np.sign(x[k + 1:] - x[k]).sum()

    # size of every group of equal values (groups of size 1 contribute 0 below)
    _, tie_counts = np.unique(x, return_counts=True)
    tie_term = np.sum(tie_counts * (tie_counts - 1) * (2 * tie_counts + 5))

    # calculate var(s). The tie correction is SUBTRACTED, and 18.0 forces
    # true division so Python 2 does not truncate the result.
    var_s = (n * (n - 1) * (2 * n + 5) - tie_term) / 18.0

    # continuity-corrected normal approximation; var_s can only be 0 when
    # s == 0 (all values equal, or fewer than two values), so no division by 0
    if s > 0:
        z = float((s - 1) / np.sqrt(var_s))
    elif s < 0:
        z = float((s + 1) / np.sqrt(var_s))
    else:
        z = 0.0

    # calculate the p_value
    p = float(2 * (1 - norm.cdf(abs(z))))  # two tail test
    h = bool(abs(z) > norm.ppf(1 - alpha / 2.0))

    if (z < 0) and h:
        trend = 'decreasing'
    elif (z > 0) and h:
        trend = 'increasing'
    else:
        trend = 'no trend'

    return trend, h, p, z
    
def trend_pattern(timeseries):
    """
    Run the stationarity check, decompose the series and test its trend.

    Input:
        timeseries: series to analyze; it must provide .rolling() and .values
                    (e.g. a pandas Series). The seasonal bar plot slices
                    positions 5..9, so at least 10 observations are assumed.

    Output:
        None. Shows the stationarity plot, the decomposition plot and a bar
        plot of the seasonal component, then prints the Mann-Kendall result
        (trend label and h on one line, z and p on the next).
    """
    # Use the argument, not the module-level `ts`, so the function analyzes
    # whatever series the caller passes in.
    test_stationarity(timeseries)

    # Decompose into trend and season (additive model, period of 4 samples)
    from statsmodels.tsa.seasonal import seasonal_decompose
    values = timeseries.values
    try:
        # newer statsmodels releases call the keyword `period`
        decomposition = seasonal_decompose(values, period=4, model='additive')
    except TypeError:
        # older releases only know the original keyword name `freq`
        decomposition = seasonal_decompose(values, freq=4, model='additive')
    decomposition.plot()
    plt.show()

    # Plot seasonal pattern for positions 5..9, shifted so the minimum is 0
    seasonal_slice = decomposition.seasonal[5:10]
    plt.bar(range(5, 10), seasonal_slice - np.min(seasonal_slice))

    # Analyze trend by Mann-Kendall test; non-finite trend values are
    # dropped before testing
    trend = decomposition.trend
    trend = trend[np.isfinite(trend)]
    test_trend, h, p, z = mk_test(trend, alpha=0.05)
    # single-argument print(...) gives the same output on Python 2 and 3
    print('%s %s' % (test_trend, h))
    print('%s %s' % (z, p))

In [None]:
# Run the stationarity check, decomposition and Mann-Kendall trend test on the weekly revenue series
trend_pattern(ts)