In [1]:
from itertools import product
from warnings import catch_warnings
from warnings import filterwarnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import accuracy_score

In [2]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [3]:
def create_dataset(data):
    """Split every row into its leading features and its trailing label.

    Parameters
    ----------
    data : sequence of sequences
        Each row holds the feature values followed by the final choice in
        the last position.

    Returns
    -------
    tuple (features, final_choices)
        ``features[i]`` is ``data[i][:-1]`` and ``final_choices[i]`` is
        ``data[i][-1]``.

    Raises
    ------
    IndexError
        If a row is empty (there is no last element to use as the label).
    """
    features = [row[:-1] for row in data]
    final_choices = [row[-1] for row in data]
    return features, final_choices

In [4]:
def create_dataset_test(data):
    """Split every test row into its leading features and its trailing label.

    Performs the same split as ``create_dataset``: everything but the last
    element of a row is the feature vector, the last element is the label.

    Parameters
    ----------
    data : sequence of sequences
        Each row holds the feature values followed by the final choice.

    Returns
    -------
    tuple (features, final_choices)
        Two lists aligned by row index.

    Raises
    ------
    IndexError
        If a row is empty.
    """
    features, final_choices = [], []
    for row in data:
        final_choices.append(row[-1])
        features.append(row[:-1])
    return features, final_choices

In [5]:
def normalise_outcomes(data):
    """Min-max scale the odd-indexed entries of every row to [0, 1], in place.

    Rows are laid out as alternating pairs, so positions 1, 3, 5, ... are the
    values that get rescaled; even positions are left untouched.

    The scaling range is taken from the rescaled positions only. Mixing in
    the untouched even-indexed values (as a plain ``max``/``min`` over the
    whole row would) lets unrelated codes stretch the range and skews the
    result.

    Parameters
    ----------
    data : list of lists of numbers
        Mutated in place; rows may have different lengths.

    Returns
    -------
    None

    Notes
    -----
    * If there is nothing to scale (no rows, or no row longer than one
      element) the data is left unchanged.
    * If all scaled values are identical the range is zero; they are mapped
      to 0.0 instead of dividing by zero.
    """
    outcomes = [row[j] for row in data for j in range(1, len(row), 2)]
    if not outcomes:
        return

    mini = min(outcomes)
    span = max(outcomes) - mini

    for row in data:
        for j in range(1, len(row), 2):
            row[j] = (row[j] - mini) / span if span else 0.0

In [6]:
def parse_dataset(name, flag):
    """Read a comma-separated numeric file, scale it and split it.

    Parameters
    ----------
    name : str
        Path of the file to read; every non-blank line is a comma-separated
        list of numbers.
    flag : truthy / falsy
        Truthy -> split with ``create_dataset``; falsy -> split with
        ``create_dataset_test``.

    Returns
    -------
    Whatever the selected split helper returns for the parsed, normalised rows.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a field cannot be converted to ``float``.
    """
    # The context manager closes the handle even when a line fails to parse.
    with open(name, 'r') as infile:
        # Blank lines (e.g. a trailing empty line) carry no record; skip them
        # instead of failing on float('').
        data = [[float(x) for x in line.split(',')]
                for line in infile if line.strip()]

    # Rescales the rows in place before they are split.
    normalise_outcomes(data)

    if flag:
        return create_dataset(data)
    return create_dataset_test(data)

In [7]:
# Load both splits. A truthy flag routes the rows through the training split
# helper, a falsy flag through the test split helper.
X_train, y_train = parse_dataset(
    name='../data/estimation_without_padding.csv',
    flag=1,
)
X_test, y_test = parse_dataset(
    name='../data/competition_without_padding.csv',
    flag=0,
)

In [8]:
# Peek at the first feature row of each split. A notebook cell only renders
# the value of its last expression, so the output shown is X_test[0];
# X_train[0] is evaluated but not displayed.
X_train[0]
X_test[0]

[1.0,
 0.09444444444444447,
 2.0,
 0.0685185185185185,
 1.0,
 0.09444444444444447,
 1.0,
 0.09444444444444447,
 1.0,
 0.09444444444444447,
 1.0,
 0.09444444444444447,
 1.0,
 0.09444444444444447,
 1.0,
 0.09444444444444447]

In [9]:
# maxlen = max(max([len(x) for x in X_train]), max([len(x) for x in X_test]))
# X_train = pad_sequences(X_train, padding='post', value=0, dtype=float, maxlen=maxlen)
# X_test = pad_sequences(X_test, padding='post', value=0, dtype=float, maxlen=maxlen)

In [10]:
def sarima_forecast(orde, sor, tre, train,test):
    """Fit a SARIMAX model on ``train`` and forecast the next single step.

    Parameters
    ----------
    orde : sequence
        Passed to SARIMAX as ``order``.
    sor : sequence
        Passed to SARIMAX as ``seasonal_order``.
    tre : str
        Passed to SARIMAX as ``trend``.
    train : DataFrame-like
        Must expose a ``'choice'`` column (endogenous series) and an
        ``'outcome'`` column (exogenous regressor).
    test : DataFrame-like
        Its ``'outcome'`` column supplies the exogenous value for the
        forecast step.

    Returns
    -------
    The predicted value at position ``len(train)``, i.e. one step past the
    end of the training series.
    """
    # Use the `sor` argument for the seasonal order; reading a module-level
    # name here would make the function depend on notebook execution order.
    model = sm.tsa.SARIMAX(endog=train['choice'], exog=train[['outcome']], order=orde,
                           seasonal_order=sor, trend=tre,
                           enforce_invertibility=False, enforce_stationarity=False)
    # Start every parameter at 0 except the last one, which starts at 1
    # (presumably the variance term — TODO confirm).
    start_params = np.r_[[0] * (model.k_params - 1), 1]
    result = model.fit(start_params=start_params, disp=False)
    horizon = len(train)
    yhat = result.predict(start=horizon, end=horizon, exog=test[['outcome']])
    return yhat[horizon]

In [11]:
def to_matrix(l, n):
    """Cut ``l`` into consecutive slices of length ``n``.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    Returns a list of the slices, in order.
    """
    rows = []
    for start in range(0, len(l), n):
        stop = start + n
        rows.append(l[start:stop])
    return rows

In [12]:
def _nearest_choice(pred):
    """Map a continuous forecast to choice 2 if it is strictly closer to 2, else 1."""
    return 2 if abs(pred - 1) > abs(pred - 2) else 1


def _forecast_final_choices(X, orde, sord, tren):
    """Return one predicted choice (1 or 2) per flat feature sequence in ``X``."""
    choices = []
    for sequence in X:
        # Re-shape the flat sequence into rows of (choice, outcome).
        df = pd.DataFrame(to_matrix(sequence, 2))
        df.columns = ['choice', 'outcome']
        # Fit on every step but the last; the last row supplies the exogenous
        # outcome for the single forecast step.
        history, last_step = df[:-1], df[-1:]
        yhat = sarima_forecast(orde, sord, tren, history, last_step)
        choices.append(_nearest_choice(yhat))
    return choices


def check_performance(X_train, X_test, y_train, y_test, orde, sord, tren):
    """Score one SARIMA configuration on the training and test sequences.

    For every sequence a model is fitted on all (choice, outcome) pairs except
    the last and asked to forecast that last step; the forecast is snapped to
    the nearer of the two choices (1 or 2) and compared with the label.

    NOTE(review): the forecast targets the last *observed* choice of the
    sequence, yet it is scored against the separate label in ``y_*`` —
    confirm this is the intended evaluation.

    Parameters
    ----------
    X_train, X_test : list of flat sequences
        Each sequence alternates choice, outcome, choice, outcome, ...
    y_train, y_test : list
        Labels aligned with ``X_train`` / ``X_test``.
    orde, sord, tren
        Order, seasonal order and trend forwarded to ``sarima_forecast``.

    Returns
    -------
    tuple (train_score, test_score)
        Accuracy on each split.
    """
    train_pred = _forecast_final_choices(X_train, orde, sord, tren)
    # accuracy_score expects (y_true, y_pred); accuracy itself is symmetric.
    train_score = accuracy_score(y_train, train_pred)

    test_pred = _forecast_final_choices(X_test, orde, sord, tren)
    test_score = accuracy_score(y_test, test_pred)

    return (train_score, test_score)
    

In [13]:
# define config lists
p_params = [0, 1, 2]
d_params = [0, 1]
q_params = [0, 1, 2]
P_params = [0, 1, 2]
D_params = [0, 1]
Q_params = [0, 1, 2]
t_params = ['n','c','t','ct']
m = 0 #seasonal
# NOTE(review): the recorded output only lists a few seasonal orders per
# (order, trend) pair, which suggests many combinations fail with m = 0 and
# are silently dropped below — confirm the intended seasonal period.

# create config instances
# product() walks the grid in the same order as nesting the loops
# p -> d -> q -> t -> P -> D -> Q would.
for p, d, q, t, P, D, Q in product(p_params, d_params, q_params, t_params,
                                   P_params, D_params, Q_params):
    orde = [p, d, q]
    sord = [P, D, Q, m]
    tren = t

    try:
        # never show warnings when grid searching, too noisy
        with catch_warnings():
            filterwarnings("ignore")
            train_score, test_score = check_performance(X_train, X_test, y_train, y_test,
                                                        orde, sord, tren)
    except Exception:
        # Configurations the model rejects are skipped. Catching Exception
        # (not a bare except) keeps KeyboardInterrupt working, so the long
        # search can still be interrupted from the notebook.
        continue

    print(orde, sord, tren, (train_score, test_score))

[0, 0, 0] [0, 0, 0, 0] n (0.552991452991453, 0.4791318864774624)
[0, 0, 0] [1, 0, 1, 0] n (0.5692307692307692, 0.5592654424040067)
[0, 0, 0] [2, 0, 2, 0] n (0.5641025641025641, 0.5575959933222037)
[0, 0, 0] [0, 0, 0, 0] c (0.5769230769230769, 0.5601001669449082)
[0, 0, 0] [1, 0, 1, 0] c (0.564957264957265, 0.5667779632721202)
[0, 0, 0] [2, 0, 2, 0] c (0.558974358974359, 0.5400667779632721)
[0, 0, 0] [0, 0, 0, 0] t (0.49914529914529915, 0.48998330550918195)
[0, 0, 0] [1, 0, 1, 0] t (0.5572649572649573, 0.5392320534223706)
[0, 0, 0] [2, 0, 2, 0] t (0.5641025641025641, 0.5400667779632721)
[0, 0, 0] [0, 0, 0, 0] ct (0.5495726495726496, 0.5409015025041736)
[0, 0, 0] [1, 0, 1, 0] ct (0.5683760683760684, 0.5484140233722872)
[0, 0, 0] [2, 0, 2, 0] ct (0.5632478632478632, 0.5417362270450752)
[0, 0, 1] [0, 0, 0, 0] n (0.5794871794871795, 0.5400667779632721)
[0, 0, 1] [1, 0, 1, 0] n (0.5623931623931624, 0.5684474123539232)
[0, 0, 1] [2, 0, 2, 0] n (0.5512820512820513, 0.5342237061769616)
[0, 0, 1

[1, 1, 2] [0, 0, 0, 0] n (0.5641025641025641, 0.5584307178631052)
[1, 1, 2] [1, 0, 1, 0] n (0.5658119658119658, 0.5375626043405676)
[1, 1, 2] [2, 0, 2, 0] n (0.5658119658119658, 0.5275459098497496)
[1, 1, 2] [0, 0, 0, 0] c (0.5666666666666667, 0.5475792988313857)
[1, 1, 2] [1, 0, 1, 0] c (0.5615384615384615, 0.5358931552587646)
[1, 1, 2] [2, 0, 2, 0] c (0.5495726495726496, 0.5300500834724541)
[1, 1, 2] [0, 0, 0, 0] t (0.5581196581196581, 0.5342237061769616)
[1, 1, 2] [1, 0, 1, 0] t (0.5512820512820513, 0.5233722871452421)
[1, 1, 2] [2, 0, 2, 0] t (0.5367521367521367, 0.5208681135225376)
[1, 1, 2] [0, 0, 0, 0] ct (0.552991452991453, 0.5292153589315526)
[1, 1, 2] [1, 0, 1, 0] ct (0.5427350427350427, 0.5233722871452421)
[1, 1, 2] [2, 0, 2, 0] ct (0.5478632478632479, 0.5242070116861436)
[2, 0, 0] [0, 0, 0, 0] n (0.5606837606837607, 0.5642737896494157)
[2, 0, 0] [1, 0, 1, 0] n (0.5700854700854701, 0.5509181969949917)
[2, 0, 0] [2, 0, 2, 0] n (0.558974358974359, 0.5500834724540902)
[2, 0, 0]

In [14]:
# predictions=[]
# for (idx,each) in enumerate(X_train):
#     each = to_matrix(each,2)
#     df = pd.DataFrame(each)
#     df.columns = ['choice', 'outcome']
    
#     df_train, df_test = df[:-1], df[-1:]
    
#     tr = ['n']
#     error = []
#     for p in ([1]):
#         for q in ([1]):
#             for P in ([1]):
#                     for Q in ([1]):
#                         for m in ([5]):
#                             for t in tr:
#                                 orde = [p,1,q]
#                                 sord = [P,0,Q,m]
#                                 tren = t
#                                 for index,i in df_test.iterrows():
#                                     yhat = sarima_forecast(orde, sord, tren, df_train, df_test)
#                                     predictions.append(yhat)
# #                                     train = train.append(i)
# #                                 rmse = sqrt(mean_squared_error(df_te['MTTR'].values,predictions))
# #                                 combo = orde, sord, tren, rmse
# #                                 error.append(combo)
#                                     print(yhat, y_train[idx])
# #                                 print (orde, sord, tren, rmse)
    

In [15]:
# predictions_train=[]
# for pred in predictions:
#     if(abs(pred-1)>abs(pred-2)):
#         predictions_train.append(2)
#     else:
#         predictions_train.append(1)


In [16]:
# accuracy_score(predictions_train, y_train)

In [17]:
# predictions=[]
# for (idx,each) in enumerate(X_test):
#     each = to_matrix(each,2)
#     df = pd.DataFrame(each)
#     df.columns = ['choice', 'outcome']
    
#     df_train, df_test = df[:-1], df[-1:]
    
#     tr = ['n']
#     error = []
#     for p in ([1]):
#         for q in ([1]):
#             for P in ([1]):
#                     for Q in ([1]):
#                         for m in ([5]):
#                             for t in tr:
#                                 orde = [p,1,q]
#                                 sord = [P,0,Q,m]
#                                 tren = t
#                                 for index,i in df_test.iterrows():
#                                     yhat = sarima_forecast(orde, sord, tren, df_train, df_test)
#                                     predictions.append(yhat)
# #                                     train = train.append(i)
# #                                 rmse = sqrt(mean_squared_error(df_te['MTTR'].values,predictions))
# #                                 combo = orde, sord, tren, rmse
# #                                 error.append(combo)
#                                     print(yhat, y_test[idx])
# #                                 print (orde, sord, tren, rmse)
    

In [18]:
# predictions_train=[]
# for pred in predictions:
#     if(abs(pred-1)>abs(pred-2)):
#         predictions_train.append(2)
#     else:
#         predictions_train.append(1)

In [19]:
# accuracy_score(predictions_train, y_test)