### Model Template:

##### We're going to have loads of models flying around, so it will be really helpful if we can all write models like this so everything's standardised.

##### Essentially, we want all of our prediction objects to have a "train" function (if necessary) and a "predict" function. 

##### For each epidemic under consideration, we'll split the data in two and pass the first part to the "train" function. The "predict" function is given the test dataset (test_df), the whole dataset (all_df), and the number of weeks ahead to forecast (weeks_ahead).



In [82]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

class Linear_Regressor:
    """Demo model for the shared template: a two-lag linear autoregression.

    Every prediction object exposes a ``train`` method (if it needs fitting)
    and a ``predict`` method. The model fitted here is

        cases[t] = c[0] * cases[t-1] + c[1] * cases[t-2]

    with one pair of coefficients shared by all regions.

    All dataframes passed in have the form (rows in chronological order,
    most recent last):

        Region |  Week  |  Cases
        __________________________
        Oxford |   23   |  19284

    where Week is the week of the year (so seasonality can be incorporated).
    """

    ###############################################
    # Leave this one unchanged except for the model name!

    def __init__(self):
        self.needs_training = True
        self.name = 'Demo'

    ##############################################

    @staticmethod
    def _cases_with_lags(df, n_lags):
        """Return an (n_rows, n_lags + 1) float array of cases and their lags.

        Column 0 is ``Cases``; column k is ``Cases`` k rows earlier within the
        same ``Region`` (NaN where no such row exists). Rows stay in the
        order of ``df`` and ``df`` itself is not modified.
        """
        cases = pd.to_numeric(df['Cases']).astype(float)
        # Group by a plain array so the grouping is positional and does not
        # depend on the dataframe's index labels.
        by_region = cases.groupby(df['Region'].to_numpy())
        columns = [cases.to_numpy()]
        columns.extend(
            by_region.shift(lag).to_numpy() for lag in range(1, n_lags + 1)
        )
        return np.column_stack(columns)

    def train(self, train_df):
        """Fit the coefficients ``self.c`` by least squares on ``train_df``.

        Parameters:
            train_df: dataframe with 'Region' and 'Cases' columns, rows in
                chronological order within each region.

        Returns:
            None. The fitted coefficients are stored on ``self.c``.

        Raises:
            ValueError: if no region supplies a row with two preceding weeks,
                so there is nothing to fit.
        """
        self.needs_training = False  ### Keep this here!

        # Columns: current, last week, two weeks ago. Keep complete rows only.
        lagged = self._cases_with_lags(train_df, 2)
        lagged = lagged[~np.isnan(lagged).any(axis=1)]
        if len(lagged) == 0:
            raise ValueError(
                'train_df needs at least three consecutive weeks in some region'
            )
        current, back1, back2 = lagged.T

        #### Our model is current = c[0]*last_week + c[1]*two_weeks_ago
        def loss(c):
            return np.sum(np.square(c[0] * back1 + c[1] * back2 - current))

        #### Store trained parameter for prediction function
        self.c = minimize(loss, np.ones(2)).x

    ##############################################

    def predict(self, test_df, all_df, weeks_ahead):
        """Forecast 1..``weeks_ahead`` weeks ahead for every row of ``test_df``.

        Forecasts are computed on ``all_df`` (so the first test rows can use
        earlier history) and then joined onto ``test_df`` by index label.
        Neither input dataframe is modified.

        Parameters:
            test_df: rows to return predictions for; same structure as the
                training data, with index labels that also appear in all_df.
            all_df: the full dataset, same structure as the training data.
            weeks_ahead: number of forecast horizons to produce.

        Returns:
            A dataframe equal to ``test_df`` with 'Prediction 1' ..
            'Prediction <weeks_ahead>' appended as the rightmost columns.
            Rows that have no previous week within their region get 0 for
            every horizon.
        """
        # Columns: current cases, last week's cases.
        history = self._cases_with_lags(all_df, 1)
        usable = ~np.isnan(history).any(axis=1)

        #### Perform Forecasts
        forecasts = np.zeros((len(all_df), weeks_ahead))
        one_back = history[usable, 0]
        two_back = history[usable, 1]
        for ahead in range(weeks_ahead):
            step = self.c[0] * one_back + self.c[1] * two_back
            forecasts[usable, ahead] = step
            # Roll the window forward: each forecast feeds the next horizon.
            two_back, one_back = one_back, step

        #### Final Outputs (on all data), then trim to test data
        columns = ['Prediction ' + str(ahead + 1) for ahead in range(weeks_ahead)]
        predictions = pd.DataFrame(forecasts, index=all_df.index, columns=columns)

        return pd.merge(test_df, predictions, left_index=True, right_index=True)

    ################################################
    def name(self):
        """Return the model's display name.

        Note that ``__init__`` stores the name string as the instance
        attribute ``name``, which shadows this method on instances; it is
        only reachable as ``Linear_Regressor.name(instance)``.
        """
        return self.name
    
    
    


In [126]:
def Process_Scores(predictions,dfout_test,weeks_ahead,name):
    """Score a model's forecasts with one RMSE per forecast horizon.

    For each row of ``dfout_test`` the true value k weeks ahead is the
    ``Cases`` value k rows later within the same ``Region``. Rows with no
    such future value (the tail of each region) are left out of that
    horizon's RMSE. ``dfout_test`` is not modified.

    Parameters:
        predictions: dataframe whose last ``weeks_ahead`` columns are the
            forecasts for horizons 1..weeks_ahead, with rows in the same
            order as ``dfout_test``.
        dfout_test: test dataframe with 'Region' and 'Cases' columns, rows in
            chronological order within each region.
        weeks_ahead: number of forecast horizons to score.
        name: label placed first in the returned row.

    Returns:
        A list ``[name, rmse_week_1, ..., rmse_week_<weeks_ahead>]``.
    """
    scores = [name]
    if weeks_ahead <= 0:
        # A slice of [-0:] would select every column, so stop here.
        return scores

    cases = pd.to_numeric(dfout_test['Cases']).astype(float)
    # Group by a plain array so the grouping is positional and does not
    # depend on the dataframe's index labels.
    by_region = cases.groupby(dfout_test['Region'].to_numpy())

    # NaN marks "no observation that far ahead"; unlike a numeric sentinel it
    # cannot collide with a real case count.
    true_data = np.full((len(dfout_test), weeks_ahead), np.nan)
    for ahead in range(weeks_ahead):
        true_data[:, ahead] = by_region.shift(-(ahead + 1)).to_numpy()

    pred_data = predictions.to_numpy()[:, -weeks_ahead:].astype(float)

    for week in range(weeks_ahead):
        observed = ~np.isnan(true_data[:, week])
        errors = true_data[observed, week] - pred_data[observed, week]
        scores.append(np.sqrt(np.mean(np.square(errors))))

    return scores



def ILI_Data(path='ILINet.csv'):
    """Load the ILINet CSV and split it into train, test and full datasets.

    The first line of the file is skipped and the REGION, YEAR, WEEK and
    ILITOTAL columns are kept, renamed to Region, Year, Week and Cases. Rows
    whose ILITOTAL is the literal 'X' are dropped (presumably a
    missing-value marker -- confirm against the data source). Cases is then
    converted to a numeric dtype so downstream arithmetic does not rely on
    implicit string casting.

    Parameters:
        path: location of the CSV file; defaults to 'ILINet.csv' in the
            current working directory.

    Returns:
        A tuple ``(train, test, everything)`` of dataframes, each with the
        columns Region, Week, Cases. ``train`` holds rows before week 25 of
        2015, ``test`` holds rows from week 25 of 2015 up to (not including)
        week 25 of 2019, and ``everything`` holds all retained rows.
    """
    raw = pd.read_csv(path, skiprows=1)[['REGION', 'YEAR', 'WEEK', 'ILITOTAL']]
    raw = raw[raw['ILITOTAL'] != 'X']

    # Copy before renaming so we never write into a slice of `raw`.
    df_out = raw[['REGION', 'WEEK', 'ILITOTAL', 'YEAR']].copy()
    df_out.columns = ['Region', 'Week', 'Cases', 'Year']
    df_out['Cases'] = pd.to_numeric(df_out['Cases'])

    # Encode (year, week) as year + week/100 so one comparison orders them.
    period = df_out['Year'] + df_out['Week'] * 0.01

    dfout_train = df_out[period < 2015.25]
    dfout_test = df_out[(period >= 2015.25) & (period < 2019.25)]

    keep = ['Region', 'Week', 'Cases']
    return dfout_train[keep], dfout_test[keep], df_out[keep]



def ILI_Test(models,weeks_ahead):
    """Train and score each model class on the ILI data.

    Parameters:
        models: iterable of model classes; each is instantiated with no
            arguments and must provide ``train``, ``predict`` and ``name``.
        weeks_ahead: number of forecast horizons to evaluate.

    Returns:
        A dataframe with one row per model and the columns
        'Model', 'Week 1', ..., 'Week <weeks_ahead>', filled with the rows
        produced by ``Process_Scores``.
    """
    # This loader is the only thing to change to run on a different dataset.
    dfout_train, dfout_test, dfout = ILI_Data()

    # Column headings of the score table.
    header = ['Model'] + ['Week ' + str(horizon + 1) for horizon in range(weeks_ahead)]

    # Fit, forecast and score every model in turn.
    rows = []
    for model_cls in models:
        predictor = model_cls()
        predictor.train(dfout_train)
        predictions = predictor.predict(dfout_test, dfout, weeks_ahead)
        rows.append(
            Process_Scores(predictions, dfout_test, weeks_ahead, predictor.name)
        )

    return pd.DataFrame(rows, columns=header)
        
        
        
        
    
    


    


In [128]:
# Score every listed model class on the ILI data, five weeks ahead.
models = [Linear_Regressor]
ILI_Test(models, weeks_ahead=5)

Unnamed: 0,Model,Week 1,Week 2,Week 3,Week 4,Week 5
0,Demo,158.380565,269.6163,361.015936,431.983737,484.043647
