# In [1]:
def import_station_data(path,station_name):
    """Load the CSV file of one station into a DataFrame.

    Reads ``<path>/<station_name>.csv`` and removes the ``"Unnamed: 0"``
    column when the file contains one.

    Args:
        path: Directory that holds the station CSV files.
        station_name: File name of the station without the ``.csv`` suffix.

    Returns:
        pandas.DataFrame with the file contents, minus ``"Unnamed: 0"``.
    """
    station_data = pd.read_csv(f"{path}/{station_name}.csv")
    # "Unnamed: 0" is the name pandas assigns to an unlabeled first column
    # (presumably a saved index). errors="ignore" keeps files that were
    # written without that column loadable instead of raising KeyError.
    return station_data.drop("Unnamed: 0", axis=1, errors="ignore")

def preprocess(path,station_name,dependent_variable_label):
    """Load a station file, drop duplicated features and split off the target.

    Columns whose pairwise correlation (rounded to two decimals) equals 1
    are treated as duplicates; of each such group only the first column,
    in column order, is kept. ``station`` and ``time`` are always kept.

    Args:
        path: Directory that holds the station CSV files.
        station_name: File name of the station without the ``.csv`` suffix.
        dependent_variable_label: ``'temperature'``, ``'humidity'`` or
            ``'precipitation'``; selects the target column.

    Returns:
        Tuple ``(preprocessed_data, x, y)`` where ``preprocessed_data`` is
        the reduced frame, ``x`` is that frame without the target column
        and ``y`` is the target column.

    Raises:
        ValueError: If ``dependent_variable_label`` is not a known label.
    """
    target_columns = {
        'temperature': 'temp_2m_avg [degF]',
        'humidity': 'relative_humidity_avg [percent]',
        'precipitation': 'precip_total [inch]',
    }
    if dependent_variable_label not in target_columns:
        raise ValueError(
            "dependent_variable_label must be one of "
            f"{sorted(target_columns)}, got {dependent_variable_label!r}"
        )
    dependent_variable = target_columns[dependent_variable_label]

    data = import_station_data(path,station_name)

    # Correlate numeric columns only: recent pandas versions raise when
    # corr() meets string columns such as 'station' or 'time'.
    correlation = data.select_dtypes(include=['number', 'bool']).corr().round(2)

    # Walk the columns in order; every later column that correlates
    # perfectly with a column still kept is marked as a duplicate.
    duplicates = set()
    for column in correlation.index:
        if column in duplicates:
            continue
        row = correlation.loc[column]
        for other in row[row == 1].index:
            if other != column:
                duplicates.add(other)
    kept = [column for column in correlation.index if column not in duplicates]

    # 'station' and 'time' lead the frame; skip them in `kept` so a numeric
    # 'station' column is not selected twice.
    leading = ['station', 'time']
    selected = leading + [column for column in kept if column not in leading]
    preprocessed_data = data[selected].copy()

    # NOTE(review): if the target column is a perfect duplicate of an
    # earlier column it has been removed above and the next line raises
    # KeyError -- confirm this cannot happen with the station data.
    independent_feature_label = preprocessed_data.columns.drop([dependent_variable])
    x = preprocessed_data[independent_feature_label]
    y = preprocessed_data[dependent_variable]

    return preprocessed_data,x,y
    
    
def classical_time_series_data(path,station_name,dependent_variable_label):
    """Return only the columns a univariate time-series model needs.

    Args:
        path: Directory that holds the station CSV files.
        station_name: File name of the station without the ``.csv`` suffix.
        dependent_variable_label: ``'temperature'``, ``'humidity'`` or
            ``'precipitation'``; selects the target column.

    Returns:
        A new DataFrame with the columns ``station``, ``time`` and the
        target column of the preprocessed station data.

    Raises:
        ValueError: If ``dependent_variable_label`` is not a known label.
    """
    target_columns = {
        'temperature': 'temp_2m_avg [degF]',
        'humidity': 'relative_humidity_avg [percent]',
        'precipitation': 'precip_total [inch]',
    }
    if dependent_variable_label not in target_columns:
        raise ValueError(
            "dependent_variable_label must be one of "
            f"{sorted(target_columns)}, got {dependent_variable_label!r}"
        )
    dependent_variable = target_columns[dependent_variable_label]

    # preprocess() returns (frame, x, y); only the full frame is needed.
    preprocessed_frame = preprocess(path,station_name,dependent_variable_label)[0]
    # Hand out an independent copy so callers can assign to its columns
    # without pandas warning about writing to a slice.
    return preprocessed_frame[['station','time',dependent_variable]].copy()

def time_coversion(time_col): #Converts time from string to datetime
    """Convert timestamp strings to dates, discarding the time of day.

    Each entry is split on whitespace and only its first token is parsed.

    Args:
        time_col: Iterable of strings (list, Series, ...) whose first
            whitespace-separated token is a date.

    Returns:
        The result of ``pd.to_datetime`` on the list of date tokens, one
        entry per input element and in input order.
    """
    # Iterate over the values instead of indexing by position: time_col[i]
    # is a label lookup on a Series and fails once the index is no longer
    # 0..n-1 (for example after rows were filtered out).
    date_tokens = [stamp.split()[0] for stamp in time_col]
    return pd.to_datetime(date_tokens)



# In [1]:


def data_prep(dpath, station_name,target_variable):
    """Build the model input frame for one station and target.

    Fetches the ``station`` / ``time`` / target columns through
    ``classical_time_series_data`` and replaces the ``time`` column with
    the output of ``time_coversion``.

    Args:
        dpath: Directory that holds the station CSV files.
        station_name: File name of the station without the ``.csv`` suffix.
        target_variable: Label of the target, passed through unchanged.

    Returns:
        The frame with its ``time`` column converted.
    """
    station_frame = classical_time_series_data(dpath,station_name,target_variable)
    converted_time = time_coversion(station_frame['time'])
    station_frame['time'] = converted_time
    return station_frame

def _rows_for_years(data, years, target):
    """Return the rows of ``data`` from ``years`` that have a target value."""
    # Collect year by year so the rows come out grouped in the order given
    # by `years`, whatever the row order of `data` is.
    yearly = [data[data['time'].dt.year == year] for year in years]
    # pd.concat rejects an empty list, so fall back to an empty slice.
    selected = pd.concat(yearly) if yearly else data.iloc[0:0]
    return selected.dropna(subset=[target])


def TrainTestSplit(data,target_variable,train_years=(2018,2019,2020),test_years=(2021,2022)):
    """Split the data into train and test sets by calendar year.

    Rows whose target value is missing are removed from both sets.

    Args:
        data: DataFrame with a datetime ``time`` column and the target column.
        target_variable: ``'temperature'``, ``'humidity'`` or
            ``'precipitation'``; selects the target column.
        train_years: Years whose rows form the training set.
        test_years: Years whose rows form the test set.

    Returns:
        Tuple ``(train, test)`` of DataFrames.

    Raises:
        ValueError: If ``target_variable`` is not a known label.
    """
    target_columns = {
        'temperature': 'temp_2m_avg [degF]',
        'humidity': 'relative_humidity_avg [percent]',
        'precipitation': 'precip_total [inch]',
    }
    if target_variable not in target_columns:
        raise ValueError(
            f"target_variable must be one of {sorted(target_columns)}, "
            f"got {target_variable!r}"
        )
    target = target_columns[target_variable]

    train = _rows_for_years(data, train_years, target)
    test = _rows_for_years(data, test_years, target)
    return train,test

def AR_model(path,train,test,target_variable,p,d,q,station_name,model_name,train_method,seasonal_order=(0,0,0,0),window=None):
    """Fit an AR / ARIMA / SARIMA model and evaluate its forecast on ``test``.

    The model is fitted on the target column of ``train`` and asked for
    ``len(test)`` steps following the training sample. The forecast is
    written to ``<path>/<model_name>/Prediction/<run>.csv`` and a plot of
    actual versus predicted values to ``<path>/<model_name>/<run>.png``,
    where ``<run>`` is
    ``<station_name>_<model_name>_<train_method>_<target_variable>_<window>``.

    Args:
        path: Root output directory.
        train: DataFrame with the training rows (needs the target column).
        test: DataFrame with the evaluation rows (needs ``time`` and the
            target column).
        target_variable: ``'temperature'``, ``'humidity'`` or
            ``'precipitation'``; selects the target column.
        p: Number of lags for ``'AR'``; AR order for ``'ARIMA'``.
        d: Differencing order for ``'ARIMA'``.
        q: MA order for ``'ARIMA'``.
        station_name: Station identifier used in file names and the title.
        model_name: ``'AR'``, ``'ARIMA'`` or ``'SARIMA'``.
        train_method: Free-form tag used in the output file names.
        seasonal_order: Seasonal order passed to the ``'SARIMA'`` model.
        window: Value used as the last component of the output file names.
            When omitted, a module-level variable named ``window`` is used,
            which is what earlier versions of this function relied on.

    Returns:
        Mean absolute error between the test target and the forecast.

    Raises:
        ValueError: If ``target_variable`` or ``model_name`` is unknown, if
            no ``window`` is available, or if ``test`` is empty.
    """
    import os

    target_columns = {
        'temperature': 'temp_2m_avg [degF]',
        'humidity': 'relative_humidity_avg [percent]',
        'precipitation': 'precip_total [inch]',
    }
    if target_variable not in target_columns:
        raise ValueError(
            f"target_variable must be one of {sorted(target_columns)}, "
            f"got {target_variable!r}"
        )
    target = target_columns[target_variable]

    if model_name not in ('AR', 'ARIMA', 'SARIMA'):
        raise ValueError(
            f"model_name must be 'AR', 'ARIMA' or 'SARIMA', got {model_name!r}"
        )

    if window is None:
        # Backward compatibility: the function used to read a global
        # `window` that it never received as an argument.
        window = globals().get('window')
        if window is None:
            raise ValueError(
                "window must be passed or defined as a module-level variable"
            )

    if len(test) == 0:
        raise ValueError("test must contain at least one row")

    series = train[target]
    if model_name == 'AR':
        model_fit = AutoReg(series, lags=p).fit()
    elif model_name == 'ARIMA':
        model_fit = ARIMA(series, order=(p,d,q)).fit()
    else:
        # NOTE(review): the non-seasonal order is fixed at (0,0,0) here, so
        # p, d and q are ignored for SARIMA -- confirm this is intended.
        model = ARIMA(series,order=(0,0,0),seasonal_order=seasonal_order,enforce_stationarity=False)
        model_fit = model.fit(low_memory = True, cov_type = None)

    # predict() takes zero-based observation numbers relative to the
    # training sample, so the test period starts at len(train). Asking for
    # 1..len(test) would return fitted values for the first training rows.
    first_step = len(train)
    y_pred = model_fit.predict(first_step, first_step + len(test) - 1)
    y_pred_filled = y_pred.fillna(0)

    run_tag = f"{station_name}_{model_name}_{train_method}_{target_variable}_{window}"
    prediction_dir = f"{path}/{model_name}/Prediction"
    # Also creates <path>/<model_name>, which receives the plot below.
    os.makedirs(prediction_dir, exist_ok=True)

    # Align the three columns by position; the forecast and the test rows
    # carry unrelated index labels.
    prediction = pd.DataFrame({
        "time": test['time'].reset_index(drop=True),
        'predicted': y_pred_filled.reset_index(drop=True),
        'actual': test[target].reset_index(drop=True),
    })
    prediction.to_csv(f"{prediction_dir}/{run_tag}.csv")
    mae = mean_absolute_error(test[target], y_pred_filled)

    plt.figure(figsize=(20,10))
    plt.plot(test['time'],test[target],'--',label='actual')
    plt.plot(test['time'],y_pred,'-',label='prediction')
    plt.legend()
    plt.xlabel("Time")
    plt.ylabel(target_variable)
    plt.title("Time series prediction of "+target_variable+" for the location "+station_name+" using "+model_name)
    plt.savefig(f"{path}/{model_name}/{run_tag}.png")
    plt.close()
    return mae

def ARIMA_compute_d(data,target_variable,significance=0.05):
    """Count how often the target series must be differenced.

    The series is differenced repeatedly until the second element of the
    ``adfuller`` result (used here as the test's p-value) is no longer
    above ``significance``.

    Args:
        data: DataFrame that contains the target column.
        target_variable: ``'temperature'``, ``'humidity'`` or
            ``'precipitation'``; selects the target column.
        significance: Threshold the p-value is compared against.

    Returns:
        The number of differencing steps applied (0 if none was needed).

    Raises:
        ValueError: If ``target_variable`` is not a known label.
    """
    target_columns = {
        'temperature': 'temp_2m_avg [degF]',
        'humidity': 'relative_humidity_avg [percent]',
        'precipitation': 'precip_total [inch]',
    }
    if target_variable not in target_columns:
        raise ValueError(
            f"target_variable must be one of {sorted(target_columns)}, "
            f"got {target_variable!r}"
        )

    series = data[target_columns[target_variable]].dropna()
    d = 0
    while adfuller(series)[1] > significance:
        # diff() leaves a leading NaN that has to go before the next test.
        series = series.diff().dropna()
        d += 1

    return d



# In [3]:
def interpretation_text_classical(path,station_name,model_name,train_method,target_variable,window):
    """Write a canned interpretation of a model's result to a text file.

    The text depends on the model family and the target variable and is
    written to
    ``<path>/<station_name>_<model_name>_<train_method>_<target_variable>_<window>.txt``,
    replacing any existing file of that name.

    Args:
        path: Output directory.
        station_name: Station identifier used in the file name.
        model_name: One of ``AR``, ``ARIMA``, ``SARIMA``, ``RF``, ``SVR``,
            ``LR``, ``RNN`` or ``LSTM``.
        train_method: Free-form tag used in the file name.
        target_variable: ``'temperature'``, ``'humidity'`` or
            ``'precipitation'``.
        window: Value used as the last component of the file name.

    Raises:
        ValueError: If no text exists for the model / target combination.
    """
    lagging_texts = {
        'temperature': "The model can estimate the trend, however it shows a large delay.",
        'humidity': "The model can estimate the trend, however it shows a large delay. It also fails to detect the spikes",
        'precipitation': "The model fails to detect the spikes completely.",
    }
    tracking_texts = {
        'temperature': "The model works well at predicting the trend and also can predict the spikes and variations.",
        'humidity': "The model works well at predicting the trend and also can predict the spikes and variations.",
        'precipitation': "The model works well at predicting the trend and also can predict the variations. However, it fails to predict the spikes. ",
    }

    if model_name in ("AR", "ARIMA", "SARIMA", "RF", "SVR"):
        texts = lagging_texts
    elif model_name in ("LR", "RNN", "LSTM"):
        texts = tracking_texts
    else:
        texts = {}

    # Resolve the text before touching the file system so that an unknown
    # combination does not leave an empty file behind.
    if target_variable not in texts:
        raise ValueError(
            f"no interpretation text for model {model_name!r} "
            f"and target {target_variable!r}"
        )
    text = texts[target_variable]

    file_name = f"{path}/{station_name}_{model_name}_{train_method}_{target_variable}_{window}.txt"
    # The context manager guarantees the handle is flushed and closed.
    with open(file_name, "w") as file:
        file.write(text)
