In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import torch 

from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler, FunctionTransformer
from transformer_encoder_decoder_qr_model import TransformerEncoderDecoderQRModel

import random 
import os
import sys 
import copy 

In [None]:
# Run on the first CUDA device when one is visible, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
if use_cuda:
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

In [None]:
# --- Transformer architecture ---
nhead = 4          # number of heads in the multi-head attention models
d_model = 32       # model dimension
d_hid = 64         # dimension of the feedforward network model
nlayers = 2        # number of nn.TransformerEncoderLayer in nn.TransformerEncoder
dropout = 0.1      # dropout rate

# --- Windowing of the time series ---
input_length = 4   # number of data points in src
output_length = 1  # number of data points in tgt
weeks_ahead = 1
nfeatures = 1      # dimension of data - currently only 1d timeseries data
batch_size = 8

# --- Quantile levels, in ascending order ---
quantiles = [
    0.010, 0.025, 0.050, 0.100, 0.150, 0.200,
    0.250, 0.300, 0.350, 0.400, 0.450, 0.500,
    0.550, 0.600, 0.650, 0.700, 0.750, 0.800,
    0.850, 0.900, 0.950, 0.975, 0.990,
]
nquantiles = len(quantiles)

# Positions of the median and of the 2.5% / 97.5% levels inside `quantiles`.
q_median_ind, q_low_ind, q_high_ind = (
    quantiles.index(level) for level in (0.5, 0.025, 0.975)
)

In [None]:
def load_ili_data(state):
    """Load the weekly ILI series for one location as a date-indexed DataFrame.

    Parameters
    ----------
    state : str
        ``'US'`` reads the ``weighted_ili`` column of
        ``./data/ILI_national_2002_2024.csv``. Any other value is taken as the
        name of a column in ``./data/ILI_states_2010_2024.csv``, which is
        renamed to ``weighted_ili``.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``week`` and ``weighted_ili``, indexed by the parsed
        ``date`` column. Rows with ``week == 53`` are dropped.

    Raises
    ------
    KeyError
        If the requested column (or one of ``date``/``year``/``week``) is
        missing from the CSV file.
    """
    if state == 'US':
        raw = pd.read_csv('./data/ILI_national_2002_2024.csv')
        value_col = 'weighted_ili'
    else:
        raw = pd.read_csv('./data/ILI_states_2010_2024.csv')
        value_col = state
        if value_col not in raw.columns:
            raise KeyError(f"state column {state!r} not found in ./data/ILI_states_2010_2024.csv")

    # Take an explicit copy of the filtered rows: assigning the parsed dates to
    # a filtered slice is what triggers pandas' SettingWithCopyWarning.
    keep_rows = raw['week'] != 53  # ignore week 53 for now
    ILI_df = raw.loc[keep_rows, ['date', 'year', 'week', value_col]].copy()
    ILI_df = ILI_df.rename(columns={value_col: 'weighted_ili'})
    ILI_df['date'] = pd.to_datetime(ILI_df['date'])
    return ILI_df.set_index('date')

In [None]:
state = 'US'
ILI_df = load_ili_data(state)

# Date boundaries of the three segments: everything before `start_covid_date`
# is 'train', everything from `start_test_date` on is 'test', and the span in
# between is plotted as 'throw (covid)'.
start_covid_date = pd.Timestamp('2020-06-28')
start_test_date = pd.Timestamp('2022-07-01')

# Positional (integer) row indices of each segment.
train_ind = np.where(ILI_df.index < start_covid_date)[0]
covid_ind = np.where((ILI_df.index >= start_covid_date) & (ILI_df.index < start_test_date))[0]
test_ind = np.where(ILI_df.index >= start_test_date)[0]

# Select rows by position directly instead of round-tripping through index
# labels, so the plot stays correct even if the date index had duplicates.
weighted_ili = ILI_df['weighted_ili']
plt.figure(figsize=(10, 4))
for split_ind, split_label in ((train_ind, 'train'),
                               (covid_ind, 'throw (covid)'),
                               (test_ind, 'test')):
    plt.plot(weighted_ili.iloc[split_ind], label=split_label)
plt.ylabel('weighted ILI')
plt.xlabel('week')
plt.title('ILI - {}'.format(state))
plt.legend(loc=2)
plt.show()

In [None]:
# Column vector (n_weeks, 1) of the weighted ILI values.
ili = ILI_df['weighted_ili'].values[:, np.newaxis]
train_ili, test_ili = ili[train_ind], ili[test_ind]

test_dates = ILI_df.index[test_ind]

# The scaler is fitted on the training segment only and then applied to both
# segments. (FunctionTransformer(lambda x: x) is the no-scaling alternative.)
scaler = MinMaxScaler()
scaler.fit(train_ili)
train_data, test_data = (scaler.transform(part) for part in (train_ili, test_ili))

for part in (train_data, test_data):
    print(len(part))

In [None]:
def get_data_loader(ts_data, pred, input_length, output_length, weeks_ahead, batch_size,
                    shuffle=True, drop_last=True):
    """Build a DataLoader of (input window, target window) pairs from a series.

    For every start position ``i`` the input is ``ts_data[i:i+input_length]``
    and the target is the ``output_length`` values starting at
    ``i + input_length + weeks_ahead - 1``, with a start-of-sequence marker
    (``SOS = -2``) inserted at position 0.

    When ``pred`` is given, column ``q_median_ind`` (a module-level name) of
    ``pred`` is left-padded with NaN to ``len(ts_data)``. The first
    ``weeks_ahead - 1`` entries of each input window are then dropped and
    replaced by ``pred[i+input_length+1 : i+input_length+weeks_ahead]``.
    ``ix_start`` skips start positions whose appended slice would begin
    inside the NaN padding.

    Parameters
    ----------
    ts_data : numpy array of observations, indexed along axis 0.
    pred : None, or a 2-D numpy array indexed as ``pred[:, q_median_ind]``.
    input_length, output_length, weeks_ahead : int window geometry (see above).
    batch_size, shuffle, drop_last : forwarded to ``torch.utils.data.DataLoader``.

    Returns
    -------
    torch.utils.data.DataLoader over ``TensorDataset(inputs, targets)``, both float32.

    NOTE(review): the appended ``pred`` slice ends at index
    ``i + input_length + weeks_ahead - 1``, which is the index the target slice
    starts at. If ``pred[j]`` is a forecast of ``ts_data[j]`` (as the front NaN
    padding suggests), the input then contains a forecast of the target
    itself - confirm this is intended.

    NOTE(review): ``np.append`` without ``axis`` flattens, so windows built with
    ``pred`` are 1-D, whereas windows built without ``pred`` keep the trailing
    dimensions of ``ts_data`` - TODO confirm the model accepts both shapes.
    """

    SOS = np.float32(-2)  # start-of-sequence marker inserted in front of every target (0 was an earlier choice)
    # EOS = np.float32(2) #0

    ix_start = 0
    if pred is not None:
        # First start position whose appended pred slice avoids the NaN padding.
        ix_start = max(0,(len(ts_data)-len(pred))- (input_length+1))
        # Median column of pred, left-padded with NaN to the length of ts_data, as a column vector.
        pred = np.expand_dims(np.append(np.repeat(np.nan, len(ts_data)-len(pred)),pred[:,q_median_ind]),-1)

    # The last start position is the one whose target slice ends at len(ts_data).
    ix = range(ix_start, len(ts_data) - input_length - output_length - weeks_ahead + 2)
    inputs = []
    targets = []

    for i in ix:
        input_sequence = ts_data[i:i+input_length]
        if pred is not None:
            # Drop the oldest weeks_ahead-1 observations and append weeks_ahead-1 values of pred.
            input_sequence = np.append(input_sequence[(weeks_ahead-1):],pred[(i+input_length+1):(i+input_length+weeks_ahead)])
        # np.insert without an axis flattens its input, so the target is 1-D: [SOS, y_0, ..., y_{output_length-1}].
        target_sequence = np.insert(ts_data[i+input_length+weeks_ahead-1:i+input_length+output_length+weeks_ahead-1],0,SOS)
        # target_sequence = np.append(np.insert(ts_data[i+input_length+weeks_ahead-1:i+input_length+output_length+weeks_ahead-1],0,SOS),EOS)
        inputs.append(torch.from_numpy(input_sequence.astype(np.float32)))
        targets.append(torch.from_numpy(target_sequence.astype(np.float32)))

    # Convert lists to tensors
    inputs = torch.stack(inputs)
    targets = torch.stack(targets)

    # Create dataset and dataloader
    dataset = TensorDataset(inputs, targets)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
    return (data_loader)

In [None]:
def quantile_loss(q, y, f):
    """Mean pinball (quantile) loss of predictions ``f`` against targets ``y``.

    q: quantile level, y: true values, f: predicted values for that quantile.
    Returns the element-wise ``max(q * (y - f), (q - 1) * (y - f))`` averaged
    over all elements.
    """
    residual = y - f
    under_prediction = q * residual        # the larger term when y > f (for 0 < q < 1)
    over_prediction = (q - 1) * residual   # the larger term when y < f (for 0 < q < 1)
    return torch.max(under_prediction, over_prediction).mean()

In [None]:
def wis_loss(q, y, f):
    """Weighted Interval Score (WIS) of quantile forecasts, averaged over observations.

    The score combines one interval score per central prediction interval,
    weighted by ``alpha / 2``, with the absolute error of the median, weighted
    by ``1 / 2``, and divides the sum by ``K + 1/2`` where ``K`` is the number
    of intervals. With only the median level the score therefore equals the
    mean absolute error.

    Parameters
    ----------
    q : list of float
        Quantile levels. Intervals are formed by pairing the i-th level with
        the i-th level from the end, so this assumes ``q`` is sorted ascending
        and symmetric around 0.5. The median term is included only when the
        exact value ``0.5`` is present.
    y : array or tensor
        Observed values; must broadcast against ``f[:, :, k]``.
    f : array or tensor
        Predicted quantiles, indexed as ``f[:, :, k]`` for level ``q[k]``.

    Returns
    -------
    Scalar of the same array library as ``f`` and ``y``.
    """
    n = len(q)
    # (lower, upper) index pairs: (0, n-1), (1, n-2), ...
    quantile_pairs_ind = [(i, n - i - 1) for i in range(n // 2)]

    wis = 0
    for (q_lower_ind, q_upper_ind) in quantile_pairs_ind:
        # Nominal coverage of this central interval and its complement alpha.
        p = q[q_upper_ind] - q[q_lower_ind]
        alpha = 1 - p

        L = f[:, :, q_lower_ind]
        U = f[:, :, q_upper_ind]

        # Interval score: width plus a 2/alpha penalty for observations outside [L, U].
        IS = (U - L) + (2 / alpha) * ((L - y) * (y < L) + (y - U) * (y > U))
        wis += (alpha / 2) * IS.mean()

    if 0.5 in q:
        median_predictions = f[:, :, q.index(0.5)]
        median_error = abs(median_predictions - y).mean()
        # The median term carries weight 1/2, matching the K + 1/2 normaliser below.
        wis += 0.5 * median_error

    K = len(quantile_pairs_ind)
    return wis / (K + 0.5)

In [None]:
def run_training_loop(num_epochs, optimizer, model, min_loss, training_loss, training_loader):
    """Train ``model`` for ``num_epochs`` epochs and keep the best snapshot.

    The loss of a batch is the sum over all levels in the module-level
    ``quantiles`` list of ``quantile_loss`` between the target (with its first,
    start-of-sequence element removed) and the matching slice
    ``output[:, :, i]`` of the model output.

    Parameters
    ----------
    num_epochs : int
        Number of epochs to run. Epoch numbering continues from
        ``len(training_loss)``.
    optimizer : torch.optim.Optimizer
    model : torch.nn.Module
        Called as ``model(src, tgt_input)``; trained in place.
    min_loss : float
        Best average training loss seen so far.
    training_loss : list of float
        Per-epoch average losses; appended to in place.
    training_loader : iterable of ``(input, target)`` tensor batches.

    Returns
    -------
    (best_model, optimizer, min_loss, training_loss)
        ``best_model`` is a deep copy of ``model`` taken after the epoch with
        the lowest average training loss (or at entry, if no epoch improved on
        ``min_loss``). The optimizer is returned unchanged, so it still refers
        to the parameters it was constructed with, not to those of the copy.

    Raises
    ------
    ValueError
        If ``training_loader`` yields no batches (e.g. fewer samples than
        ``batch_size`` with ``drop_last=True``).
    """
    model.train()
    best_model = copy.deepcopy(model)
    cur_epoch = len(training_loss)
    for epoch in range(cur_epoch, cur_epoch + num_epochs):

        loss_sum = 0.0
        n_samples = 0
        for src, target in training_loader:
            src, target = src.to(device), target.to(device)
            tgt_input = target[:, :-1]   # decoder input: everything but the last element
            tgt_output = target[:, 1:]   # values to predict: everything but the SOS element
            optimizer.zero_grad()
            output = model(src, tgt_input)
            loss = sum(quantile_loss(q, tgt_output.reshape(-1), output[:, :, i].view(-1))
                       for i, q in enumerate(quantiles))
            # loss = wis_loss(quantiles, tgt_output, output)
            loss.backward()
            optimizer.step()
            batch_n = src.size(0)
            loss_sum += loss.item() * batch_n
            n_samples += batch_n

        if n_samples == 0:
            raise ValueError("training_loader produced no batches; "
                             "check the dataset size against batch_size/drop_last")

        # Average over the samples actually seen: the loader may drop the last
        # partial batch, so len(training_loader.dataset) can overcount.
        avg_train_loss = loss_sum / n_samples
        training_loss.append(avg_train_loss)
        if avg_train_loss < min_loss:
            best_model = copy.deepcopy(model)
            min_loss = avg_train_loss
        print(f'epoch {epoch}: train loss - {round(avg_train_loss, 4)}')

    return (best_model, optimizer, min_loss, training_loss)

In [None]:
def get_model_pred(data, prev_pred, model, weeks_ahead):
    """Run ``model`` over every window of ``data`` and collect its quantile output.

    Windows come from ``get_data_loader`` (unshuffled, keeping the last partial
    batch) using the module-level ``input_length``, ``output_length`` and
    ``batch_size``. The sample-weighted mean ``wis_loss`` is printed.

    Returns a numpy array stacking ``output[:, 0, :]`` of every batch along
    axis 0.
    """
    data_loader = get_data_loader(data, prev_pred, input_length, output_length,
                                  weeks_ahead, batch_size, shuffle=False, drop_last=False)
    batch_preds = []
    model.eval()  # evaluation mode
    total_loss = 0
    with torch.no_grad():  # no gradients needed for inference
        for src, target in data_loader:
            src = src.to(device)
            target = target.to(device)
            tgt_input, tgt_output = target[:, :-1], target[:, 1:]
            output = model(src, tgt_input)
            batch_loss = wis_loss(quantiles, tgt_output, output)
            total_loss += batch_loss.item() * src.size(0)

            first_step = output[:, 0, :].to('cpu').detach().numpy().squeeze()
            if first_step.ndim == 1:
                # squeeze() also removed the batch axis (last batch may hold a single input)
                first_step = first_step[np.newaxis, ...]
            batch_preds.append(first_step)
        print(total_loss / len(data_loader.dataset))

    return np.concatenate(batch_preds, axis=0)

In [None]:
def train_and_pred(train_data, test_data, prev_pred_train, prev_pred_test, weeks_ahead,
                   lr_schedule=((50, 1e-3), (10, 1e-4), (10, 1e-5)),
                   plot_training_progress=True):
    """Train a fresh model for one horizon and predict on the train and test series.

    Parameters
    ----------
    train_data, test_data : arrays passed to ``get_data_loader`` / ``get_model_pred``.
    prev_pred_train, prev_pred_test : None or arrays
        Predictions of a previously trained model, forwarded as the ``pred``
        argument of ``get_data_loader``.
    weeks_ahead : int
        Forecast horizon forwarded to the data loader.
    lr_schedule : sequence of (num_epochs, learning_rate), optional
        Training phases run in order. Each phase starts a new Adam optimizer on
        the best model returned by the previous phase. The default reproduces
        the original fixed schedule (50 epochs at 1e-3, 10 at 1e-4, 10 at 1e-5).
    plot_training_progress : bool, optional
        Whether to plot the per-epoch training loss on a log scale.

    Returns
    -------
    (model, pred_train, pred_test)
        The best model by training loss and its predictions on both series.
    """
    training_loader = get_data_loader(train_data, prev_pred_train,
                                      input_length, output_length,
                                      weeks_ahead, batch_size)

    # Model hyper-parameters come from the module-level configuration.
    model = TransformerEncoderDecoderQRModel(nhead, d_model, d_hid, nlayers,
                                             input_length, output_length,
                                             nfeatures, nquantiles, dropout)
    model = model.to(device)

    min_loss = float('inf')
    training_loss = []

    for num_epochs, lr in lr_schedule:
        # A new optimizer per phase: the model returned by the previous phase
        # is a deep copy, so its parameters are new objects.
        optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
        (model, optimizer, min_loss, training_loss) = \
            run_training_loop(num_epochs, optimizer, model, min_loss, training_loss, training_loader)

    if plot_training_progress:
        plt.figure()
        plt.title('Training')
        plt.yscale('log')
        plt.plot(training_loss, label='training')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend()
        plt.show()

    pred_train = get_model_pred(train_data, prev_pred_train, model, weeks_ahead)
    pred_test = get_model_pred(test_data, prev_pred_test, model, weeks_ahead)
    return (model, pred_train, pred_test)
    

In [None]:
# Seed Python's and PyTorch's random number generators before any model is built.
random.seed(1)
torch.manual_seed(1)

# Horizons are trained in increasing order; each horizon receives the train and
# test predictions of the previous one (none for the first).
horizons = (1, 2, 3, 4, 5)
horizon_results = []
prev_pred_train, prev_pred_test = None, None
for horizon in horizons:
    fitted_model, prev_pred_train, prev_pred_test = train_and_pred(
        train_data, test_data, prev_pred_train, prev_pred_test, horizon)
    horizon_results.append((fitted_model, prev_pred_train, prev_pred_test))

# Per-horizon names used by the cells below.
weeks_ahead1, weeks_ahead2, weeks_ahead3, weeks_ahead4, weeks_ahead5 = horizons
(
    (model1, pred_train1, pred_test1),
    (model2, pred_train2, pred_test2),
    (model3, pred_train3, pred_test3),
    (model4, pred_train4, pred_test4),
    (model5, pred_train5, pred_test5),
) = horizon_results


In [None]:
def get_test_ili_data_and_pred(pred_test, weeks_ahead):
    """Return test predictions in ILI units with the matching observations and dates.

    ``pred_test`` is mapped back to the original scale with the module-level
    ``scaler``. The module-level ``test_ili`` and ``test_dates`` are then cut to
    the weeks the predictions refer to.

    The predictions are aligned to the END of the test series. The window range
    in ``get_data_loader`` always finishes on the window whose target slice ends
    at the last sample, but it may skip leading windows (``ix_start > 0``) when
    predictions of a previous horizon are fed in. Slicing from a fixed start of
    ``input_length + weeks_ahead - 1`` would then pair each prediction with an
    earlier week than the one it forecasts.

    Parameters
    ----------
    pred_test : array accepted by ``scaler.inverse_transform``, one row per prediction.
    weeks_ahead : int
        Forecast horizon. Kept for interface compatibility; the alignment no
        longer depends on it.

    Returns
    -------
    (pred_test_ili, test_ili_slice, test_dates_slice), all of equal length.

    Raises
    ------
    ValueError
        If there are more predictions than test observations available.
    """
    pred_test_ili = scaler.inverse_transform(pred_test)
    n_pred = len(pred_test_ili)

    # The last prediction is the first step of the last target window, which
    # starts output_length samples before the end of the series.
    stop = len(test_ili) - (output_length - 1)
    start = stop - n_pred
    if start < 0:
        raise ValueError(
            f"{n_pred} predictions cannot be aligned with {len(test_ili)} test observations")

    test_ili_slice = test_ili[start:stop]
    test_dates_slice = test_dates[start:stop]
    return (pred_test_ili, test_ili_slice, test_dates_slice)


In [None]:
# Back-transform each horizon's test predictions and cut the observations and
# dates to the same weeks.
aligned_test = [
    get_test_ili_data_and_pred(horizon_pred, horizon)
    for horizon_pred, horizon in (
        (pred_test1, weeks_ahead1),
        (pred_test2, weeks_ahead2),
        (pred_test3, weeks_ahead3),
        (pred_test4, weeks_ahead4),
        (pred_test5, weeks_ahead5),
    )
]
(
    (pred_test_ili1, test_ili1, test_dates1),
    (pred_test_ili2, test_ili2, test_dates2),
    (pred_test_ili3, test_ili3, test_dates3),
    (pred_test_ili4, test_ili4, test_dates4),
    (pred_test_ili5, test_ili5, test_dates5),
) = aligned_test

In [None]:
def plot_pred_test(pred_test_ili, test_ili, test_dates, weeks_ahead):
    """Print the WIS of one horizon and plot observations against predicted quantiles.

    The score is ``wis_loss`` over the module-level ``quantiles``, rounded to 3
    decimals, with a singleton axis inserted at position 1 of ``pred_test_ili``.
    The figure shows the observations together with the predicted median and
    the 0.025 / 0.975 quantile curves.
    """
    wis_score_test = np.round(
        wis_loss(quantiles, test_ili, np.expand_dims(pred_test_ili, 1)), 3)
    print(f'Weighted Interval Score - weeks_ahead={weeks_ahead} (length={len(test_ili)}): {wis_score_test}')

    lowq, uppq = 0.025, 0.975  # (0.25 / 0.75 is the alternative band)
    # (column in pred_test_ili, legend label, colour) for each predicted curve
    curves = (
        (quantiles.index(0.5), 'pred median', 'green'),
        (quantiles.index(lowq), 'pred low ({})'.format(lowq), 'blue'),
        (quantiles.index(uppq), 'pred high ({})'.format(uppq), 'red'),
    )

    plt.figure(figsize=(10, 4))
    plt.plot(test_dates, test_ili, 'o--', markersize=3, label='data', color='black', alpha=0.75)
    for column, curve_label, curve_color in curves:
        plt.plot(test_dates, pred_test_ili[:, column],
                 label=curve_label, alpha=0.75, color=curve_color)
    plt.xlabel('week')
    plt.ylabel('ILI')
    plt.title('state={}, horizon={} weeks (WIS={})'.format(state, weeks_ahead, wis_score_test))
    plt.legend(loc=0)

In [None]:
# One figure (and one printed WIS) per forecast horizon.
for plot_args in (
    (pred_test_ili1, test_ili1, test_dates1, weeks_ahead1),
    (pred_test_ili2, test_ili2, test_dates2, weeks_ahead2),
    (pred_test_ili3, test_ili3, test_dates3, weeks_ahead3),
    (pred_test_ili4, test_ili4, test_dates4, weeks_ahead4),
    (pred_test_ili5, test_ili5, test_dates5, weeks_ahead5),
):
    plot_pred_test(*plot_args)

In [None]:
def save_model_and_pred(model, weeks_ahead, test_dates, test_ili, pred_test_ili):
    """Save one horizon's model weights and its test forecasts to disk.

    Writes two files, named after the module-level ``state`` and ``weeks_ahead``:

    * ``./models/model_ili_{state}_{weeks_ahead}w_horizon.pth`` - a dict with
      the single key ``'model_state_dict'``.
    * ``./output/forecasts_ili_{state}_{weeks_ahead}w_horizon.csv`` - indexed by
      ``test_dates``, with an ``ILI`` column holding ``test_ili`` followed by
      one column per level in the module-level ``quantiles`` list.

    Both folders are created if they do not exist yet.
    """
    models_folder = './models'
    # exist_ok avoids the check-then-create race of isdir() followed by makedirs().
    os.makedirs(models_folder, exist_ok=True)
    model_file = '{}/model_ili_{}_{}w_horizon.pth'.format(models_folder, state, weeks_ahead)
    torch.save({'model_state_dict': model.state_dict()}, model_file)

    observed = pd.DataFrame(index=test_dates, data=test_ili, columns=['ILI'])
    forecasts = pd.DataFrame(index=test_dates, data=pred_test_ili, columns=quantiles)
    df = pd.concat([observed, forecasts], axis=1)

    output_folder = './output'
    os.makedirs(output_folder, exist_ok=True)
    output_file = '{}/forecasts_ili_{}_{}w_horizon.csv'.format(output_folder, state, weeks_ahead)
    df.to_csv(output_file)


In [None]:
# Persist the weights and the test forecasts of every horizon.
for save_args in (
    (model1, weeks_ahead1, test_dates1, test_ili1, pred_test_ili1),
    (model2, weeks_ahead2, test_dates2, test_ili2, pred_test_ili2),
    (model3, weeks_ahead3, test_dates3, test_ili3, pred_test_ili3),
    (model4, weeks_ahead4, test_dates4, test_ili4, pred_test_ili4),
    (model5, weeks_ahead5, test_dates5, test_ili5, pred_test_ili5),
):
    save_model_and_pred(*save_args)