In [1]:
from LendingClubAutoencoder import preprocessing, autoencoders, training, testing

import torch

from datetime import datetime, timedelta

import os
import shutil

import json

# Model Training & Evaluation

In [2]:
# Train the final VAE and evaluate it on each quarter of the test year, caching
# the resulting losses as JSON so that a re-run of this cell can skip the work.
evaluation_results_path = 'evaluation_results.json'

evaluation_results = {}

if os.path.exists(evaluation_results_path):
    # Cached results exist: reuse them instead of retraining.
    with open(evaluation_results_path, 'r') as f:
        evaluation_results = json.load(f)
    print('Loaded Evaluation Results')

else:
    #Data
    lending_club_data_handler = preprocessing.DataHandler(csv_path='local_data/all_lending_club_loan_data_2007-2018.csv')
    # Report only once the handler has actually been constructed.
    print('Loaded Data Handler')

    #Date Ranges
    train_start = datetime(2007, 1, 1)
    train_end = datetime(2017, 5, 31)

    validation_start = datetime(2017, 6, 1)
    validation_end = datetime(2017, 12, 31)

    test_year = 2018

    #Cross Validation
    #print('Running Cross Validation')
    #evaluation_results['cv_results'] = testing.cross_validate_vae(train_start, train_end)

    print('Preparing for Final Model')
    # Start from a clean checkpoint directory.
    if os.path.exists('trained_models'):
        shutil.rmtree('trained_models')

    evaluation_results['evaluation_results'] = {}

    #Full evaluation
    train_data, train_mask = lending_club_data_handler.get_train_data(train_start, train_end)
    validation_data, validation_mask = lending_club_data_handler.get_test_data(validation_start, validation_end)

    train_loader = preprocessing.to_torch(train_data,train_mask)
    validation_loader = preprocessing.to_torch(validation_data,validation_mask)

    # Instantiate model and optimiser; the input width is taken from the first training row.
    model = autoencoders.VariationalAutoencoder(input_size=len(train_data[0]))
    optimiser = torch.optim.Adam(model.parameters(), lr=1e-5)#original is 1e-3

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Train model 
    print('Training Final Model')
    training.train_variational_autoencoder(model, optimiser, train_loader, validation_loader, device=device)

    # The checkpoint path does not depend on the quarter, so build it once.
    # NOTE(review): this points at '../trained_models' while the cleanup above removes
    # './trained_models' -- confirm which directory the training module writes to.
    model_file_name = f'../trained_models/vae_best-input_size:{len(train_data[0])}-hidden_size:64-latent_size:32.pt'

    print('Evaluating Final Model')
    # enumerate() supplies the quarter number; previously the counter was never
    # incremented, so every quarter overwrote the 'q1' entry.
    for quarter_counter, (start_month, end_month) in enumerate(zip([1,4,7,10], [3,6,9,12]), start=1):
        test_start = datetime(test_year, start_month, 1)

        # Last day of end_month = first day of the following month minus one day
        # (rolls over into the next year when end_month is December).
        next_month_start = datetime(test_year + end_month // 12, end_month % 12 + 1, 1)
        test_end = next_month_start - timedelta(days=1)

        test_data, test_mask = lending_club_data_handler.get_test_data(test_start, test_end)
        test_loader = preprocessing.to_torch(test_data,test_mask)

        total_loss, mse_loss, kl_loss = testing.test_vae(model_file_name, test_loader, device)

        evaluation_results['evaluation_results'][f'q{quarter_counter}'] = {
            'total_loss': total_loss,
            'mse_loss': mse_loss,
            'kl_loss': kl_loss
        }

    # Persist the results; the context manager guarantees the file is flushed and closed.
    with open(evaluation_results_path, 'w') as f:
        json.dump(evaluation_results, f)


Loaded Data Handler
Preparing for Final Model
Training Final Model
Epoch 1/100:
  Training Loss: 0.6142
  Validation Loss: 0.2098
Epoch 2/100:
  Training Loss: 0.1916
  Validation Loss: 0.2068
Epoch 3/100:
  Training Loss: 0.1902
  Validation Loss: 0.2063
Epoch 4/100:
  Training Loss: 0.1899
  Validation Loss: 0.2062
Epoch 5/100:
  Training Loss: 0.1898
  Validation Loss: 0.2062
Epoch 6/100:
  Training Loss: 0.1898
  Validation Loss: 0.2061
Epoch 7/100:
  Training Loss: 0.1897
  Validation Loss: 0.2061
Epoch 8/100:
  Training Loss: 0.1897
  Validation Loss: 0.2061
Epoch 9/100:
  Training Loss: 0.1897
  Validation Loss: 0.2061
Epoch 10/100:
  Training Loss: 0.1897
  Validation Loss: 0.2062
Epoch 11/100:
  Training Loss: 0.1897
  Validation Loss: 0.2061
Epoch 12/100:
  Training Loss: 0.1897
  Validation Loss: 0.2061
Epoch 13/100:
  Training Loss: 0.1897
  Validation Loss: 0.2062


KeyboardInterrupt: 