# NeuralNetwork.ipynb

Purpose: Create a neural network to model launch scrub/no scrub based on weather conditions.


In [1]:
import pandas as pd
import numpy as np
import pathlib
import torch
import torch.nn as nn
import torch.nn.functional as F


### Data Gathering

Data was produced from Bravo-Wx-Launch hackathon team. They created the raw-data-transform-multi.py script to compile of the data. I took all of this data and placed it into a single dataframe for analysis.

In [2]:
if pathlib.Path('combine.csv').exists():
    df = pd.read_csv('combine.csv',index_col=0,header=0)
else:
    base_dir = pathlib.Path('/home/bearmint/projects/bravo-wx-launch/bravo-wx-launch/test-runs/test-run-20220722-0845 (complete)')
    df_list = []
    for filename in list(pathlib.Path.iterdir(base_dir)):
        with open(pathlib.Path.joinpath(base_dir, filename), 'r') as f:
            df_list.append(pd.read_csv(f))
            
    df = pd.concat(df_list, ignore_index=True)
    df.set_index('Unnamed: 0', inplace=True)
    df.to_csv('combine.csv')

In [3]:
df

Unnamed: 0_level_0,Balloon Wind Speed,Balloon Precipitable Water,Field Mill Mean,Sum of Lightning Strike Signals,Count of Lightning Strikes,Rain Gauge Inches,Avg Wind Speed 0002 NW SE,Avg Wind Speed 0002 SE SE,Avg Wind Speed 0006 NW SE,Avg Wind Speed 0006 SE SE,...,Peak Wind Speed 0006 NW NW,Peak Wind Speed 0006 SE NW,Deviation 0006 NW NW,Deviation 0006 SE NW,Temp 0006 NW NW,Temp 0006 SE NW,Temperature Difference 0006 NW NW,Temperature Difference 0006 SE NW,Barometric Pressure 0006 NW NW,Barometric Pressure 0006 SE NW
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-12-21 21:29:00,20.9,5.0,140.290323,0.0,0.0,0,,,,,...,,,,,,,,,,
2015-12-21 21:34:00,20.9,5.0,141.903226,0.0,0.0,0,,,,,...,,,,,,,,,,
2015-12-21 21:39:00,20.9,5.0,142.903226,0.0,0.0,0,,,,,...,,,,,,,,,,
2015-12-21 21:44:00,20.9,5.0,147.322581,0.0,0.0,0,,,,,...,,,,,,,,,,
2015-12-21 21:49:00,20.9,5.0,147.129032,0.0,0.0,0,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-03-14 02:24:00,,,118.225806,0.0,0.0,0,,,,,...,,,,,,,,,,
2015-03-14 02:29:00,,,123.580645,0.0,0.0,0,,,,,...,,,,,,,,,,
2015-03-14 02:34:00,,,102.580645,0.0,0.0,0,,,,,...,,,,,,,,,,
2015-03-14 02:39:00,,,75.193548,0.0,0.0,0,,,,,...,,,,,,,,,,


In [4]:
pd.options.display.max_columns = 200
df.scrub_id.value_counts()

0    5488
1    1372
Name: scrub_id, dtype: int64

In [5]:
df.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Index: 6860 entries, 2015-12-21 21:29:00 to 2015-03-14 02:44:00
Data columns (total 175 columns):
 #    Column                                       Non-Null Count  Dtype  
---   ------                                       --------------  -----  
 0    Balloon Wind Speed                           5978 non-null   float64
 1    Balloon Precipitable Water                   5978 non-null   float64
 2    Field Mill Mean                              6076 non-null   float64
 3    Sum of Lightning Strike Signals              6860 non-null   float64
 4    Count of Lightning Strikes                   6860 non-null   float64
 5    Rain Gauge Inches                            6860 non-null   int64  
 6    Avg Wind Speed 0002 NW  SE                   3577 non-null   float64
 7    Avg Wind Speed 0002 SE  SE                   3577 non-null   float64
 8    Avg Wind Speed 0006 NW  SE                   3381 non-null   float64
 9    Avg Wind Speed 0006 SE  SE       

# Data Preprocessing
Quality issues
- Columns with NaNs: Dropping columns with more than 1000 NaNs. Then imputing column values with a KNN imputer using the mean from the 5 nearest values
  - Could potentially check each column to see if mean/mode/constant would be better to impute 

In [6]:
#cols_to_remove = []
#for column in df.columns:
#    x = df[column].isna().sum()
#    if x >= 4000:
#        cols_to_remove.append(column)
        #print(f'{column}: {x}')

In [7]:
#separate targets
target_df = df['scrub_id']
df.drop(['scrub_id'], axis=1, inplace=True)
#create df with few NaNs
df_low_nan = df.copy()
#find columns where NaNs are greater than 1000
cols_to_remove = [column for column in df_low_nan.columns if df_low_nan[column].isna().sum()>=1000]
df_low_nan.drop(cols_to_remove, axis=1, inplace=True)


In [8]:
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

def make_data(df, target_df, imputer):
    #splits data, imputes missing values, normalizes it and then returns data ready to be put in data laoders
    X_train, X_test, y_train, y_test = train_test_split(df, target_df,random_state=42)

    pipe = Pipeline([('imputer', imputer), ('normalizer', Normalizer())])
    train_df = pd.DataFrame(data=pipe.fit_transform(X_train),columns=df.columns)
    train_data = []
    for i in range(len(train_df.to_numpy())):
        train_data.append([train_df.to_numpy()[i], y_train.to_numpy()[i]])

    test_df = pd.DataFrame(data=pipe.fit_transform(X_test), columns=df.columns)
    test_data = []
    for i in range(len(test_df.to_numpy())):
        test_data.append([test_df.to_numpy()[i], y_test.to_numpy()[i]])
    
    return train_data, test_data



In [9]:
train_data, test_data = make_data(df_low_nan, target_df, KNNImputer(n_neighbors=5))
#train_data

# Data Loaders

In [10]:
def create_loaders(train_data, test_data, batch_size, valid_size):
    """
    Takes in training and test data and creates DataLoader objects
    Inputs:
        train_data: Transformed training data (must be torchvision.datasets.folder object (potentially only an ImageFolder))
        test_data: Transformed testing data (must be torchvision.datasets.folder object (potentially only an ImageFolder))
        batch_size: How many samples per batch (int)
        valid_size: Percentage of training set to use for validation (float)
    Outputs:
        train_loader: Training data loader (DataLoader object)
        valid_loader: Validation data loader (DataLoader object)
        test_loader: Testing data loader (DataLoader object)
    """

    #obtain indices for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size*num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    #samplers to select train/valid batches
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_idx)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(valid_idx)

    #create data loaders by combining data and samplers
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=train_sampler)
    valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=valid_sampler)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

    return train_loader, valid_loader, test_loader

train_loader, valid_loader, test_loader = create_loaders(train_data, test_data, batch_size=16, valid_size=0.2)
loaders = {'train': train_loader, 'valid': valid_loader, 'test': test_loader}

In [11]:
#Used to check data format and that each input is an observation and its corresponding label - uncomment if you want to see the data
#train_iter = iter(train_loader)
#data, label = train_iter.next()
#print(data.float(),label)
#print(len(data[0]))

### Initialize cuda 

In [12]:
use_cuda = torch.cuda.is_available()
if use_cuda:
    print('Train on GPU')
else:
    print("Train on CPU")

Train on GPU


### Build Network

In [13]:
class Network(nn.Module):
    def __init__(self) -> None:
        super().__init__()

        self.fc1 = nn.Linear(30,20)
        self.fc2 = nn.Linear(20,10)
        self.fc3 = nn.Linear(10,2)

        self.dropout = nn.Dropout(0.2)
    
    def forward(self, x):
        x = x.view(x.shape[0], -1)

        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        return x
model_low_nan = Network()
model_low_nan



Network(
  (fc1): Linear(in_features=30, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

### Loss Function and Optimizer

In [14]:
criterion = nn.CrossEntropyLoss()
def get_optimizer(model):
    return torch.optim.Adam(model.parameters(), lr=0.001)

### Train Model

In [15]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    model.cuda()
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        # set the module to training mode
        model.train()
        for data, target in loaders['train']:
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            data = data.float()
            #clear gradients
            optimizer.zero_grad()
            #forward pass
            output = model(data)
            #calculate loss for batch
            loss = criterion(output, target)
            #backward pass
            loss.backward()
            #take optimization step
            optimizer.step()
            #update training loss
            train_loss += loss.item()*data.size(0)    

        ######################    
        # validate the model #
        ######################
        # set the model to evaluation mode
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            data = data.float()
            #forward pass
            output = model(data)
            #calculate loss
            loss = criterion(output, target)
            #update validation loss
            valid_loss += loss.item()*data.size(0)

        train_loss = train_loss/len(loaders['train'].sampler)
        valid_loss = valid_loss/len(loaders['valid'].sampler)

        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))

        if valid_loss <= valid_loss_min:
            print(f'Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}. Saving model...')
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss
        

        
        
    return model


In [16]:
model_low_nan = train(200, loaders, model_low_nan, get_optimizer(model_low_nan), criterion, use_cuda, 'model_low_nan.pt')

Epoch: 1 	Training Loss: 0.542642 	Validation Loss: 0.501396
Validation loss decreased (inf --> 0.501396. Saving model...
Epoch: 2 	Training Loss: 0.488750 	Validation Loss: 0.490333
Validation loss decreased (0.501396 --> 0.490333. Saving model...
Epoch: 3 	Training Loss: 0.489475 	Validation Loss: 0.484752
Validation loss decreased (0.490333 --> 0.484752. Saving model...
Epoch: 4 	Training Loss: 0.481310 	Validation Loss: 0.478633
Validation loss decreased (0.484752 --> 0.478633. Saving model...
Epoch: 5 	Training Loss: 0.481232 	Validation Loss: 0.476521
Validation loss decreased (0.478633 --> 0.476521. Saving model...
Epoch: 6 	Training Loss: 0.478903 	Validation Loss: 0.475796
Validation loss decreased (0.476521 --> 0.475796. Saving model...
Epoch: 7 	Training Loss: 0.472853 	Validation Loss: 0.471307
Validation loss decreased (0.475796 --> 0.471307. Saving model...
Epoch: 8 	Training Loss: 0.473271 	Validation Loss: 0.469980
Validation loss decreased (0.471307 --> 0.469980. Savin

Epoch: 87 	Training Loss: 0.439959 	Validation Loss: 0.418356
Epoch: 88 	Training Loss: 0.439847 	Validation Loss: 0.418458
Epoch: 89 	Training Loss: 0.433792 	Validation Loss: 0.422937
Epoch: 90 	Training Loss: 0.439297 	Validation Loss: 0.415852
Epoch: 91 	Training Loss: 0.437502 	Validation Loss: 0.419907
Epoch: 92 	Training Loss: 0.443818 	Validation Loss: 0.416989
Epoch: 93 	Training Loss: 0.435739 	Validation Loss: 0.417537
Epoch: 94 	Training Loss: 0.431923 	Validation Loss: 0.410680
Validation loss decreased (0.412316 --> 0.410680. Saving model...
Epoch: 95 	Training Loss: 0.428280 	Validation Loss: 0.415722
Epoch: 96 	Training Loss: 0.436084 	Validation Loss: 0.421808
Epoch: 97 	Training Loss: 0.433844 	Validation Loss: 0.415830
Epoch: 98 	Training Loss: 0.432905 	Validation Loss: 0.413845
Epoch: 99 	Training Loss: 0.434854 	Validation Loss: 0.413732
Epoch: 100 	Training Loss: 0.432415 	Validation Loss: 0.412318
Epoch: 101 	Training Loss: 0.433774 	Validation Loss: 0.407142
Va

### Test Model

In [17]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    # set the module to evaluation mode
    model.eval()

    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        data = data.float()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data.item() - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

# load the model that got the best validation accuracy
model_low_nan.load_state_dict(torch.load('model_low_nan.pt'))
test(loaders, model_low_nan, criterion, use_cuda)

Test Loss: 0.381627


Test Accuracy: 84% (1448/1715)


### Model with imputed NaNs

Since there are so many missing NaNs three different imputing approaches will be taken
1. Median Imputation
2. KNN Imputation
3. Iterative Imputation

In [18]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

train_data, test_data = make_data(df, target_df, SimpleImputer(strategy='median'))
train_loader, valid_loader, test_loader = create_loaders(train_data, test_data, batch_size=16, valid_size=0.2)
loaders = {'train': train_loader, 'valid': valid_loader, 'test': test_loader}

In [19]:
class Network2(nn.Module):
    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(174,130)
        self.fc2 = nn.Linear(130,90)
        self.fc3 = nn.Linear(90,50)
        self.fc4 = nn.Linear(50,2)

        self.dropout = nn.Dropout(0.2)
    
    def forward(self, x):
        x = x.view(x.shape[0], -1)

        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc4(x)

        return x
model_median = Network2()
model_KNN = Network2()
model_iter = Network2()

In [20]:
model_median = train(200, loaders, model_median, get_optimizer(model_median), criterion, use_cuda, 'model_median.pt')

Epoch: 1 	Training Loss: 0.512313 	Validation Loss: 0.490435
Validation loss decreased (inf --> 0.490435. Saving model...
Epoch: 2 	Training Loss: 0.504642 	Validation Loss: 0.482513
Validation loss decreased (0.490435 --> 0.482513. Saving model...
Epoch: 3 	Training Loss: 0.494854 	Validation Loss: 0.471092
Validation loss decreased (0.482513 --> 0.471092. Saving model...
Epoch: 4 	Training Loss: 0.472877 	Validation Loss: 0.437304
Validation loss decreased (0.471092 --> 0.437304. Saving model...
Epoch: 5 	Training Loss: 0.456984 	Validation Loss: 0.469665
Epoch: 6 	Training Loss: 0.439307 	Validation Loss: 0.451498
Epoch: 7 	Training Loss: 0.433687 	Validation Loss: 0.397753
Validation loss decreased (0.437304 --> 0.397753. Saving model...
Epoch: 8 	Training Loss: 0.431826 	Validation Loss: 0.448947
Epoch: 9 	Training Loss: 0.419631 	Validation Loss: 0.503564
Epoch: 10 	Training Loss: 0.417647 	Validation Loss: 0.394131
Validation loss decreased (0.397753 --> 0.394131. Saving model..

Epoch: 103 	Training Loss: 0.352918 	Validation Loss: 0.315456
Epoch: 104 	Training Loss: 0.350202 	Validation Loss: 0.314188
Validation loss decreased (0.315253 --> 0.314188. Saving model...
Epoch: 105 	Training Loss: 0.346669 	Validation Loss: 0.332665
Epoch: 106 	Training Loss: 0.361366 	Validation Loss: 0.322518
Epoch: 107 	Training Loss: 0.349904 	Validation Loss: 0.334565
Epoch: 108 	Training Loss: 0.365087 	Validation Loss: 0.321393
Epoch: 109 	Training Loss: 0.355435 	Validation Loss: 0.318710
Epoch: 110 	Training Loss: 0.362328 	Validation Loss: 0.319847
Epoch: 111 	Training Loss: 0.349640 	Validation Loss: 0.323467
Epoch: 112 	Training Loss: 0.352515 	Validation Loss: 0.324956
Epoch: 113 	Training Loss: 0.346944 	Validation Loss: 0.321397
Epoch: 114 	Training Loss: 0.347260 	Validation Loss: 0.318327
Epoch: 115 	Training Loss: 0.349696 	Validation Loss: 0.320439
Epoch: 116 	Training Loss: 0.352990 	Validation Loss: 0.320238
Epoch: 117 	Training Loss: 0.344201 	Validation Loss

In [21]:
model_median.load_state_dict(torch.load('model_median.pt'))
test(loaders, model_median, criterion, use_cuda)

Test Loss: 0.257980


Test Accuracy: 90% (1544/1715)


### KNN Model

In [22]:
train_data, test_data = make_data(df, target_df, KNNImputer(n_neighbors=5))
train_loader, valid_loader, test_loader = create_loaders(train_data, test_data, batch_size=16, valid_size=0.2)
loaders = {'train': train_loader, 'valid': valid_loader, 'test': test_loader}
model_KNN = train(200, loaders, model_KNN, get_optimizer(model_KNN), criterion, use_cuda, 'model_KNN.pt')

Epoch: 1 	Training Loss: 0.516918 	Validation Loss: 0.509519
Validation loss decreased (inf --> 0.509519. Saving model...
Epoch: 2 	Training Loss: 0.496750 	Validation Loss: 0.495660
Validation loss decreased (0.509519 --> 0.495660. Saving model...
Epoch: 3 	Training Loss: 0.484602 	Validation Loss: 0.477047
Validation loss decreased (0.495660 --> 0.477047. Saving model...
Epoch: 4 	Training Loss: 0.461894 	Validation Loss: 0.490037
Epoch: 5 	Training Loss: 0.443528 	Validation Loss: 0.414561
Validation loss decreased (0.477047 --> 0.414561. Saving model...
Epoch: 6 	Training Loss: 0.439593 	Validation Loss: 0.413834
Validation loss decreased (0.414561 --> 0.413834. Saving model...
Epoch: 7 	Training Loss: 0.425073 	Validation Loss: 0.406548
Validation loss decreased (0.413834 --> 0.406548. Saving model...
Epoch: 8 	Training Loss: 0.418819 	Validation Loss: 0.412533
Epoch: 9 	Training Loss: 0.419371 	Validation Loss: 0.422283
Epoch: 10 	Training Loss: 0.422314 	Validation Loss: 0.39977

Epoch: 102 	Training Loss: 0.337270 	Validation Loss: 0.296873
Epoch: 103 	Training Loss: 0.326066 	Validation Loss: 0.280684
Validation loss decreased (0.282688 --> 0.280684. Saving model...
Epoch: 104 	Training Loss: 0.326790 	Validation Loss: 0.277931
Validation loss decreased (0.280684 --> 0.277931. Saving model...
Epoch: 105 	Training Loss: 0.315144 	Validation Loss: 0.264161
Validation loss decreased (0.277931 --> 0.264161. Saving model...
Epoch: 106 	Training Loss: 0.333078 	Validation Loss: 0.336608
Epoch: 107 	Training Loss: 0.330195 	Validation Loss: 0.276710
Epoch: 108 	Training Loss: 0.327115 	Validation Loss: 0.288778
Epoch: 109 	Training Loss: 0.323421 	Validation Loss: 0.306237
Epoch: 110 	Training Loss: 0.324067 	Validation Loss: 0.302803
Epoch: 111 	Training Loss: 0.325541 	Validation Loss: 0.284047
Epoch: 112 	Training Loss: 0.322377 	Validation Loss: 0.275549
Epoch: 113 	Training Loss: 0.317272 	Validation Loss: 0.291535
Epoch: 114 	Training Loss: 0.332539 	Validatio

In [23]:
model_KNN.load_state_dict(torch.load('model_KNN.pt'))
test(loaders, model_KNN, criterion, use_cuda)

Test Loss: 0.251692


Test Accuracy: 90% (1545/1715)


### Iter Model

In [24]:
train_data, test_data = make_data(df, target_df, IterativeImputer())
train_loader, valid_loader, test_loader = create_loaders(train_data, test_data, batch_size=16, valid_size=0.2)
loaders = {'train': train_loader, 'valid': valid_loader, 'test': test_loader}
model_iter = train(200, loaders, model_iter, get_optimizer(model_iter), criterion, use_cuda, 'model_iter.pt')

Epoch: 1 	Training Loss: 0.520616 	Validation Loss: 0.509352
Validation loss decreased (inf --> 0.509352. Saving model...
Epoch: 2 	Training Loss: 0.499205 	Validation Loss: 0.493779
Validation loss decreased (0.509352 --> 0.493779. Saving model...
Epoch: 3 	Training Loss: 0.488492 	Validation Loss: 0.499518
Epoch: 4 	Training Loss: 0.463698 	Validation Loss: 0.446626
Validation loss decreased (0.493779 --> 0.446626. Saving model...
Epoch: 5 	Training Loss: 0.442005 	Validation Loss: 0.412078
Validation loss decreased (0.446626 --> 0.412078. Saving model...
Epoch: 6 	Training Loss: 0.420981 	Validation Loss: 0.394889
Validation loss decreased (0.412078 --> 0.394889. Saving model...
Epoch: 7 	Training Loss: 0.416764 	Validation Loss: 0.387688
Validation loss decreased (0.394889 --> 0.387688. Saving model...
Epoch: 8 	Training Loss: 0.407111 	Validation Loss: 0.459397
Epoch: 9 	Training Loss: 0.410634 	Validation Loss: 0.446147
Epoch: 10 	Training Loss: 0.405852 	Validation Loss: 0.37358

Epoch: 99 	Training Loss: 0.257543 	Validation Loss: 0.221669
Validation loss decreased (0.224475 --> 0.221669. Saving model...
Epoch: 100 	Training Loss: 0.254532 	Validation Loss: 0.227360
Epoch: 101 	Training Loss: 0.257024 	Validation Loss: 0.234554
Epoch: 102 	Training Loss: 0.261890 	Validation Loss: 0.233743
Epoch: 103 	Training Loss: 0.249801 	Validation Loss: 0.225708
Epoch: 104 	Training Loss: 0.249635 	Validation Loss: 0.229011
Epoch: 105 	Training Loss: 0.251282 	Validation Loss: 0.217928
Validation loss decreased (0.221669 --> 0.217928. Saving model...
Epoch: 106 	Training Loss: 0.251573 	Validation Loss: 0.221297
Epoch: 107 	Training Loss: 0.251609 	Validation Loss: 0.217237
Validation loss decreased (0.217928 --> 0.217237. Saving model...
Epoch: 108 	Training Loss: 0.256248 	Validation Loss: 0.221468
Epoch: 109 	Training Loss: 0.253262 	Validation Loss: 0.248782
Epoch: 110 	Training Loss: 0.245555 	Validation Loss: 0.217241
Epoch: 111 	Training Loss: 0.254129 	Validation

In [25]:
model_iter.load_state_dict(torch.load('model_iter.pt'))
test(loaders, model_iter, criterion, use_cuda)

Test Loss: 0.322165


Test Accuracy: 85% (1469/1715)


### Findings
Most models have similar accuracy on the imputed data. The median imputation takes the shortest and iterative imputation takes the longest.

More data would greatly aid the first model as the accuracy from the first model was low compared to the last three models (since the first model only had 30 features to train on). Even though the last 3 models had more accuracy, it may still be better to utilize the first model since extensive data was imputed on the last 3 and may not hold up to more real world data.