In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
# Columns that are removed right after loading; nothing further down in this
# notebook reads them.
unused_columns = [
    'Tail_Number',
    'Flight_Number_Reporting_Airline',
    'OriginAirportSeqID',
    'TotalDensity',
    'OriginCityName',
    'OriginState',
    'DestAirportSeqID',
    'DestCityName',
    'DestState',
]

# Load the numeric flight table and discard the unused columns in one chain.
df = pd.read_csv('csv_flight/df_nums.csv').drop(columns=unused_columns)
df.dtypes

Year                  int64
Quarter               int64
Month                 int64
DayofMonth            int64
DayOfWeek             int64
Origin                int64
Dest                  int64
CRSDepTime            int64
CRSArrTime            int64
Cancelled           float64
Diverted            float64
CRSElapsedTime      float64
Distance            float64
is_holiday_week       int64
DepartureDensity    float64
ArrivalDensity      float64
Visibility          float64
WindSpeed           float64
SevereWeather         int64
BadWeather            int64
DepDelay            float64
delay_binary          int64
delay_interval        int64
dtype: object

In [3]:
# Non-categorical columns that get standardised (zero mean, unit variance).
# Every column not listed here is left untouched.
columns_to_scale = [
    'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek',
    'CRSDepTime', 'CRSArrTime', 'CRSElapsedTime', 'Distance',
    'DepartureDensity', 'ArrivalDensity', 'Visibility', 'WindSpeed',
]

# NOTE(review): the scaler is fit on the complete frame, i.e. before the
# train/validation/test splits that are made further down, so the held-out rows
# contribute to the scaling statistics.
scaler = StandardScaler().fit(df[columns_to_scale])
df[columns_to_scale] = scaler.transform(df[columns_to_scale])

In [4]:
# Lift the column limit so wide frames are rendered without truncation.
pd.options.display.max_columns = None
df.head()

Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,Origin,Dest,CRSDepTime,CRSArrTime,Cancelled,Diverted,CRSElapsedTime,Distance,is_holiday_week,DepartureDensity,ArrivalDensity,Visibility,WindSpeed,SevereWeather,BadWeather,DepDelay,delay_binary,delay_interval
0,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,0,-1.431226,-1.514368,0.0,0.0,-0.41064,-0.599546,1,-2.018231,-1.752739,0.334387,-0.724781,0,0,-3.0,0,0
1,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,1,-1.230794,-1.302969,0.0,0.0,-0.27247,-0.521488,1,-1.69787,-0.900071,0.334387,-0.724781,0,0,-2.0,0,0
2,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,2,-1.216763,-1.332061,0.0,0.0,-0.610219,-0.753804,1,-1.665834,-0.729537,0.334387,-0.724781,0,0,2.0,0,1
3,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,3,-1.010318,-0.965507,0.0,0.0,1.938251,1.738485,1,-0.640681,0.748422,0.334387,-1.089562,0,0,21.0,1,3
4,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,4,-1.000297,-0.899566,0.0,0.0,2.383465,2.095323,1,-0.544573,0.748422,0.334387,-1.089562,0,0,-2.0,0,0


In [5]:
# Sanity check: (rows, columns) of the frame after dropping and scaling columns.
df.shape

(1639428, 23)

#### Binary Classification

In [7]:
# Use the GPU when CUDA is available, otherwise run on the CPU.
device = (
    torch.device("cuda")
    if torch.cuda.is_available()
    else torch.device("cpu")
)
device

device(type='cuda')

In [8]:
# Check for class imbalance in the binary target before training; the counts
# printed here are what the downsampling in the following cell evens out.
print(df['delay_binary'].value_counts())

delay_binary
0    1313698
1     325730
Name: count, dtype: int64


In [9]:
# Partition the rows by binary label: 0 is the majority class, 1 the minority.
df_majority = df.loc[df['delay_binary'].eq(0)]
df_minority = df.loc[df['delay_binary'].eq(1)]

# Randomly keep exactly as many majority rows as there are minority rows
# (fixed seed, so the same rows are drawn on every run).
df_majority_downsampled = df_majority.sample(n=df_minority.shape[0], random_state=123)

# Stack the sampled majority rows on top of all minority rows.
df_downsampled = pd.concat([df_majority_downsampled, df_minority])

# Both classes should now have the same count.
print(df_downsampled['delay_binary'].value_counts())

delay_binary
0    325730
1    325730
Name: count, dtype: int64


In [10]:
# Select the inputs and the target by column name instead of by position, so a
# change in the CSV column order cannot silently shift a feature or pull a label
# column into X. These are the same three trailing columns that the previous
# positional slice (0:20) left out.
non_feature_columns = ['DepDelay', 'delay_binary', 'delay_interval']
X_downsampled = df_downsampled.drop(columns=non_feature_columns).values
y_downsampled = df_downsampled['delay_binary'].values

# The network in the following cells is built with input_size=20; fail here
# with a clear message rather than with a shape error inside forward().
assert X_downsampled.shape[1] == 20, f"expected 20 feature columns, got {X_downsampled.shape[1]}"

# create 70% training, 15% validation, 15% test split
X_train, X_temp, y_train, y_temp = train_test_split(X_downsampled, y_downsampled, test_size=0.30, random_state=123)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=123)

# Move every split to the selected device as float32 (BCEWithLogitsLoss needs
# floating-point targets, hence float32 for the labels as well).
X_train, X_val, X_test = (
    torch.tensor(part, dtype=torch.float32).to(device) for part in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.tensor(part, dtype=torch.float32).to(device) for part in (y_train, y_val, y_test)
)

for _name, _split in (("X_train", X_train), ("X_val", X_val), ("X_test", X_test),
                      ("y_train", y_train), ("y_val", y_val), ("y_test", y_test)):
    print(f"Shape of {_name}:", _split.shape)

Shape of X_train: torch.Size([456022, 20])
Shape of X_val: torch.Size([97719, 20])
Shape of X_test: torch.Size([97719, 20])
Shape of y_train: torch.Size([456022])
Shape of y_val: torch.Size([97719])
Shape of y_test: torch.Size([97719])


In [11]:
class SimpleNN(nn.Module):
    """Fully connected network with LeakyReLU activations.

    ``self.layers`` is laid out as::

        Linear(input_size, hidden_size)
        [Linear(hidden_size, hidden_size), BatchNorm1d, Dropout(0.5)] * (num_hidden_layers - 1)
        Linear(hidden_size, output_size)

    All Linear layers except the output layer have their weights re-initialised
    with Kaiming-normal values. The network returns raw logits.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of every hidden Linear layer.
        output_size: number of logits produced per sample.
        num_hidden_layers: number of hidden Linear layers (the first one included).
    """

    def __init__(self, input_size, hidden_size, output_size=1, num_hidden_layers=5):
        super().__init__()

        def kaiming_linear(in_features, out_features):
            # Create the layer first, then overwrite its default weight init.
            linear = nn.Linear(in_features, out_features)
            nn.init.kaiming_normal_(linear.weight, nonlinearity='leaky_relu')
            return linear

        # input projection
        stack = nn.ModuleList([kaiming_linear(input_size, hidden_size)])

        # remaining hidden blocks
        for _ in range(1, num_hidden_layers):
            stack.extend([
                kaiming_linear(hidden_size, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.Dropout(0.5),
            ])

        # output layer keeps PyTorch's default initialisation
        stack.append(nn.Linear(hidden_size, output_size))

        self.layers = stack

    def forward(self, x):
        """Return the logits for a batch `x` of shape (batch, input_size)."""
        *hidden, head = self.layers
        for module in hidden:
            x = module(x)
            # Only Linear outputs are activated; BatchNorm/Dropout pass through as-is.
            if isinstance(module, nn.Linear):
                x = F.leaky_relu(x, 0.01)
        # No activation on the output layer.
        return head(x)

In [12]:
# Seed PyTorch's random number generators so that weight initialisation and the
# dropout masks are the same every time this cell and the training cell are
# re-run (on the same hardware/software stack; some CUDA kernels may still be
# non-deterministic). 123 matches the random_state used for the data splits.
torch.manual_seed(123)

# Binary classifier: a single output logit (output_size defaults to 1).
model = SimpleNN(input_size=20, hidden_size=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
# Multiply the learning rate by 0.8 after 40 epochs without a lower validation loss.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=40)
# Works on raw logits, so the model must not apply a sigmoid itself.
loss_function = nn.BCEWithLogitsLoss()

In [13]:
def train(model, train_features, train_labels, val_features, val_labels, optimizer, loss_function, epochs,
          scheduler=None):
    """Train a binary classifier with full-batch gradient steps.

    Each epoch runs one forward/backward pass over all of `train_features`,
    one optimizer step, and then an evaluation pass over `val_features`.
    Every 200th epoch (starting with the first) the losses and accuracies are
    printed.

    Args:
        model: module mapping a (N, features) batch to one logit per row.
        train_features, train_labels: training inputs and 0/1 float targets.
        val_features, val_labels: validation inputs and 0/1 float targets.
        optimizer: optimizer already bound to `model`'s parameters.
        loss_function: loss taking (logits, targets), e.g. BCEWithLogitsLoss.
        epochs: number of full-batch epochs to run.
        scheduler: optional object with a `step(metric)` method; it is called
            with the validation loss once per epoch. When omitted, a
            notebook-level variable named `scheduler` is used if one exists
            (this keeps call sites written before the parameter was added
            working); if there is none, no scheduling is done.

    Returns:
        None. Progress is reported through `print`.
    """
    if scheduler is None:
        # Backward compatibility: this function used to read the global directly.
        scheduler = globals().get('scheduler')

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        train_outputs = model(train_features)
        # squeeze(-1) drops only the trailing logit dimension. A bare squeeze()
        # would also collapse a batch of one row to a scalar and then fail the
        # loss function's shape check against the (1,) label tensor.
        train_loss = loss_function(train_outputs.squeeze(-1), train_labels)
        train_loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_outputs = model(val_features)
            val_loss = loss_function(val_outputs.squeeze(-1), val_labels)
        if scheduler is not None:
            scheduler.step(val_loss)

        if epoch % 200 == 0:
            # The training accuracy comes from the training-mode forward pass
            # above, so any dropout / batch-norm layers were in training mode
            # for it; the validation accuracy comes from the eval-mode pass.
            train_predictions = torch.sigmoid(train_outputs.detach()).squeeze(-1) > 0.5
            train_correct = (train_predictions == train_labels).sum().item()
            train_accuracy = train_correct / len(train_labels)

            val_predictions = torch.sigmoid(val_outputs).squeeze(-1) > 0.5
            val_correct = (val_predictions == val_labels).sum().item()
            val_accuracy = val_correct / len(val_labels)

            print(f'Epoch {epoch+1}, Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, '
                    f'Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy * 100:.2f}%')

# Full-batch training for 2001 epochs; metrics are printed every 200th epoch.
# No scheduler is passed here: `train` uses the notebook-level `scheduler`.
train(model, X_train, y_train, X_val, y_val, optimizer, loss_function, epochs=2001)

Epoch 1, Train Loss: 0.7604, Train Accuracy: 49.94%, Val Loss: 1.7072, Val Accuracy: 49.84%
Epoch 201, Train Loss: 0.6393, Train Accuracy: 63.33%, Val Loss: 0.6397, Val Accuracy: 63.01%
Epoch 401, Train Loss: 0.6348, Train Accuracy: 63.84%, Val Loss: 0.6365, Val Accuracy: 63.42%
Epoch 601, Train Loss: 0.6315, Train Accuracy: 64.22%, Val Loss: 0.6345, Val Accuracy: 63.70%
Epoch 801, Train Loss: 0.6299, Train Accuracy: 64.42%, Val Loss: 0.6338, Val Accuracy: 63.93%
Epoch 1001, Train Loss: 0.6297, Train Accuracy: 64.49%, Val Loss: 0.6336, Val Accuracy: 63.97%
Epoch 1201, Train Loss: 0.6295, Train Accuracy: 64.55%, Val Loss: 0.6335, Val Accuracy: 64.04%
Epoch 1401, Train Loss: 0.6290, Train Accuracy: 64.58%, Val Loss: 0.6336, Val Accuracy: 64.02%
Epoch 1601, Train Loss: 0.6289, Train Accuracy: 64.57%, Val Loss: 0.6336, Val Accuracy: 64.04%
Epoch 1801, Train Loss: 0.6290, Train Accuracy: 64.57%, Val Loss: 0.6336, Val Accuracy: 64.04%
Epoch 2001, Train Loss: 0.6290, Train Accuracy: 64.57%, V

In [14]:
def test(model, test_features, test_labels, loss_function):
    model.eval() 
    with torch.no_grad():  
        test_outputs = model(test_features)
        test_loss = loss_function(test_outputs.squeeze(), test_labels)
        test_predictions = torch.sigmoid(test_outputs).squeeze() > 0.5
        test_correct = (test_predictions == test_labels).sum().item()
        test_accuracy = test_correct / len(test_labels)
        
    print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')
    
# Single evaluation on the held-out 15% test split; prints loss and accuracy.
test(model, X_test, y_test, loss_function)

Test Loss: 0.6344, Test Accuracy: 63.76%


#### Multiclass Classification

In [16]:
# Same device choice as in the binary section: GPU when CUDA is available,
# CPU otherwise.
device = (
    torch.device("cuda")
    if torch.cuda.is_available()
    else torch.device("cpu")
)
device

device(type='cuda')

In [17]:
# NOTE: despite their names, X_downsampled / y_downsampled are built from the
# full frame `df` here, not from the downsampled frame used in the binary
# section. The names are kept so that cells relying on them keep working.
#
# Inputs and target are selected by column name instead of by position, so a
# change in the CSV column order cannot silently shift a feature or pull a label
# column into X. These are the same three trailing columns that the previous
# positional slice (0:20) left out.
non_feature_columns = ['DepDelay', 'delay_binary', 'delay_interval']
X_downsampled = df.drop(columns=non_feature_columns).values
y_downsampled = df['delay_interval'].values

# The network in the following cells is built with input_size=20; fail here
# with a clear message rather than with a shape error inside forward().
assert X_downsampled.shape[1] == 20, f"expected 20 feature columns, got {X_downsampled.shape[1]}"

# create 70% training, 15% validation, 15% test split
X_train, X_temp, y_train, y_temp = train_test_split(X_downsampled, y_downsampled, test_size=0.30, random_state=123)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=123)

# Features go to the device as float32; labels as int64 class indices, which is
# what CrossEntropyLoss expects for its targets.
X_train, X_val, X_test = (
    torch.tensor(part, dtype=torch.float32).to(device) for part in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.tensor(part, dtype=torch.long).to(device) for part in (y_train, y_val, y_test)
)

for _name, _split in (("X_train", X_train), ("X_val", X_val), ("X_test", X_test),
                      ("y_train", y_train), ("y_val", y_val), ("y_test", y_test)):
    print(f"Shape of {_name}:", _split.shape)

Shape of X_train: torch.Size([1147599, 20])
Shape of X_val: torch.Size([245914, 20])
Shape of X_test: torch.Size([245915, 20])
Shape of y_train: torch.Size([1147599])
Shape of y_val: torch.Size([245914])
Shape of y_test: torch.Size([245915])


In [18]:
class SimpleNN(nn.Module):
    """Fully connected network with LeakyReLU activations.

    ``self.layers`` is laid out as::

        Linear(input_size, hidden_size)
        [Linear(hidden_size, hidden_size), BatchNorm1d, Dropout(0.5)] * (num_hidden_layers - 1)
        Linear(hidden_size, output_size)

    All Linear layers except the output layer have their weights re-initialised
    with Kaiming-normal values. The network returns raw logits.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of every hidden Linear layer.
        output_size: number of logits produced per sample.
        num_hidden_layers: number of hidden Linear layers (the first one included).
    """

    def __init__(self, input_size, hidden_size, output_size=1, num_hidden_layers=6):
        super().__init__()

        def kaiming_linear(in_features, out_features):
            # Create the layer first, then overwrite its default weight init.
            linear = nn.Linear(in_features, out_features)
            nn.init.kaiming_normal_(linear.weight, nonlinearity='leaky_relu')
            return linear

        # input projection
        stack = nn.ModuleList([kaiming_linear(input_size, hidden_size)])

        # remaining hidden blocks
        for _ in range(1, num_hidden_layers):
            stack.extend([
                kaiming_linear(hidden_size, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.Dropout(0.5),
            ])

        # output layer keeps PyTorch's default initialisation
        stack.append(nn.Linear(hidden_size, output_size))

        self.layers = stack

    def forward(self, x):
        """Return the logits for a batch `x` of shape (batch, input_size)."""
        *hidden, head = self.layers
        for module in hidden:
            x = module(x)
            # Only Linear outputs are activated; BatchNorm/Dropout pass through as-is.
            if isinstance(module, nn.Linear):
                x = F.leaky_relu(x, 0.01)
        # No activation on the output layer.
        return head(x)

In [19]:
# Seed PyTorch's random number generators so that weight initialisation and the
# dropout masks are the same every time this cell and the training cell are
# re-run (on the same hardware/software stack; some CUDA kernels may still be
# non-deterministic). 123 matches the random_state used for the data splits.
torch.manual_seed(123)

# Multiclass classifier: one logit per class.
# NOTE(review): output_size=9 presumes `delay_interval` only takes the values
# 0..8 — confirm against the data; CrossEntropyLoss rejects larger indices.
model = SimpleNN(input_size=20, hidden_size=64, output_size=9).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
# Multiply the learning rate by 0.8 after 40 epochs without a lower validation loss.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=40)
# Works on raw logits and integer class indices.
loss_function = nn.CrossEntropyLoss()

In [20]:
def train(model, train_features, train_labels, val_features, val_labels, optimizer, loss_function, epochs,
          scheduler=None):
    """Train a multiclass classifier with full-batch gradient steps.

    Each epoch runs one forward/backward pass over all of `train_features`,
    one optimizer step, and then an evaluation pass over `val_features`.
    Every 300th epoch (starting with the first) the losses and top-1
    accuracies are printed.

    Args:
        model: module mapping a (N, features) batch to (N, classes) logits.
        train_features, train_labels: training inputs and integer class targets.
        val_features, val_labels: validation inputs and integer class targets.
        optimizer: optimizer already bound to `model`'s parameters.
        loss_function: loss taking (logits, targets), e.g. CrossEntropyLoss.
        epochs: number of full-batch epochs to run.
        scheduler: optional object with a `step(metric)` method; it is called
            with the validation loss once per epoch. When omitted, a
            notebook-level variable named `scheduler` is used if one exists
            (this keeps call sites written before the parameter was added
            working); if there is none, no scheduling is done.

    Returns:
        None. Progress is reported through `print`.
    """
    if scheduler is None:
        # Backward compatibility: this function used to read the global directly.
        scheduler = globals().get('scheduler')

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        train_outputs = model(train_features)
        train_loss = loss_function(train_outputs, train_labels)
        train_loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_outputs = model(val_features)
            val_loss = loss_function(val_outputs, val_labels)
        if scheduler is not None:
            scheduler.step(val_loss)

        if epoch % 300 == 0:
            # The training accuracy comes from the training-mode forward pass
            # above, so any dropout / batch-norm layers were in training mode
            # for it; the validation accuracy comes from the eval-mode pass.
            train_predictions = train_outputs.detach().argmax(dim=1)
            train_correct = (train_predictions == train_labels).sum().item()
            train_accuracy = train_correct / len(train_labels)

            val_predictions = val_outputs.argmax(dim=1)
            val_correct = (val_predictions == val_labels).sum().item()
            val_accuracy = val_correct / len(val_labels)
            print(f'Epoch {epoch+1}, Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, '
                    f'Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy * 100:.2f}%')

# Full-batch training for 3001 epochs; metrics are printed every 300th epoch.
# No scheduler is passed here: `train` uses the notebook-level `scheduler`.
train(model, X_train, y_train, X_val, y_val, optimizer, loss_function, epochs=3001)

Epoch 1, Train Loss: 2.4583, Train Accuracy: 8.71%, Val Loss: 2.1101, Val Accuracy: 35.81%
Epoch 301, Train Loss: 1.3357, Train Accuracy: 58.19%, Val Loss: 1.3382, Val Accuracy: 57.93%
Epoch 601, Train Loss: 1.3294, Train Accuracy: 58.20%, Val Loss: 1.3306, Val Accuracy: 57.94%
Epoch 901, Train Loss: 1.3273, Train Accuracy: 58.21%, Val Loss: 1.3288, Val Accuracy: 57.95%
Epoch 1201, Train Loss: 1.3265, Train Accuracy: 58.21%, Val Loss: 1.3280, Val Accuracy: 57.96%
Epoch 1501, Train Loss: 1.3263, Train Accuracy: 58.21%, Val Loss: 1.3279, Val Accuracy: 57.96%
Epoch 1801, Train Loss: 1.3263, Train Accuracy: 58.22%, Val Loss: 1.3277, Val Accuracy: 57.96%
Epoch 2101, Train Loss: 1.3264, Train Accuracy: 58.21%, Val Loss: 1.3278, Val Accuracy: 57.96%
Epoch 2401, Train Loss: 1.3264, Train Accuracy: 58.22%, Val Loss: 1.3278, Val Accuracy: 57.96%
Epoch 2701, Train Loss: 1.3264, Train Accuracy: 58.22%, Val Loss: 1.3277, Val Accuracy: 57.96%
Epoch 3001, Train Loss: 1.3265, Train Accuracy: 58.21%, V

In [21]:
def test(model, test_features, test_labels, loss_function):
    model.eval() 
    with torch.no_grad():  
        test_outputs = model(test_features)
        test_loss = loss_function(test_outputs, test_labels)
        _, test_predictions = torch.max(test_outputs, 1)
        test_correct = (test_predictions == test_labels).sum().item()
        test_accuracy = test_correct / len(test_labels)
        
    print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')
    
# Single evaluation on the held-out 15% test split; prints loss and accuracy.
test(model, X_test, y_test, loss_function)

Test Loss: 1.3237, Test Accuracy: 58.24%
