In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
# Columns the notebook treats as needless; they are removed right after loading.
needless_columns = [
    'Tail_Number',
    'Flight_Number_Reporting_Airline',
    'OriginAirportSeqID',
    'DepartureDensity',
    'ArrivalDensity',
    'OriginCityName',
    'OriginState',
    'DestAirportSeqID',
    'DestCityName',
    'DestState',
]

# Read the flight table and discard the needless columns in a single chain.
df = pd.read_csv('csv_flight/df_nums.csv').drop(columns=needless_columns)

# Show the dtype of every remaining column.
df.dtypes

Year                 int64
Quarter              int64
Month                int64
DayofMonth           int64
DayOfWeek            int64
Origin               int64
Dest                 int64
CRSDepTime           int64
CRSArrTime           int64
Cancelled          float64
Diverted           float64
CRSElapsedTime     float64
Distance           float64
is_holiday_week      int64
TotalDensity       float64
Visibility         float64
WindSpeed          float64
SevereWeather        int64
BadWeather           int64
DepDelay           float64
delay_binary         int64
delay_interval     float64
dtype: object

In [3]:
# Non-categorical columns that are standardised; every other column is left as is.
columns_to_scale = [
    'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek',
    'CRSDepTime', 'CRSArrTime', 'CRSElapsedTime',
    'Distance', 'TotalDensity', 'Visibility', 'WindSpeed',
]

# NOTE(review): the scaler is fitted on every row of df, i.e. before the
# train/validation/test splits that are made later in the notebook.
scaler = StandardScaler()
scaler.fit(df[columns_to_scale])

# Overwrite the selected columns with their standardised values.
df[columns_to_scale] = scaler.transform(df[columns_to_scale])

In [4]:
# Lift pandas' limit on displayed columns so wide frames are rendered in full,
# then preview the first rows of the scaled frame.
pd.options.display.max_columns = None
df.head()

Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,Origin,Dest,CRSDepTime,CRSArrTime,Cancelled,Diverted,CRSElapsedTime,Distance,is_holiday_week,TotalDensity,Visibility,WindSpeed,SevereWeather,BadWeather,DepDelay,delay_binary,delay_interval
0,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,0,-1.431226,-1.514368,0.0,0.0,-0.41064,-0.599546,1,-1.96434,0.334387,-0.724781,0,0,-3.0,0,
1,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,1,-1.230794,-1.302969,0.0,0.0,-0.27247,-0.521488,1,-1.33407,0.334387,-0.724781,0,0,-2.0,0,
2,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,2,-1.216763,-1.332061,0.0,0.0,-0.610219,-0.753804,1,-1.223773,0.334387,-0.724781,0,0,2.0,0,0.0
3,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,3,-1.010318,-0.965507,0.0,0.0,1.938251,1.738485,1,0.099795,0.334387,-1.089562,0,0,21.0,1,1.0
4,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,4,-1.000297,-0.899566,0.0,0.0,2.383465,2.095323,1,0.147065,0.334387,-1.089562,0,0,-2.0,0,


In [5]:
# (rows, columns) of the frame after the column drop and scaling done above
df.shape

(1639428, 22)

#### Binary Classification

In [7]:
# Use the GPU when PyTorch reports one is available; otherwise stay on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cuda')

In [8]:
# Count the rows in each delay_binary class to check the target for imbalance
print(df['delay_binary'].value_counts())

delay_binary
0    1313698
1     325730
Name: count, dtype: int64


In [9]:
# Separate the rows by target class; class 0 is the larger one in the counts printed above.
is_majority = df['delay_binary'] == 0
is_minority = df['delay_binary'] == 1
df_majority = df[is_majority]
df_minority = df[is_minority]

# Randomly keep exactly as many majority rows as there are minority rows (fixed seed).
df_majority_downsampled = df_majority.sample(n=df_minority.shape[0], random_state=123)

# Stack the two equally sized groups into one balanced frame.
df_downsampled = pd.concat([df_majority_downsampled, df_minority], axis=0)

# Print the per-class counts of the balanced frame.
balanced_counts = df_downsampled['delay_binary'].value_counts()
print(balanced_counts)

delay_binary
0    325730
1    325730
Name: count, dtype: int64


In [10]:
# Select features and target by column name instead of by position, so the
# selection cannot silently shift if the set of dropped columns changes.
# The excluded columns are the two targets plus DepDelay (presumably the raw
# delay the targets are derived from -- verify); all remaining columns are features.
non_feature_columns = ['DepDelay', 'delay_binary', 'delay_interval']
X_downsampled = df_downsampled.drop(columns=non_feature_columns).values
y_downsampled = df_downsampled['delay_binary'].values

# create 70% training, 15% validation, 15% test split
X_train, X_temp, y_train, y_temp = train_test_split(X_downsampled, y_downsampled, test_size=0.30, random_state=123)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=123)

# Convert every split to a float32 tensor on the selected device.
# The labels are float as well because they are compared against a single logit.
X_train, X_val, X_test = (
    torch.tensor(split, dtype=torch.float32).to(device) for split in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.tensor(split, dtype=torch.float32).to(device) for split in (y_train, y_val, y_test)
)

# Report the shape of each split (features first, then labels).
for split_name, split_tensor in [("X_train", X_train), ("X_val", X_val), ("X_test", X_test),
                                 ("y_train", y_train), ("y_val", y_val), ("y_test", y_test)]:
    print(f"Shape of {split_name}:", split_tensor.shape)

Shape of X_train: torch.Size([456022, 19])
Shape of X_val: torch.Size([97719, 19])
Shape of X_test: torch.Size([97719, 19])
Shape of y_train: torch.Size([456022])
Shape of y_val: torch.Size([97719])
Shape of y_test: torch.Size([97719])


In [11]:
class SimpleNN(nn.Module):
    """Fully connected classifier that returns raw (un-activated) logits.

    Layer layout, in order:
      * ``Linear(input_size, hidden_size)``
      * ``num_hidden_layers - 1`` repetitions of
        ``Linear(hidden_size, hidden_size) -> BatchNorm1d -> Dropout``
      * ``Linear(hidden_size, output_size)``

    A leaky ReLU follows every Linear layer except the last one, so inside a
    repeated group the activation is applied before batch normalisation.

    Args:
        input_size: Number of input features per row.
        hidden_size: Width of every hidden Linear layer.
        output_size: Number of logits produced per row.
        num_hidden_layers: Number of Linear layers before the output layer.
        dropout: Drop probability of every Dropout layer.
        negative_slope: Slope of the leaky ReLU for negative inputs; the same
            value is used to compute the Kaiming initialisation gain.
    """

    def __init__(self, input_size, hidden_size, output_size=1, num_hidden_layers=5,
                 dropout=0.5, negative_slope=0.01):
        super().__init__()
        self.negative_slope = negative_slope

        def make_hidden_linear(in_features, out_features):
            # Kaiming-normal weights with the gain matched to the slope used in forward().
            linear = nn.Linear(in_features, out_features)
            nn.init.kaiming_normal_(linear.weight, a=negative_slope, nonlinearity='leaky_relu')
            return linear

        # first layer
        layers = [make_hidden_linear(input_size, hidden_size)]

        for _ in range(num_hidden_layers - 1):
            layers.append(make_hidden_linear(hidden_size, hidden_size))
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(dropout))

        # output layer (keeps PyTorch's default initialisation)
        layers.append(nn.Linear(hidden_size, output_size))

        # ModuleList keeps parameter names of the form ``layers.<index>.*``.
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        """Return logits of shape ``(batch, output_size)`` for input ``x``."""
        for layer in self.layers[:-1]:
            x = layer(x)
            # Only Linear layers are followed by the activation;
            # BatchNorm1d and Dropout outputs pass through unchanged.
            if isinstance(layer, nn.Linear):
                x = F.leaky_relu(x, self.negative_slope)
        return self.layers[-1](x)

In [12]:
# Seed PyTorch's random number generators so that weight initialisation and
# dropout start from the same state on every run (same value as the
# random_state used for the data split). Some GPU kernels may still be
# non-deterministic.
torch.manual_seed(123)

# One input unit per feature column; the single output is a logit for BCEWithLogitsLoss.
model = SimpleNN(input_size=X_train.shape[1], hidden_size=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
# Multiply the learning rate by 0.8 once the monitored loss has not improved for 40 scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=40)
# Binary cross-entropy computed directly on logits (the sigmoid is applied inside the loss).
loss_function = nn.BCEWithLogitsLoss()

In [13]:
def train(model, train_features, train_labels, val_features, val_labels, optimizer, loss_function, epochs,
          scheduler=None, log_every=200):
    """Train a single-logit binary classifier with full-batch gradient steps.

    Every epoch performs one optimizer step on the complete training set, then
    evaluates the complete validation set in eval mode and passes the
    validation loss to the learning-rate scheduler.

    Args:
        model: Module producing one logit per row (e.g. output shape ``(N, 1)``).
        train_features, train_labels: Training inputs and their targets
            (the notebook passes float 0/1 tensors).
        val_features, val_labels: Validation inputs and targets.
        optimizer: Optimizer holding ``model``'s parameters.
        loss_function: Callable ``(logits, labels) -> scalar loss``.
        epochs: Number of full-batch epochs to run.
        scheduler: Object with a ``step(metric)`` method, called once per epoch
            with the validation loss. When omitted, a module-level variable
            named ``scheduler`` is used if one exists, which keeps calls that
            relied on that global working; if there is none, nothing is stepped.
        log_every: Print metrics on epochs whose zero-based index is a multiple
            of this value. ``0`` or ``None`` disables printing.

    Returns:
        None. Progress is reported with ``print``.
    """
    if scheduler is None:
        # Backward compatibility with callers that defined ``scheduler`` globally.
        scheduler = globals().get('scheduler')

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        # squeeze(-1) removes only the trailing logit dimension, so a batch
        # containing a single row keeps its batch axis and still matches the labels.
        train_logits = model(train_features).squeeze(-1)
        train_loss = loss_function(train_logits, train_labels)
        train_loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_logits = model(val_features).squeeze(-1)
            val_loss = loss_function(val_logits, val_labels)
        if scheduler is not None:
            scheduler.step(val_loss)

        if log_every and epoch % log_every == 0:
            # The training accuracy reuses this epoch's training-mode logits,
            # which were computed before the weight update.
            train_predictions = torch.sigmoid(train_logits) > 0.5
            train_correct = (train_predictions == train_labels).sum().item()
            train_accuracy = train_correct / len(train_labels)

            val_predictions = torch.sigmoid(val_logits) > 0.5
            val_correct = (val_predictions == val_labels).sum().item()
            val_accuracy = val_correct / len(val_labels)

            print(f'Epoch {epoch+1}, Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, '
                    f'Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy * 100:.2f}%')

# Run 2001 full-batch epochs on the training tensors, validating after every epoch.
train(
    model=model,
    train_features=X_train,
    train_labels=y_train,
    val_features=X_val,
    val_labels=y_val,
    optimizer=optimizer,
    loss_function=loss_function,
    epochs=2001,
)

Epoch 1, Train Loss: 0.7444, Train Accuracy: 50.77%, Val Loss: 0.7968, Val Accuracy: 42.74%
Epoch 201, Train Loss: 0.6405, Train Accuracy: 63.27%, Val Loss: 0.6404, Val Accuracy: 62.69%
Epoch 401, Train Loss: 0.6335, Train Accuracy: 64.01%, Val Loss: 0.6341, Val Accuracy: 63.73%
Epoch 601, Train Loss: 0.6312, Train Accuracy: 64.27%, Val Loss: 0.6324, Val Accuracy: 64.06%
Epoch 801, Train Loss: 0.6296, Train Accuracy: 64.44%, Val Loss: 0.6316, Val Accuracy: 64.16%
Epoch 1001, Train Loss: 0.6289, Train Accuracy: 64.51%, Val Loss: 0.6314, Val Accuracy: 64.17%
Epoch 1201, Train Loss: 0.6288, Train Accuracy: 64.51%, Val Loss: 0.6313, Val Accuracy: 64.27%
Epoch 1401, Train Loss: 0.6285, Train Accuracy: 64.56%, Val Loss: 0.6313, Val Accuracy: 64.20%
Epoch 1601, Train Loss: 0.6286, Train Accuracy: 64.52%, Val Loss: 0.6313, Val Accuracy: 64.24%
Epoch 1801, Train Loss: 0.6287, Train Accuracy: 64.52%, Val Loss: 0.6313, Val Accuracy: 64.21%
Epoch 2001, Train Loss: 0.6288, Train Accuracy: 64.55%, V

In [14]:
def test(model, test_features, test_labels, loss_function):
    model.eval() 
    with torch.no_grad():  
        test_outputs = model(test_features)
        test_loss = loss_function(test_outputs.squeeze(), test_labels)
        test_predictions = torch.sigmoid(test_outputs).squeeze() > 0.5
        test_correct = (test_predictions == test_labels).sum().item()
        test_accuracy = test_correct / len(test_labels)
        
    print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')
    
# Report loss and accuracy of the trained binary model on the held-out test tensors.
test(
    model=model,
    test_features=X_test,
    test_labels=y_test,
    loss_function=loss_function,
)

Test Loss: 0.6333, Test Accuracy: 64.02%


#### Multiclass Classification

In [16]:
# Select the compute device again for this section: GPU when available, CPU otherwise.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cuda')

In [17]:
# Keep only rows without any missing value; among others this removes the rows
# whose delay_interval label is missing.
df = df.dropna(axis=0)

# Select features and target by column name instead of by position, so the
# selection cannot silently shift if the set of dropped columns changes.
# The excluded columns are the two targets plus DepDelay (presumably the raw
# delay the targets are derived from -- verify); all remaining columns are features.
# NOTE: the *_downsampled names are kept from the binary section; no
# downsampling is applied to the data in this cell.
non_feature_columns = ['DepDelay', 'delay_binary', 'delay_interval']
X_downsampled = df.drop(columns=non_feature_columns).values
y_downsampled = df['delay_interval'].values

# create 70% training, 15% validation, 15% test split
X_train, X_temp, y_train, y_temp = train_test_split(X_downsampled, y_downsampled, test_size=0.30, random_state=123)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=123)

# Features become float32 tensors; labels become integer (long) class indices,
# which is the target format used with CrossEntropyLoss.
X_train, X_val, X_test = (
    torch.tensor(split, dtype=torch.float32).to(device) for split in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.tensor(split, dtype=torch.long).to(device) for split in (y_train, y_val, y_test)
)

# Report the shape of each split (features first, then labels).
for split_name, split_tensor in [("X_train", X_train), ("X_val", X_val), ("X_test", X_test),
                                 ("y_train", y_train), ("y_val", y_val), ("y_test", y_test)]:
    print(f"Shape of {split_name}:", split_tensor.shape)

Shape of X_train: torch.Size([480442, 19])
Shape of X_val: torch.Size([102952, 19])
Shape of X_test: torch.Size([102952, 19])
Shape of y_train: torch.Size([480442])
Shape of y_val: torch.Size([102952])
Shape of y_test: torch.Size([102952])


In [18]:
class SimpleNN(nn.Module):
    """Fully connected classifier that returns raw (un-activated) logits.

    Layer layout, in order:
      * ``Linear(input_size, hidden_size)``
      * ``num_hidden_layers - 1`` repetitions of
        ``Linear(hidden_size, hidden_size) -> BatchNorm1d -> Dropout``
      * ``Linear(hidden_size, output_size)``

    A leaky ReLU follows every Linear layer except the last one, so inside a
    repeated group the activation is applied before batch normalisation.

    Args:
        input_size: Number of input features per row.
        hidden_size: Width of every hidden Linear layer.
        output_size: Number of logits produced per row.
        num_hidden_layers: Number of Linear layers before the output layer.
        dropout: Drop probability of every Dropout layer.
        negative_slope: Slope of the leaky ReLU for negative inputs; the same
            value is used to compute the Kaiming initialisation gain.
    """

    def __init__(self, input_size, hidden_size, output_size=1, num_hidden_layers=6,
                 dropout=0.5, negative_slope=0.01):
        super().__init__()
        self.negative_slope = negative_slope

        def make_hidden_linear(in_features, out_features):
            # Kaiming-normal weights with the gain matched to the slope used in forward().
            linear = nn.Linear(in_features, out_features)
            nn.init.kaiming_normal_(linear.weight, a=negative_slope, nonlinearity='leaky_relu')
            return linear

        # first layer
        layers = [make_hidden_linear(input_size, hidden_size)]

        for _ in range(num_hidden_layers - 1):
            layers.append(make_hidden_linear(hidden_size, hidden_size))
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(dropout))

        # output layer (keeps PyTorch's default initialisation)
        layers.append(nn.Linear(hidden_size, output_size))

        # ModuleList keeps parameter names of the form ``layers.<index>.*``.
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        """Return logits of shape ``(batch, output_size)`` for input ``x``."""
        for layer in self.layers[:-1]:
            x = layer(x)
            # Only Linear layers are followed by the activation;
            # BatchNorm1d and Dropout outputs pass through unchanged.
            if isinstance(layer, nn.Linear):
                x = F.leaky_relu(x, self.negative_slope)
        return self.layers[-1](x)

In [19]:
# Seed PyTorch's random number generators so that weight initialisation and
# dropout start from the same state on every run (same value as the
# random_state used for the data split). Some GPU kernels may still be
# non-deterministic.
torch.manual_seed(123)

# One input unit per feature column and 7 output logits, one per class
# (presumably delay_interval takes the values 0..6 -- verify against the data).
model = SimpleNN(input_size=X_train.shape[1], hidden_size=64, output_size=7).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
# Multiply the learning rate by 0.8 once the monitored loss has not improved for 40 scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=40)
# Multiclass cross-entropy computed directly on logits with integer class targets.
loss_function = nn.CrossEntropyLoss()

In [20]:
def train(model, train_features, train_labels, val_features, val_labels, optimizer, loss_function, epochs,
          scheduler=None, log_every=300):
    """Train a multiclass classifier with full-batch gradient steps.

    Every epoch performs one optimizer step on the complete training set, then
    evaluates the complete validation set in eval mode and passes the
    validation loss to the learning-rate scheduler.

    Args:
        model: Module producing one row of class logits per input row.
        train_features, train_labels: Training inputs and their targets
            (the notebook passes integer class indices).
        val_features, val_labels: Validation inputs and targets.
        optimizer: Optimizer holding ``model``'s parameters.
        loss_function: Callable ``(logits, labels) -> scalar loss``.
        epochs: Number of full-batch epochs to run.
        scheduler: Object with a ``step(metric)`` method, called once per epoch
            with the validation loss. When omitted, a module-level variable
            named ``scheduler`` is used if one exists, which keeps calls that
            relied on that global working; if there is none, nothing is stepped.
        log_every: Print metrics on epochs whose zero-based index is a multiple
            of this value. ``0`` or ``None`` disables printing.

    Returns:
        None. Progress is reported with ``print``.
    """
    if scheduler is None:
        # Backward compatibility with callers that defined ``scheduler`` globally.
        scheduler = globals().get('scheduler')

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        train_outputs = model(train_features)
        train_loss = loss_function(train_outputs, train_labels)
        train_loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_outputs = model(val_features)
            val_loss = loss_function(val_outputs, val_labels)
        if scheduler is not None:
            scheduler.step(val_loss)

        if log_every and epoch % log_every == 0:
            # Predicted class = index of the largest logit in each row. The
            # training accuracy reuses this epoch's training-mode outputs,
            # which were computed before the weight update.
            train_predictions = train_outputs.argmax(dim=1)
            train_correct = (train_predictions == train_labels).sum().item()
            train_accuracy = train_correct / len(train_labels)

            val_predictions = val_outputs.argmax(dim=1)
            val_correct = (val_predictions == val_labels).sum().item()
            val_accuracy = val_correct / len(val_labels)
            print(f'Epoch {epoch+1}, Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, '
                    f'Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy * 100:.2f}%')

# Run 3001 full-batch epochs on the training tensors, validating after every epoch.
train(
    model=model,
    train_features=X_train,
    train_labels=y_train,
    val_features=X_val,
    val_labels=y_val,
    optimizer=optimizer,
    loss_function=loss_function,
    epochs=3001,
)

Epoch 1, Train Loss: 2.1497, Train Accuracy: 15.22%, Val Loss: 3.6163, Val Accuracy: 17.58%
Epoch 301, Train Loss: 1.3085, Train Accuracy: 52.56%, Val Loss: 1.3075, Val Accuracy: 52.71%
Epoch 601, Train Loss: 1.3019, Train Accuracy: 52.61%, Val Loss: 1.3015, Val Accuracy: 52.74%
Epoch 901, Train Loss: 1.2982, Train Accuracy: 52.62%, Val Loss: 1.2973, Val Accuracy: 52.75%
Epoch 1201, Train Loss: 1.2969, Train Accuracy: 52.64%, Val Loss: 1.2969, Val Accuracy: 52.76%
Epoch 1501, Train Loss: 1.2962, Train Accuracy: 52.65%, Val Loss: 1.2967, Val Accuracy: 52.76%
Epoch 1801, Train Loss: 1.2965, Train Accuracy: 52.63%, Val Loss: 1.2967, Val Accuracy: 52.76%
Epoch 2101, Train Loss: 1.2966, Train Accuracy: 52.61%, Val Loss: 1.2967, Val Accuracy: 52.77%
Epoch 2401, Train Loss: 1.2965, Train Accuracy: 52.63%, Val Loss: 1.2967, Val Accuracy: 52.76%
Epoch 2701, Train Loss: 1.2963, Train Accuracy: 52.63%, Val Loss: 1.2967, Val Accuracy: 52.76%
Epoch 3001, Train Loss: 1.2965, Train Accuracy: 52.63%, 

In [21]:
def test(model, test_features, test_labels, loss_function):
    model.eval() 
    with torch.no_grad():  
        test_outputs = model(test_features)
        test_loss = loss_function(test_outputs, test_labels)
        _, test_predictions = torch.max(test_outputs, 1)
        test_correct = (test_predictions == test_labels).sum().item()
        test_accuracy = test_correct / len(test_labels)
        
    print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')
    
# Report loss and accuracy of the trained multiclass model on the held-out test tensors.
test(
    model=model,
    test_features=X_test,
    test_labels=y_test,
    loss_function=loss_function,
)

Test Loss: 1.2961, Test Accuracy: 52.91%
