In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
df = pd.read_csv('csv_flight/df_nums.csv')
# drop needless columns
df = df.drop(columns = ['Tail_Number', 'Flight_Number_Reporting_Airline', 'OriginAirportSeqID', 'TotalDensity', 
                        'ArrivalDensity', 'OriginCityName', 'OriginState', 'DestAirportSeqID', 'DestCityName', 'DestState'])
df.dtypes

Year                  int64
Quarter               int64
Month                 int64
DayofMonth            int64
DayOfWeek             int64
Origin                int64
Dest                  int64
CRSDepTime            int64
CRSArrTime            int64
Cancelled           float64
Diverted            float64
CRSElapsedTime      float64
Distance            float64
is_holiday_week       int64
DepartureDensity    float64
Visibility          float64
WindSpeed           float64
SevereWeather         int64
BadWeather            int64
DepDelay            float64
delay_binary          int64
delay_interval      float64
dtype: object

In [3]:
# non-categorical columns to scale
columns_to_scale = ['Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'CRSDepTime', 'CRSArrTime', 'CRSElapsedTime', 
                    'Distance', 'DepartureDensity', 'Visibility', 'WindSpeed']
scaler = StandardScaler()
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

In [4]:
pd.set_option('display.max_columns', None)
df.head()

Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,Origin,Dest,CRSDepTime,CRSArrTime,Cancelled,Diverted,CRSElapsedTime,Distance,is_holiday_week,DepartureDensity,Visibility,WindSpeed,SevereWeather,BadWeather,DepDelay,delay_binary,delay_interval
0,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,0,-1.431226,-1.514368,0.0,0.0,-0.41064,-0.599546,1,-2.018231,0.334387,-0.724781,0,0,-3.0,0,
1,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,1,-1.230794,-1.302969,0.0,0.0,-0.27247,-0.521488,1,-1.69787,0.334387,-0.724781,0,0,-2.0,0,
2,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,2,-1.216763,-1.332061,0.0,0.0,-0.610219,-0.753804,1,-1.665834,0.334387,-0.724781,0,0,2.0,0,
3,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,3,-1.010318,-0.965507,0.0,0.0,1.938251,1.738485,1,-0.640681,0.334387,-1.089562,0,0,21.0,1,0.0
4,-1.517272,-1.395768,-1.655277,-1.68001,-0.468058,0,4,-1.000297,-0.899566,0.0,0.0,2.383465,2.095323,1,-0.544573,0.334387,-1.089562,0,0,-2.0,0,


In [5]:
df.shape

(1639428, 22)

#### Binary Classification

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [7]:
X = df.iloc[:, 0:19].values
y = df.iloc[:, 20].values

# create 70% traning, 15% validation, 15% test split
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=123)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=123)

X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device)

print("Shape of X_train:", X_train.shape)
print("Shape of X_val:", X_val.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_val:", y_val.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([1147599, 19])
Shape of X_val: torch.Size([245914, 19])
Shape of X_test: torch.Size([245915, 19])
Shape of y_train: torch.Size([1147599])
Shape of y_val: torch.Size([245914])
Shape of y_test: torch.Size([245915])


In [8]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=1, num_hidden_layers=5):
        super(SimpleNN, self).__init__()
        # first layer
        layers = [nn.Linear(input_size, hidden_size)]
        nn.init.kaiming_normal_(layers[-1].weight, nonlinearity='leaky_relu')
        
        for _ in range(num_hidden_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            nn.init.kaiming_normal_(layers[-1].weight, nonlinearity='leaky_relu')
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(0.5))
        
        # output layer
        layers.append(nn.Linear(hidden_size, output_size))
        
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        for layer in self.layers[:-1]:
            if isinstance(layer, nn.Linear):
                x = layer(x)
                x = F.leaky_relu(x, 0.01) 
            else:
                x = layer(x)
        x = self.layers[-1](x)
        return x

In [9]:
model = SimpleNN(input_size=19, hidden_size=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=40)
loss_function = nn.BCEWithLogitsLoss()

In [10]:
def train(model, train_features, train_labels, val_features, val_labels, optimizer, loss_function, epochs):
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        train_outputs = model(train_features)
        train_loss = loss_function(train_outputs.squeeze(), train_labels)
        train_loss.backward()
        optimizer.step()
        
        model.eval()
        with torch.no_grad():
            val_outputs = model(val_features)
            val_loss = loss_function(val_outputs.squeeze(), val_labels)
        scheduler.step(val_loss)
        
        if epoch % 200 == 0:
            train_predictions = torch.sigmoid(train_outputs).squeeze() > 0.5
            train_correct = (train_predictions == train_labels).sum().item()
            train_accuracy = train_correct / len(train_labels)

            val_predictions = torch.sigmoid(val_outputs).squeeze() > 0.5
            val_correct = (val_predictions == val_labels).sum().item()
            val_accuracy = val_correct / len(val_labels)
            
            print(f'Epoch {epoch+1}, Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, '
                    f'Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy * 100:.2f}%')

train(model, X_train, y_train, X_val, y_val, optimizer, loss_function, epochs=2001)

Epoch 1, Train Loss: 0.7472, Train Accuracy: 52.59%, Val Loss: 0.5670, Val Accuracy: 80.09%
Epoch 201, Train Loss: 0.4653, Train Accuracy: 80.36%, Val Loss: 0.4634, Val Accuracy: 80.32%
Epoch 401, Train Loss: 0.4608, Train Accuracy: 80.43%, Val Loss: 0.4605, Val Accuracy: 80.36%
Epoch 601, Train Loss: 0.4602, Train Accuracy: 80.48%, Val Loss: 0.4590, Val Accuracy: 80.42%
Epoch 801, Train Loss: 0.4579, Train Accuracy: 80.55%, Val Loss: 0.4580, Val Accuracy: 80.45%
Epoch 1001, Train Loss: 0.4574, Train Accuracy: 80.56%, Val Loss: 0.4576, Val Accuracy: 80.49%
Epoch 1201, Train Loss: 0.4569, Train Accuracy: 80.59%, Val Loss: 0.4574, Val Accuracy: 80.50%
Epoch 1401, Train Loss: 0.4569, Train Accuracy: 80.59%, Val Loss: 0.4571, Val Accuracy: 80.53%
Epoch 1601, Train Loss: 0.4567, Train Accuracy: 80.58%, Val Loss: 0.4572, Val Accuracy: 80.52%
Epoch 1801, Train Loss: 0.4568, Train Accuracy: 80.59%, Val Loss: 0.4571, Val Accuracy: 80.52%
Epoch 2001, Train Loss: 0.4566, Train Accuracy: 80.60%, V

In [11]:
def test(model, test_features, test_labels, loss_function):
    model.eval() 
    with torch.no_grad():  
        test_outputs = model(test_features)
        test_loss = loss_function(test_outputs.squeeze(), test_labels)
        test_predictions = torch.sigmoid(test_outputs).squeeze() > 0.5
        test_correct = (test_predictions == test_labels).sum().item()
        test_accuracy = test_correct / len(test_labels)
        
    print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')
    
test(model, X_test, y_test, loss_function)

Test Loss: 0.4571, Test Accuracy: 80.59%


#### Multiclass Classification

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [21]:
df = df.dropna(axis=0)
print(df['delay_interval'].value_counts())

delay_interval
0.0    119619
1.0    100529
2.0     68756
3.0     32937
4.0      3889
Name: count, dtype: int64


In [22]:
#oversample the data to balance the classes
from imblearn.over_sampling import RandomOverSampler

ros = RandomOverSampler(random_state=0)

X = df.iloc[:, 0:19].values
y = df.iloc[:, 21].values

X_resampled, y_resampled = ros.fit_resample(X, y)

In [23]:
# create 70% traning, 15% validation, 15% test split
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=123)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=123)

X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.long).to(device)

print("Shape of X_train:", X_train.shape)
print("Shape of X_val:", X_val.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_val:", y_val.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([228011, 19])
Shape of X_val: torch.Size([48859, 19])
Shape of X_test: torch.Size([48860, 19])
Shape of y_train: torch.Size([228011])
Shape of y_val: torch.Size([48859])
Shape of y_test: torch.Size([48860])


In [24]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=1, num_hidden_layers=6):
        super(SimpleNN, self).__init__()
        # first layer
        layers = [nn.Linear(input_size, hidden_size)]
        nn.init.kaiming_normal_(layers[-1].weight, nonlinearity='leaky_relu')
        
        for _ in range(num_hidden_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            nn.init.kaiming_normal_(layers[-1].weight, nonlinearity='leaky_relu')
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(0.5))
        
        # output layer
        layers.append(nn.Linear(hidden_size, output_size))
        
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        for layer in self.layers[:-1]:
            if isinstance(layer, nn.Linear):
                x = layer(x)
                x = F.leaky_relu(x, 0.01) 
            else:
                x = layer(x)
        x = self.layers[-1](x)
        return x

In [25]:
model = SimpleNN(input_size=19, hidden_size=64, output_size=5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=40)
loss_function = nn.CrossEntropyLoss()

In [26]:
def train(model, train_features, train_labels, val_features, val_labels, optimizer, loss_function, epochs):
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        train_outputs = model(train_features)
        train_loss = loss_function(train_outputs, train_labels)
        train_loss.backward()
        optimizer.step()
        
        model.eval()
        with torch.no_grad():
            val_outputs = model(val_features)
            val_loss = loss_function(val_outputs, val_labels)
        scheduler.step(val_loss)
        
        if epoch % 300 == 0:
            _, train_predictions = torch.max(train_outputs, 1)
            train_correct = (train_predictions == train_labels).sum().item()
            train_accuracy = train_correct / len(train_labels)
            
            _, val_predictions = torch.max(val_outputs, 1)
            val_correct = (val_predictions == val_labels).sum().item()
            val_accuracy = val_correct / len(val_labels)
            print(f'Epoch {epoch+1}, Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, '
                    f'Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy * 100:.2f}%')

train(model, X_train, y_train, X_val, y_val, optimizer, loss_function, epochs=3001)

Epoch 1, Train Loss: 1.7509, Train Accuracy: 21.99%, Val Loss: 3.6980, Val Accuracy: 1.32%
Epoch 301, Train Loss: 1.3226, Train Accuracy: 37.43%, Val Loss: 1.3238, Val Accuracy: 36.81%
Epoch 601, Train Loss: 1.3174, Train Accuracy: 37.67%, Val Loss: 1.3218, Val Accuracy: 37.10%
Epoch 901, Train Loss: 1.3157, Train Accuracy: 37.75%, Val Loss: 1.3222, Val Accuracy: 37.08%
Epoch 1201, Train Loss: 1.3150, Train Accuracy: 37.74%, Val Loss: 1.3223, Val Accuracy: 37.09%
Epoch 1501, Train Loss: 1.3151, Train Accuracy: 37.72%, Val Loss: 1.3222, Val Accuracy: 37.08%
Epoch 1801, Train Loss: 1.3153, Train Accuracy: 37.71%, Val Loss: 1.3222, Val Accuracy: 37.08%
Epoch 2101, Train Loss: 1.3150, Train Accuracy: 37.75%, Val Loss: 1.3223, Val Accuracy: 37.09%
Epoch 2401, Train Loss: 1.3154, Train Accuracy: 37.76%, Val Loss: 1.3222, Val Accuracy: 37.09%
Epoch 2701, Train Loss: 1.3149, Train Accuracy: 37.69%, Val Loss: 1.3223, Val Accuracy: 37.09%
Epoch 3001, Train Loss: 1.3152, Train Accuracy: 37.73%, V

In [27]:
def test(model, test_features, test_labels, loss_function):
    model.eval() 
    with torch.no_grad():  
        test_outputs = model(test_features)
        test_loss = loss_function(test_outputs, test_labels)
        _, test_predictions = torch.max(test_outputs, 1)
        test_correct = (test_predictions == test_labels).sum().item()
        test_accuracy = test_correct / len(test_labels)
        
    print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')
    
test(model, X_test, y_test, loss_function)

Test Loss: 1.3250, Test Accuracy: 37.47%
