In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
# Load the flight table from disk and list each column's dtype.
df = pd.read_csv(filepath_or_buffer='csv_flight/df_nums.csv')
df.dtypes

Year                                 int64
Quarter                              int64
Month                                int64
DayofMonth                           int64
DayOfWeek                            int64
Tail_Number                          int64
Flight_Number_Reporting_Airline      int64
OriginAirportSeqID                   int64
Origin                               int64
OriginCityName                       int64
OriginState                          int64
DestAirportSeqID                     int64
Dest                                 int64
DestCityName                         int64
DestState                            int64
CRSDepTime                           int64
CRSArrTime                           int64
Cancelled                          float64
Diverted                           float64
CRSElapsedTime                     float64
Distance                           float64
is_holiday_week                      int64
OriginFlightDensity                float64
Visibility 

#### Binary Classification

In [3]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [4]:
# Features are the first 27 columns; the binary target is column index 28
# (column index 27 is not used in this section).
X = df.iloc[:, 0:27].values
y = df.iloc[:, 28].values

# create 70% training, 15% validation, 15% test split
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=123)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=123)

# Fit the scaler on the training rows only and reuse its statistics for the
# validation and test rows. Fitting on the full matrix before splitting would
# let held-out data influence the preprocessing (data leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Labels are float32 because BCEWithLogitsLoss expects floating-point targets.
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device)

print("Shape of X_train:", X_train.shape)
print("Shape of X_val:", X_val.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_val:", y_val.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([1147599, 27])
Shape of X_val: torch.Size([245914, 27])
Shape of X_test: torch.Size([245915, 27])
Shape of y_train: torch.Size([1147599])
Shape of y_val: torch.Size([245914])
Shape of y_test: torch.Size([245915])


In [5]:
class SimpleNN(nn.Module):
    """Feed-forward stack of Linear+ReLU blocks ending in a plain Linear head.

    The head has no activation, so ``forward`` returns raw logits.

    Args:
        input_size: Number of input features.
        hidden_size: Width of every hidden layer.
        output_size: Width of the final layer (default 1).
        num_hidden_layers: Number of hidden Linear layers. Values below 1
            still produce a single hidden layer.
    """

    def __init__(self, input_size, hidden_size, output_size=1, num_hidden_layers=15):
        super().__init__()
        # Consecutive pairs of widths define the in/out sizes of each Linear.
        widths = [input_size] + [hidden_size] * max(num_hidden_layers, 1) + [output_size]
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )

    def forward(self, x):
        """Apply ReLU after every layer except the last and return the logits."""
        *hidden_layers, head = self.layers
        for fc in hidden_layers:
            x = F.relu(fc(x))
        return head(x)

In [6]:
# SimpleNN's default output_size=1 gives one logit per row, which is the input
# BCEWithLogitsLoss works on (the loss applies the sigmoid internally).
model = SimpleNN(input_size=27, hidden_size=64)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0002)
loss_function = nn.BCEWithLogitsLoss()

In [7]:
def train(model, train_features, train_labels, val_features, val_labels, optimizer, loss_function, epochs, log_every=300):
    """Full-batch training loop for a single-logit binary classifier.

    Every epoch runs one forward/backward pass over the whole training set and
    takes one optimizer step. On epoch 1 and every ``log_every`` epochs after
    it, the validation set is scored and one progress line is printed. The
    reported train loss/accuracy come from the forward pass made before that
    epoch's optimizer step; the validation numbers are computed after it.

    Args:
        model: Module whose output has a trailing axis of size 1 (one logit
            per row).
        train_features: Training inputs passed to ``model``.
        train_labels: Training targets, one value per row.
        val_features: Validation inputs passed to ``model``.
        val_labels: Validation targets, one value per row.
        optimizer: Optimizer bound to ``model``'s parameters.
        loss_function: Loss called as ``loss_function(logits, labels)``.
        epochs: Number of full-batch optimizer steps.
        log_every: Interval, in epochs, between progress lines (default 300).

    Returns:
        None. Progress is reported through ``print``.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        # squeeze(-1) removes only the logit axis. A bare squeeze() would also
        # drop the batch axis when there is a single row, and the loss would
        # then reject the shape mismatch with the labels.
        train_logits = model(train_features).squeeze(-1)
        train_loss = loss_function(train_logits, train_labels)
        train_loss.backward()
        optimizer.step()

        if epoch % log_every == 0:
            with torch.no_grad():
                # Accuracy is only needed for the progress line, so it is
                # computed here rather than forcing a device sync every epoch.
                train_predictions = torch.sigmoid(train_logits) > 0.5
                train_correct = (train_predictions == train_labels).sum().item()
                train_accuracy = train_correct / len(train_labels)

            model.eval()
            with torch.no_grad():
                val_logits = model(val_features).squeeze(-1)
                val_loss = loss_function(val_logits, val_labels)
                val_predictions = torch.sigmoid(val_logits) > 0.5
                val_correct = (val_predictions == val_labels).sum().item()
                val_accuracy = val_correct / len(val_labels)
            print(f'Epoch {epoch+1}, Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, '
                    f'Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy * 100:.2f}%')

# 1400 full-batch epochs on the binary task.
train(
    model,
    X_train,
    y_train,
    X_val,
    y_val,
    optimizer,
    loss_function,
    epochs=1400,
)

Epoch 1, Train Loss: 0.7096, Train Accuracy: 19.85%, Val Loss: 0.7088, Val Accuracy: 19.91%
Epoch 301, Train Loss: 0.4753, Train Accuracy: 80.15%, Val Loss: 0.4756, Val Accuracy: 80.09%
Epoch 601, Train Loss: 0.4668, Train Accuracy: 80.34%, Val Loss: 0.4685, Val Accuracy: 80.27%
Epoch 901, Train Loss: 0.4630, Train Accuracy: 80.43%, Val Loss: 0.4657, Val Accuracy: 80.31%
Epoch 1201, Train Loss: 0.4606, Train Accuracy: 80.50%, Val Loss: 0.4643, Val Accuracy: 80.30%


In [8]:
def test(model, test_features, test_labels, loss_function):
    model.eval() 
    with torch.no_grad():  
        test_outputs = model(test_features)
        test_loss = loss_function(test_outputs.squeeze(), test_labels)
        test_predictions = torch.sigmoid(test_outputs).squeeze() > 0.5
        test_correct = (test_predictions == test_labels).sum().item()
        test_accuracy = test_correct / len(test_labels)
        
    print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')
    
# Report loss and accuracy on the held-out test split.
test(
    model,
    X_test,
    y_test,
    loss_function,
)

Test Loss: 0.4639, Test Accuracy: 80.38%


#### Multiclass Classification

In [9]:
# Same device selection as earlier in the notebook: CUDA if present, else CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
device

device(type='cuda')

In [10]:
# Features are the first 27 columns; the class label is column index 29.
X = df.iloc[:, 0:27].values
y = df.iloc[:, 29].values

# create 70% training, 15% validation, 15% test split
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=123)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=123)

# Fit the scaler on the training rows only and reuse its statistics for the
# validation and test rows. Fitting on the full matrix before splitting would
# let held-out data influence the preprocessing (data leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Labels are int64 (torch.long) because CrossEntropyLoss takes class indices.
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.long).to(device)

print("Shape of X_train:", X_train.shape)
print("Shape of X_val:", X_val.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_val:", y_val.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([1147599, 27])
Shape of X_val: torch.Size([245914, 27])
Shape of X_test: torch.Size([245915, 27])
Shape of y_train: torch.Size([1147599])
Shape of y_val: torch.Size([245914])
Shape of y_test: torch.Size([245915])


In [11]:
class SimpleNN(nn.Module):
    """Feed-forward Linear+ReLU stack with a linear head that returns raw logits.

    NOTE: this redefinition shadows the SimpleNN declared earlier in the
    notebook; the only difference between the two is the default
    ``output_size`` (14 here).

    Args:
        input_size: Number of input features.
        hidden_size: Width of every hidden layer.
        output_size: Width of the final layer (default 14).
        num_hidden_layers: Number of hidden Linear layers. Values below 1
            still produce a single hidden layer.
    """

    def __init__(self, input_size, hidden_size, output_size=14, num_hidden_layers=15):
        super().__init__()
        # Build order is input layer, hidden-to-hidden layers, output layer.
        entry = nn.Linear(input_size, hidden_size)
        middle = [nn.Linear(hidden_size, hidden_size) for _ in range(num_hidden_layers - 1)]
        head = nn.Linear(hidden_size, output_size)
        self.layers = nn.ModuleList([entry, *middle, head])

    def forward(self, x):
        """Run the stack; every layer but the last is followed by ReLU."""
        final_index = len(self.layers) - 1
        for index, layer in enumerate(self.layers):
            x = layer(x)
            if index != final_index:
                x = F.relu(x)
        return x

In [12]:
# This section's SimpleNN defaults to output_size=14, so the model emits 14
# logits per row for CrossEntropyLoss to score against integer class labels.
model = SimpleNN(input_size=27, hidden_size=64)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0002)
loss_function = nn.CrossEntropyLoss()

In [13]:
def train(model, train_features, train_labels, val_features, val_labels, optimizer, loss_function, epochs, log_every=300):
    """Full-batch training loop for a multiclass classifier.

    Every epoch runs one forward/backward pass over the whole training set and
    takes one optimizer step. On epoch 1 and every ``log_every`` epochs after
    it, the validation set is scored and one progress line is printed. The
    reported train loss/accuracy come from the forward pass made before that
    epoch's optimizer step; the validation numbers are computed after it.

    Args:
        model: Module returning one row of class logits per input row.
        train_features: Training inputs passed to ``model``.
        train_labels: Training class indices, one per row.
        val_features: Validation inputs passed to ``model``.
        val_labels: Validation class indices, one per row.
        optimizer: Optimizer bound to ``model``'s parameters.
        loss_function: Loss called as ``loss_function(logits, labels)``.
        epochs: Number of full-batch optimizer steps.
        log_every: Interval, in epochs, between progress lines (default 300).

    Returns:
        None. Progress is reported through ``print``.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        train_outputs = model(train_features)
        train_loss = loss_function(train_outputs, train_labels)
        train_loss.backward()
        optimizer.step()

        if epoch % log_every == 0:
            with torch.no_grad():
                # Accuracy is only needed for the progress line, so it is
                # computed here rather than forcing a device sync every epoch.
                _, train_predictions = torch.max(train_outputs, 1)
                train_correct = (train_predictions == train_labels).sum().item()
                train_accuracy = train_correct / len(train_labels)

            model.eval()
            with torch.no_grad():
                val_outputs = model(val_features)
                val_loss = loss_function(val_outputs, val_labels)
                _, val_predictions = torch.max(val_outputs, 1)
                val_correct = (val_predictions == val_labels).sum().item()
                val_accuracy = val_correct / len(val_labels)
            print(f'Epoch {epoch+1}, Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, '
                    f'Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy * 100:.2f}%')

# 6000 full-batch epochs on the multiclass task.
train(
    model,
    X_train,
    y_train,
    X_val,
    y_val,
    optimizer,
    loss_function,
    epochs=6000,
)

Epoch 1, Train Loss: 2.6342, Train Accuracy: 0.00%, Val Loss: 2.6322, Val Accuracy: 0.00%
Epoch 301, Train Loss: 1.3651, Train Accuracy: 58.17%, Val Loss: 1.3687, Val Accuracy: 57.91%
Epoch 601, Train Loss: 1.3517, Train Accuracy: 58.17%, Val Loss: 1.3553, Val Accuracy: 57.91%
Epoch 901, Train Loss: 1.3451, Train Accuracy: 58.17%, Val Loss: 1.3494, Val Accuracy: 57.91%
Epoch 1201, Train Loss: 1.3398, Train Accuracy: 58.17%, Val Loss: 1.3447, Val Accuracy: 57.91%
Epoch 1501, Train Loss: 1.3359, Train Accuracy: 58.17%, Val Loss: 1.3413, Val Accuracy: 57.91%
Epoch 1801, Train Loss: 1.3316, Train Accuracy: 58.20%, Val Loss: 1.3373, Val Accuracy: 57.94%
Epoch 2101, Train Loss: 1.3283, Train Accuracy: 58.22%, Val Loss: 1.3345, Val Accuracy: 57.94%
Epoch 2401, Train Loss: 1.3235, Train Accuracy: 58.23%, Val Loss: 1.3300, Val Accuracy: 57.96%
Epoch 2701, Train Loss: 1.3210, Train Accuracy: 58.24%, Val Loss: 1.3281, Val Accuracy: 57.96%
Epoch 3001, Train Loss: 1.3194, Train Accuracy: 58.24%, Va

In [14]:
def test(model, test_features, test_labels, loss_function):
    model.eval() 
    with torch.no_grad():  
        test_outputs = model(test_features)
        test_loss = loss_function(test_outputs, test_labels)
        _, test_predictions = torch.max(test_outputs, 1)
        test_correct = (test_predictions == test_labels).sum().item()
        test_accuracy = test_correct / len(test_labels)
        
    print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')
    
# Report loss and accuracy on the held-out test split.
test(
    model,
    X_test,
    y_test,
    loss_function,
)

Test Loss: 1.3193, Test Accuracy: 58.25%
