# Classification MLP vs. Logistic Regression

In this notebook, I will compare the performance of shallow MLPs to that of a logistic regression model.

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# import from utils (a folder in the parent directory)
import sys

sys.path.append("../..")
from utils import MLP_generator

# reset the path
sys.path.append("Projects/Deep_Learning_MLPs")

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torch.utils.tensorboard import SummaryWriter

In [18]:
data = pd.read_csv('data/covtype.data', header=None)
data.shape

(581012, 55)

In [19]:
columns = ['elevation', 'aspect', 'slope', 'hor_dist_to_hydro', 'vert_dist_to_hydro', 'hor_dist_to_roadways', 'hillshade_9am', 'hillshade_noon', 'hillshade_3pm', 'hor_dist_to_fire_points', 'wilderness_area_1', 'wilderness_area_2', 'wilderness_area_3', 'wilderness_area_4']

soil_columns = ['soil_type_' + str(i) for i in range(1, 41)]

columns = columns + soil_columns + ['cover_type']

In [20]:
data.columns = columns
data.head()

Unnamed: 0,elevation,aspect,slope,hor_dist_to_hydro,vert_dist_to_hydro,hor_dist_to_roadways,hillshade_9am,hillshade_noon,hillshade_3pm,hor_dist_to_fire_points,...,soil_type_32,soil_type_33,soil_type_34,soil_type_35,soil_type_36,soil_type_37,soil_type_38,soil_type_39,soil_type_40,cover_type
0,2596,51,3,258,0,510,221,232,148,6279,...,0,0,0,0,0,0,0,0,0,5
1,2590,56,2,212,-6,390,220,235,151,6225,...,0,0,0,0,0,0,0,0,0,5
2,2804,139,9,268,65,3180,234,238,135,6121,...,0,0,0,0,0,0,0,0,0,2
3,2785,155,18,242,118,3090,238,238,122,6211,...,0,0,0,0,0,0,0,0,0,2
4,2595,45,2,153,-1,391,220,234,150,6172,...,0,0,0,0,0,0,0,0,0,5


In [22]:
X = data.drop('cover_type', axis=1)
y = data['cover_type']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
standardized_columns = [
    "elevation",
    "aspect",
    "slope",
    "hor_dist_to_hydro",
    "vert_dist_to_hydro",
    "hor_dist_to_roadways",
    "hor_dist_to_fire_points",
]

normalized_columns = ["hillshade_9am", "hillshade_noon", "hillshade_3pm"]

In [4]:
standard_scaler = StandardScaler()
normalizer = MinMaxScaler(feature_range=(0, 255))

preprocessor = ColumnTransformer(
    remainder="passthrough",  # passthough features not listed
    transformers=[
        ("std", standard_scaler, standardized_columns),
        ("norm", normalizer, normalized_columns),
    ],
)

## Baseline: Logistic Regression Model

In [5]:
logreg_model = LogisticRegression(
    multi_class="multinomial", solver="sag", penalty=None, random_state=0, max_iter=1500
)

logreg_pipe = Pipeline(steps=[("preprocessor", preprocessor), ("logreg", logreg_model)])

In [6]:
logreg_pipe.fit(X_train, y_train)



In [7]:
y_pred_logreg = logreg_pipe.predict(X_test)

log_reg_accuracy = accuracy_score(y_test, y_pred_logreg)
log_reg_accuracy

0.5807079796144847

In [8]:
lda_model = LinearDiscriminantAnalysis()

lda_pipe = Pipeline(steps=[("preprocessor", preprocessor), ("lda", lda_model)])

In [9]:
lda_pipe.fit(X_train, y_train)

In [10]:
y_pred_lda = lda_pipe.predict(X_test)

lda_accuracy = accuracy_score(y_test, y_pred_lda)
lda_accuracy

0.5838145794603917

## Multi-Layer Perceptron

In [28]:
class CustomDataset(Dataset):
    def __init__(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def __len__(self):
        return len(self.X_train)

    def __getitem__(self, index):
        x = self.X_train.iloc[index]  # Assuming X_train is a pandas DataFrame
        y = self.y_train.iloc[index]  # Assuming y_train is a pandas Series

        # Adjust the target value by subtracting 1
        y -= 1
        
        # Convert the variables to tensors
        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.int64)


        return x, y


train_dataset = CustomDataset(X_train, y_train)
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [29]:
torch.manual_seed(0)
mlp = nn.Sequential(OrderedDict([
    ('input1', nn.Linear(54, 100)),
    ('relu1', nn.ReLU()),
    ('output', nn.Linear(100, 7)),
    ('output_act', nn.Sigmoid()),
    ('softmax', nn.Softmax(dim=1)),
]))

# Define the loss function and optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(mlp.parameters(), lr=5e-3)

writer = SummaryWriter('runs/classification_mlp')


# Run the training loop
for epoch in range(0, 5):
    # Print epoch
    print(f"Starting epoch {epoch+1}")

    # Set current loss value
    current_loss = 0.0

    # Iterate over the DataLoader for training data
    for i, data in enumerate(trainloader):
        # Get inputs
        inputs, targets = data

        # Zero the gradients
        optimizer.zero_grad()

        # Perform forward pass
        outputs = mlp(inputs)

        # Compute loss
        loss = loss_function(outputs, targets)

        # Perform backward pass
        loss.backward()

        # Perform optimization
        optimizer.step()

        # Print statistics
        current_loss += loss.item()

        #Loss and accuracy for tensorboard
        if i % 100 == 99:
            writer.add_scalar('Loss', current_loss / 100, epoch * len(trainloader) + i)
            print("Loss after mini-batch %5d: %.3f" % (i + 1, current_loss / 100))
            current_loss = 0.0
            
            y_pred_train_mlp = mlp(torch.tensor(X_train.values, dtype=torch.float32)).detach().numpy()

            mlp_train_accuracy = accuracy_score(y_train, y_pred_train_mlp.argmax(axis=1) + 1)
            
            y_pred_test_mlp = mlp(torch.tensor(X_test.values, dtype=torch.float32)).detach().numpy()

            mlp_test_accuracy = accuracy_score(y_test, y_pred_test_mlp.argmax(axis=1) + 1)
            
            print(f"Train, Test accuracy after mini-batch {i+1}: {mlp_train_accuracy}, {mlp_test_accuracy}")
            
            writer.add_scalar('Accuracy', {'train':mlp_train_accuracy, 'test':mlp_test_accuracy}, epoch * len(trainloader) + i)

    print("Loss after epoch %5d: %.3f" % (epoch + 1, loss.item()))
    
# Process is complete.
print("Training process has finished.")

writer.close()

Starting epoch 1
Loss after mini-batch   100: 1.916
Train, Test accuracy after mini-batch 100: 0.36577820136873423, 0.36798533600681566
Loss after mini-batch   200: 1.906
Train, Test accuracy after mini-batch 200: 0.3653844912641537, 0.36718501243513507
Loss after mini-batch   300: 1.907
Train, Test accuracy after mini-batch 300: 0.36549421375231544, 0.36727106873316523
Loss after mini-batch   400: 1.906
Train, Test accuracy after mini-batch 400: 0.3656103905044868, 0.3673571250311954
Loss after mini-batch   500: 1.897
Train, Test accuracy after mini-batch 500: 0.3663354194948893, 0.36825211053070916
Loss after mini-batch   600: 1.886
Train, Test accuracy after mini-batch 600: 0.36534791710143305, 0.3673657306609984
Loss after mini-batch   700: 1.887
Train, Test accuracy after mini-batch 700: 0.36526616309064586, 0.3670903505073019
Loss after mini-batch   800: 1.886
Train, Test accuracy after mini-batch 800: 0.36527476877599185, 0.36711616739671094
Loss after mini-batch   900: 1.887
Tr

KeyboardInterrupt: 

In [13]:
y_pred_mlp = mlp(torch.tensor(X_test.values, dtype=torch.float32)).detach().numpy()

mlp_accuracy = accuracy_score(y_test, y_pred_mlp.argmax(axis=1) + 1)
mlp_accuracy

0.41863111689156235