In [11]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from sklearn.preprocessing import StandardScaler


In [25]:

data = pd.read_csv('/Users/onature/Desktop/FALL-2024/MATH482/LAeq_fulltrain.csv')
val_size = int(18399*0.2)
train = data[:-val_size].copy()
val = data[-val_size:].copy()

X_train = train.drop(columns='class')
y_train = train['class']
X_val = val.drop(columns='class')
y_val = val['class']



In [15]:
model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.21


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [26]:
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)


In [27]:
X_train = train.drop(columns='class').values
y_train = train['class'].values - 1  # Adjust classes to be 0-based
X_val = val.drop(columns='class').values
y_val = val['class'].values - 1  # Adjust classes to be 0-based


In [28]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)

In [33]:
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        return self.linear(x)


In [46]:
input_dim = X_train.shape[1]
num_classes = 6
model = LogisticRegressionModel(input_dim, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-5)

# TensorBoard writer
writer = SummaryWriter('runs/logistic_regression_experiment')

# Training loop
num_epochs = 100
best_val_accuracy = 0.0
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Calculate training accuracy
    _, predicted = torch.max(outputs, 1)
    train_accuracy = (predicted == y_train).sum().item() / y_train.size(0)

    # Log the loss and accuracy
    writer.add_scalar('Training Loss', loss.item(), epoch)
    writer.add_scalar('Training Accuracy', train_accuracy, epoch)

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val)
        
        # Calculate validation accuracy
        _, val_predicted = torch.max(val_outputs, 1)
        val_accuracy = (val_predicted == y_val).sum().item() / y_val.size(0)
        
        writer.add_scalar('Validation Loss', val_loss.item(), epoch)
        writer.add_scalar('Validation Accuracy', val_accuracy, epoch)

        # Update best validation accuracy
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, '
          f'Train Acc: {train_accuracy:.4f}, Val Acc: {val_accuracy:.4f}')

print(f'Best Validation Accuracy (Logistic Regression): {best_val_accuracy:.4f}')

# Close the writer
writer.close()

Epoch [1/100], Loss: 179276.4688, Val Loss: 1131143.7500, Train Acc: 0.0141, Val Acc: 0.2128
Epoch [2/100], Loss: 740258.3125, Val Loss: 2334450.5000, Train Acc: 0.3289, Val Acc: 0.2256
Epoch [3/100], Loss: 2563867.2500, Val Loss: 3358391.2500, Train Acc: 0.2241, Val Acc: 0.2639
Epoch [4/100], Loss: 3445201.5000, Val Loss: 3121384.7500, Train Acc: 0.1880, Val Acc: 0.0834
Epoch [5/100], Loss: 2623905.7500, Val Loss: 3260408.5000, Train Acc: 0.1679, Val Acc: 0.2128
Epoch [6/100], Loss: 2767578.0000, Val Loss: 3001300.5000, Train Acc: 0.3289, Val Acc: 0.1577
Epoch [7/100], Loss: 3148888.2500, Val Loss: 3284199.2500, Train Acc: 0.0870, Val Acc: 0.2256
Epoch [8/100], Loss: 2809777.2500, Val Loss: 3151414.5000, Train Acc: 0.2241, Val Acc: 0.2128
Epoch [9/100], Loss: 2550242.2500, Val Loss: 4140678.2500, Train Acc: 0.3289, Val Acc: 0.2705
Epoch [10/100], Loss: 4162163.2500, Val Loss: 1818240.8750, Train Acc: 0.1863, Val Acc: 0.2256
Epoch [11/100], Loss: 1283897.0000, Val Loss: 3651525.0000, T

In [35]:
class MLPModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super(MLPModel, self).__init__()
        self.hidden = nn.Linear(input_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = torch.relu(self.hidden(x))
        x = self.output(x)
        return x


In [48]:
input_dim = X_train.shape[1]
hidden_dim = 64  # You can adjust the hidden layer size
num_classes = 6
model = MLPModel(input_dim, hidden_dim, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)

# TensorBoard writer
writer = SummaryWriter('runs/mlp_experiment')

# Training loop
num_epochs = 100
best_val_accuracy = 0.0
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Calculate training accuracy
    _, predicted = torch.max(outputs, 1)
    train_accuracy = (predicted == y_train).sum().item() / y_train.size(0)

    # Log the loss and accuracy
    writer.add_scalar('Training Loss', loss.item(), epoch)
    writer.add_scalar('Training Accuracy', train_accuracy, epoch)

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val)
        
        # Calculate validation accuracy
        _, val_predicted = torch.max(val_outputs, 1)
        val_accuracy = (val_predicted == y_val).sum().item() / y_val.size(0)
        
        writer.add_scalar('Validation Loss', val_loss.item(), epoch)
        writer.add_scalar('Validation Accuracy', val_accuracy, epoch)

        # Update best validation accuracy
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, '
          f'Train Acc: {train_accuracy:.4f}, Val Acc: {val_accuracy:.4f}')

print(f'Best Validation Accuracy (MLP): {best_val_accuracy:.4f}')

Epoch [1/100], Loss: 99543.2422, Val Loss: 952213110784.0000, Train Acc: 0.0132, Val Acc: 0.2128
Epoch [2/100], Loss: 651103371264.0000, Val Loss: 73284.4844, Train Acc: 0.3289, Val Acc: 0.0549
Epoch [3/100], Loss: 11247.8613, Val Loss: 1.8138, Train Acc: 0.0107, Val Acc: 0.2256
Epoch [4/100], Loss: 1.8155, Val Loss: 1.8134, Train Acc: 0.2241, Val Acc: 0.2256
Epoch [5/100], Loss: 1.8148, Val Loss: 1.8130, Train Acc: 0.2241, Val Acc: 0.2256
Epoch [6/100], Loss: 1.8141, Val Loss: 1.8126, Train Acc: 0.2241, Val Acc: 0.2256
Epoch [7/100], Loss: 1.8134, Val Loss: 1.8123, Train Acc: 0.2241, Val Acc: 0.2256
Epoch [8/100], Loss: 1.8127, Val Loss: 1.8119, Train Acc: 0.2241, Val Acc: 0.2256
Epoch [9/100], Loss: 1.8121, Val Loss: 1.8115, Train Acc: 0.2241, Val Acc: 0.2256
Epoch [10/100], Loss: 1.8114, Val Loss: 1.8111, Train Acc: 0.2241, Val Acc: 0.2256
Epoch [11/100], Loss: 1.8107, Val Loss: 1.8107, Train Acc: 0.2241, Val Acc: 0.2256
Epoch [12/100], Loss: 1.8100, Val Loss: 1.8104, Train Acc: 0.2