In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard import SummaryWriter
import os
import time

In [4]:
# Debugging aid: ask CUDA to run kernel launches synchronously.
os.environ.update({"CUDA_LAUNCH_BLOCKING": "1"})

# Choose the compute device once and report which one was picked.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

Using GPU: NVIDIA GeForce RTX 2070 SUPER


In [5]:
# Load dataset
data = pd.read_csv("log2.csv")

# Preprocess data
# Identify non-numeric columns
categorical_cols = data.select_dtypes(include=['object']).columns

# Encode non-numeric columns to integer category codes (pandas assigns -1 to missing values)
data[categorical_cols] = data[categorical_cols].apply(lambda col: col.astype('category').cat.codes)

X_raw = data.iloc[:, :-1].values  # Features: every column except the last
y_raw = data.iloc[:, -1].values   # Labels: the last column

# Re-index the labels to contiguous ids 0..num_classes-1.
# Clipping the raw values would silently merge distinct classes whenever the
# label values are not already 0..n-1 (e.g. {1, 2, 3} would collapse to {1, 2}).
label_values, y = np.unique(y_raw, return_inverse=True)
y = y.reshape(-1)
num_classes = len(label_values)

# Split BEFORE scaling so the scaler statistics come from the training rows only;
# fitting on the full matrix would leak test-set information into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X_raw, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Keep `X` available as the full scaled feature matrix (later cells read X.shape[1]).
X = scaler.transform(X_raw)

In [6]:
# Custom Dataset
class TimeSeriesDataset(Dataset):
    """In-memory dataset of (features, label) pairs.

    Features are converted to float32 and given an extra length-1 axis at
    position 1; labels are converted to int64 (torch.long).
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.long)
        # Insert a singleton axis at dim 1.
        self.X = torch.unsqueeze(features, dim=1)
        self.y = labels

    def __len__(self):
        """Number of samples (size of the leading axis of the feature tensor)."""
        return self.X.size(0)

    def __getitem__(self, idx):
        """Return the (features, label) pair stored at `idx`."""
        sample = self.X[idx]
        label = self.y[idx]
        return sample, label

# Wrap each split in the dataset class.
train_dataset = TimeSeriesDataset(X=X_train, y=y_train)
test_dataset = TimeSeriesDataset(X=X_test, y=y_test)

# Batches of 32; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [7]:
# Define RNN Model
class RNNModel(nn.Module):
    """RNN classifier: RNN -> pooling over the time axis -> linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the RNN hidden state.
        num_classes: Number of output logits.
        pooling_type: "max" or "avg"; how the per-step RNN outputs are
            reduced over dim 1 before the linear layer.

    Raises:
        ValueError: If `pooling_type` is not "max" or "avg".
    """

    _POOLING_TYPES = ("max", "avg")

    def __init__(self, input_size, hidden_size, num_classes, pooling_type="max"):
        super().__init__()
        # Reject unknown modes up front: otherwise forward() would skip pooling
        # and silently return a 3-D tensor instead of (batch, num_classes).
        if pooling_type not in self._POOLING_TYPES:
            raise ValueError(
                f"Unsupported pooling_type {pooling_type!r}; expected one of {self._POOLING_TYPES}"
            )
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.pooling_type = pooling_type
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for x of shape (batch, seq_len, input_size)."""
        # Allocate the initial hidden state on the same device/dtype as the
        # input rather than on a notebook-level global device, so the module
        # works wherever it (and its input) has been moved.
        h0 = x.new_zeros(self.rnn.num_layers, x.size(0), self.rnn.hidden_size)
        out, _ = self.rnn(x, h0)

        if self.pooling_type == "max":
            out, _ = torch.max(out, dim=1)
        elif self.pooling_type == "avg":
            out = torch.mean(out, dim=1)
        else:
            # Only reachable if the attribute was changed after construction.
            raise ValueError(f"Unsupported pooling_type {self.pooling_type!r}")

        return self.fc(out)

In [8]:
# Train and Evaluate Function
def train_and_evaluate(model, optimizer, criterion, epochs, scheduler, early_stopping_patience=10):
    """Train `model`, evaluating after every epoch, with early stopping on validation loss.

    Reads the notebook-level globals `train_loader`, `test_loader` and `device`.
    Per-epoch train loss, validation loss and validation accuracy are written
    to a TensorBoard `SummaryWriter` and printed.

    Args:
        model: Module to train; called as `model(X_batch)`.
        optimizer: Optimizer over the model's parameters.
        criterion: Loss callable invoked as `criterion(outputs, y_batch)`.
        epochs: Maximum number of epochs to run.
        scheduler: LR scheduler whose `step(metric)` receives the mean
            training loss of the epoch.
        early_stopping_patience: Stop after this many consecutive epochs
            without a new best validation loss.

    Returns:
        dict with keys "epochs_run", "best_val_loss", "train_loss", "val_loss"
        and "accuracy"; the last three describe the final epoch that ran and
        are None if `epochs` is 0 ("best_val_loss" is then inf).

    Raises:
        ValueError: If `train_loader` or `test_loader` contains no batches.
    """
    num_train_batches = len(train_loader)
    num_val_batches = len(test_loader)
    # Fail early with a clear message instead of a ZeroDivisionError when averaging.
    if num_train_batches == 0 or num_val_batches == 0:
        raise ValueError("train_loader and test_loader must each contain at least one batch")

    metrics = {
        "epochs_run": 0,
        "best_val_loss": float('inf'),
        "train_loss": None,
        "val_loss": None,
        "accuracy": None,
    }
    patience = 0
    writer = SummaryWriter()

    # try/finally guarantees the event file is flushed and closed even if a
    # training step raises (e.g. out-of-memory or a shape mismatch).
    try:
        for epoch in range(epochs):
            # ---- training pass ----
            model.train()
            train_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)

                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()

            train_loss /= num_train_batches
            # The scheduler is driven by the mean training loss of this epoch.
            scheduler.step(train_loss)

            # ---- evaluation pass ----
            model.eval()
            val_loss = 0.0
            correct = 0
            total = 0
            with torch.no_grad():
                for X_batch, y_batch in test_loader:
                    X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                    outputs = model(X_batch)
                    val_loss += criterion(outputs, y_batch).item()

                    _, predicted = torch.max(outputs, 1)
                    total += y_batch.size(0)
                    correct += (predicted == y_batch).sum().item()

            val_loss /= num_val_batches
            accuracy = 100 * correct / total if total else 0.0

            writer.add_scalar("Loss/train", train_loss, epoch)
            writer.add_scalar("Loss/val", val_loss, epoch)
            writer.add_scalar("Accuracy/val", accuracy, epoch)

            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Accuracy: {accuracy:.2f}%")

            metrics["epochs_run"] = epoch + 1
            metrics["train_loss"] = train_loss
            metrics["val_loss"] = val_loss
            metrics["accuracy"] = accuracy

            # Early stopping: count consecutive epochs without a strictly better val loss.
            if val_loss < metrics["best_val_loss"]:
                metrics["best_val_loss"] = val_loss
                patience = 0
            else:
                patience += 1

            if patience >= early_stopping_patience:
                print("Early stopping triggered.")
                break
    finally:
        writer.close()

    return metrics

# Experiment settings
hidden_sizes = [16, 32, 64]
pooling_types = ["max", "avg"]
epochs_list = [5, 50, 100, 250, 350]
optimizers = ["SGD", "RMSprop", "Adam"]

# Map each optimizer name to its constructor. A lookup table (instead of an
# if/elif chain without an else) means an unknown name can never fall through
# and silently reuse the previous iteration's optimizer.
optimizer_factories = {
    "SGD": optim.SGD,
    "RMSprop": optim.RMSprop,
    "Adam": optim.Adam,
}

# Validate the whole grid before starting, so a typo fails immediately rather
# than partway through a long sweep.
unknown_optimizers = [name for name in optimizers if name not in optimizer_factories]
if unknown_optimizers:
    raise ValueError(f"Unsupported optimizer name(s): {unknown_optimizers}")

results = []

# Run experiments: one fresh model/optimizer/scheduler per grid point.
for hidden_size in hidden_sizes:
    for pooling_type in pooling_types:
        for epochs in epochs_list:
            for opt_name in optimizers:
                print(f"Running: Hidden Size={hidden_size}, Pooling={pooling_type}, Epochs={epochs}, Optimizer={opt_name}")

                model = RNNModel(input_size=X.shape[1], hidden_size=hidden_size, num_classes=num_classes, pooling_type=pooling_type).to(device)
                criterion = nn.CrossEntropyLoss()

                optimizer = optimizer_factories[opt_name](model.parameters(), lr=0.01)

                # `verbose` is deprecated on ReduceLROnPlateau in recent PyTorch
                # releases, so it is no longer passed.
                scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

                # perf_counter is monotonic, so the measured duration is not
                # affected by system clock adjustments.
                start_time = time.perf_counter()
                train_and_evaluate(model, optimizer, criterion, epochs, scheduler)
                elapsed_time = time.perf_counter() - start_time

                results.append([hidden_size, pooling_type, epochs, opt_name, elapsed_time])

# Save results
results_df = pd.DataFrame(results, columns=["Hidden Size", "Pooling", "Epochs", "Optimizer", "Time (s)"])
results_df.to_csv("markovresult.csv", index=False)
print("Results saved to markovresult.csv")

Running: Hidden Size=16, Pooling=max, Epochs=5, Optimizer=SGD




Epoch 1/5, Train Loss: 2.8632, Val Loss: 2.0117, Accuracy: 68.95%
Epoch 2/5, Train Loss: 1.8625, Val Loss: 1.7271, Accuracy: 69.00%
Epoch 3/5, Train Loss: 1.6869, Val Loss: 1.6291, Accuracy: 68.95%
Epoch 4/5, Train Loss: 1.6193, Val Loss: 1.5796, Accuracy: 69.04%
Epoch 5/5, Train Loss: 1.5794, Val Loss: 1.5449, Accuracy: 69.19%
Running: Hidden Size=16, Pooling=max, Epochs=5, Optimizer=RMSprop
Epoch 1/5, Train Loss: 1.3444, Val Loss: 1.2321, Accuracy: 72.03%
Epoch 2/5, Train Loss: 1.2051, Val Loss: 1.2808, Accuracy: 71.42%
Epoch 3/5, Train Loss: 1.1540, Val Loss: 1.1876, Accuracy: 71.77%
Epoch 4/5, Train Loss: 1.1108, Val Loss: 1.4380, Accuracy: 65.02%
Epoch 5/5, Train Loss: 1.0741, Val Loss: 1.0943, Accuracy: 72.37%
Running: Hidden Size=16, Pooling=max, Epochs=5, Optimizer=Adam
Epoch 1/5, Train Loss: 1.4084, Val Loss: 1.2107, Accuracy: 71.74%
Epoch 2/5, Train Loss: 1.1812, Val Loss: 1.1191, Accuracy: 72.09%
Epoch 3/5, Train Loss: 1.0898, Val Loss: 1.0762, Accuracy: 72.22%
Epoch 4/5, Tr