<a href="https://colab.research.google.com/github/samuelecivale/Lab-Iagi/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np


# Create a directory to store the dataset
os.makedirs("/content/data", exist_ok=True)

# Navigate to the directory
os.chdir("/content/data")

# Download the dataset using the !wget command
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip

# Unzip the downloaded file
!unzip YearPredictionMSD.txt.zip

# Remove the zip file if needed
!rm YearPredictionMSD.txt.zip

# Load the data into a pandas DataFrame
columns = ["Year"] + [f"Feature_{i}" for i in range(90)]
df = pd.read_csv("YearPredictionMSD.txt", header=None, names=columns)

# Split the data into training, validation, and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(df.drop("Year", axis=1), df["Year"], test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Standardize the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Create custom PyTorch datasets
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Create DataLoader instances for training, validation, and testing


--2024-01-24 10:10:12--  https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘YearPredictionMSD.txt.zip’

ionMSD.txt.zip          [             <=>    ] 178.78M  46.3MB/s               ^C
Archive:  YearPredictionMSD.txt.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of YearPredictionMSD.txt.zip or
        YearPredictionMSD.txt.zip.zip, and cannot find YearPredictionMSD.txt.zip.ZIP, period.


KeyboardInterrupt: 

In [34]:
batch_size = 64
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
val_dataset = CustomDataset(X_val_tensor, y_val_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Example: Print the size of the datasets
print("Training set size:", len(train_loader.dataset))
print("Validation set size:", len(val_loader.dataset))
print("Testing set size:", len(test_loader.dataset))

Training set size: 412276
Validation set size: 51534
Testing set size: 51535


In [41]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the neural network architecture
class RegressionModel(nn.Module):
    def __init__(self, input_size):
        super(RegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 1)


    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

def weights_init(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        nn.init.zeros_(m.bias)



# Initialize the model
input_size = X_train.shape[1]
model = RegressionModel(input_size)


# Define loss function and optimizer (SGD)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
model.apply(weights_init)


# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    average_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}")

# Validation
model.eval()
with torch.no_grad():
    val_loss = 0.0
    for inputs, targets in val_loader:
        outputs = model(inputs)
        val_loss += criterion(outputs.squeeze(), targets).item()

    average_val_loss = val_loss / len(val_loader)
    print(f"Validation Loss: {average_val_loss:.4f}")

# Testing
model.eval()
with torch.no_grad():
    test_loss = 0.0
    for inputs, targets in test_loader:
        outputs = model(inputs)
        test_loss += criterion(outputs.squeeze(), targets).item()

    average_test_loss = test_loss / len(test_loader)
    print(f"Test Loss: {average_test_loss:.4f}")


Epoch [1/10], Loss: 1308212.0157
Epoch [2/10], Loss: 182007.0990
Epoch [3/10], Loss: 74288.9210
Epoch [4/10], Loss: 27162.2203
Epoch [5/10], Loss: 7934.9641
Epoch [6/10], Loss: 2038.1666
Epoch [7/10], Loss: 694.7850
Epoch [8/10], Loss: 347.5197
Epoch [9/10], Loss: 219.0328
Epoch [10/10], Loss: 163.1221
Validation Loss: 201.9431
Test Loss: 179.7993


In [59]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np


# Create a directory to store the dataset
os.makedirs("/content/data", exist_ok=True)

# Navigate to the directory
os.chdir("/content/data")

# Download the dataset using the !wget command
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip

# Unzip the downloaded file
!unzip YearPredictionMSD.txt.zip

# Remove the zip file if needed
!rm YearPredictionMSD.txt.zip

# Load the data into a pandas DataFrame
columns = ["Year"] + [f"Feature_{i}" for i in range(90)]
df = pd.read_csv("YearPredictionMSD.txt", header=None, names=columns)

# Map the years to corresponding classes
df['Class'] = df['Year'] - 1922

# Split the data into training, validation, and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(df.drop(["Year", "Class"], axis=1), df["Class"], test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Standardize the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)  # Use long for integer labels
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

missing_classes = set(np.unique(y_train)) - set(np.unique(y_val))

if missing_classes:
    # Add dummy samples for missing classes in the validation set
    for missing_class in missing_classes:
        # Find a representative sample for the missing class
        representative_sample = X_train[y_train == missing_class].iloc[0]

        # Append the representative sample to X_val
        X_val = X_val.append(representative_sample)

        # Append the missing class label to y_val
        y_val = y_val.append(pd.Series([missing_class]), ignore_index=True)

# Ensure the number of classes is consistent between training and validation sets
num_classes_train = len(np.unique(y_train))
num_classes_val = len(np.unique(y_val))

num_classes_train = len(np.unique(y_train))
num_classes_val = len(np.unique(y_val))

print(num_classes_train)
print(num_classes_val)


assert num_classes_train == num_classes_val, "Number of classes in training and validation sets must be the same."


# Create custom PyTorch datasets
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Create DataLoader instances for training, validation, and testing
batch_size = 64
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
val_dataset = CustomDataset(X_val_tensor, y_val_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


--2024-01-24 10:23:46--  https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘YearPredictionMSD.txt.zip’

YearPredictionMSD.t     [      <=>           ] 201.24M  34.5MB/s    in 5.6s    

2024-01-24 10:23:52 (36.1 MB/s) - ‘YearPredictionMSD.txt.zip’ saved [211011981]

Archive:  YearPredictionMSD.txt.zip
replace YearPredictionMSD.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
89
89


  X_val = X_val.append(representative_sample)
  y_val = y_val.append(pd.Series([missing_class]), ignore_index=True)
  X_val = X_val.append(representative_sample)
  y_val = y_val.append(pd.Series([missing_class]), ignore_index=True)
  X_val = X_val.append(representative_sample)
  y_val = y_val.append(pd.Series([missing_class]), ignore_index=True)
  X_val = X_val.append(representative_sample)
  y_val = y_val.append(pd.Series([missing_class]), ignore_index=True)
  X_val = X_val.append(representative_sample)
  y_val = y_val.append(pd.Series([missing_class]), ignore_index=True)


In [60]:
import torch.nn as nn
import torch.optim as optim

# Define the neural network
class SimpleClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Set input size, hidden size, and output size
input_size = X_train_tensor.shape[1]
hidden_size = 64  # You can adjust this based on your problem
output_size = num_classes_train  # Number of output classes

# Create an instance of the neural network
classifier = SimpleClassifier(input_size, hidden_size, output_size)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=0.001)

# Training the neural network
num_epochs = 10
for epoch in range(num_epochs):
    classifier.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = classifier(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation
    classifier.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = classifier(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    average_val_loss = val_loss / len(val_loader)
    accuracy = correct / total

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Validation Loss: {average_val_loss:.4f}, Accuracy: {accuracy * 100:.2f}%')

# Testing the neural network
classifier.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = classifier(inputs)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_accuracy = test_correct / test_total
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


IndexError: Target 89 is out of bounds.