# Housekeeping

In [1]:
%run 'Setup.py'

The target has 8 genre classes: Electronic, Experimental, Folk, Hip-Hop, Instrumental, International, Pop and Rock.
Objective 1: construct a classifier which, based on the features of a song, predicts its genre.
Objective 2: estimate the classifier's generalisation error under the 0–1 loss.
Features are real-valued, correspond to summary statistics (mean, sd, skewness, kurtosis, median, min, max) of 
time series of various music features, such as the chromagram or the Mel-frequency cepstrum.
Feature description:
chroma_cens: Chroma Energy Normalized (CENS, 12 chroma) - 84 features
chroma_cqt: Constant-Q chromagram (12 chroma) - 84 features
chroma_stft: Chromagram (12 chroma) - 84 features
mfcc: Mel-frequency cepstrum (20 coefficients) - 140 features
rmse: Root-mean-square - 7 features
spectral_bandwidth: Spectral bandwidth - 7 features
spectral_centroid: Spectral centroid - 7 features
spectral_contrast: Spectral contrast (7 frequency bands) - 49 features
spectral_rolloff: Roll-off frequency - 7 features

## Import and Prepare Data

In [2]:
# Prepare data: encode the labels, hold out a validation split, standardise the
# features, and wrap everything in tensors / DataLoaders.

# Tunable settings for this cell.
DATA_SEED = 42       # seeds both the train/validation split and the training shuffle
VAL_FRACTION = 0.2   # share of the labelled data held out for validation
BATCH_SIZE = 64      # mini-batch size used by every DataLoader below

# Map the labels to integer class ids; ravel() flattens the label array to 1-D.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train_np.ravel())

X_train, X_val, Y_train, Y_val = train_test_split(
    x_train_np, y_train_encoded, test_size=VAL_FRACTION, random_state=DATA_SEED
)

# Fit the scaler on the training split only, then reuse its statistics for the
# validation and test features so held-out data does not influence the scaling.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(x_test_np)  # test features; no labels are used for this set

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.int64)
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
Y_val_tensor = torch.tensor(Y_val, dtype=torch.int64)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# There is deliberately no Y_test_tensor: the test set is handled without labels.

# TensorDatasets and DataLoaders for the training and validation sets.
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, Y_val_tensor)

# A dedicated, seeded generator makes the shuffled batch order reproducible
# across kernel restarts without touching torch's global RNG state.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(DATA_SEED),
)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Test-set loader, for prediction only. Each batch is a 1-tuple ``(features,)``
# because the dataset holds no labels, so it cannot be unpacked as
# ``(inputs, labels)``.
test_dataset = TensorDataset(X_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Pick the compute device: CUDA GPU if present, else Apple MPS, else CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

## Evaluation Functions for sklearn and pytorch models

In [3]:
def evaluate_pytorch_model(model, dataloaders, device):
    """Report the classification accuracy of a PyTorch model on each provided split.

    Args:
        model: Module whose forward pass returns per-class scores with the
            class dimension at index 1. It is switched to evaluation mode and
            left in that mode; it is not moved to ``device`` here.
        dataloaders: Mapping from phase name (``'train'``, ``'val'``,
            ``'test'``) to an iterable yielding ``(inputs, labels)`` batches.
            Missing phases are skipped and any other keys are ignored. A
            loader that yields features only cannot be evaluated here.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict mapping ``'<phase>_accuracy'`` to the fraction of correctly
        predicted samples, or ``nan`` for a phase whose loader yields no samples.
    """
    model.eval()  # Disable dropout / batch-norm updates while scoring
    performance = {}
    with torch.no_grad():  # Inference only, so no gradient tracking is needed
        for phase in ('train', 'val', 'test'):
            if phase not in dataloaders:
                continue
            correct, total = 0, 0
            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                # Predicted class = index of the highest score in each row.
                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            # An empty loader would otherwise raise ZeroDivisionError.
            accuracy = correct / total if total else float('nan')
            performance[phase + '_accuracy'] = accuracy
            print(f'Accuracy of the {model.__class__.__name__} model on the {phase} set: {accuracy:.2f}')
    return performance

def evaluate_sklearn_model(model, X_train, Y_train, X_test, Y_test):
    """Report the accuracy of a fitted scikit-learn style model on two splits.

    Args:
        model: Object exposing ``predict(X)``.
        X_train, Y_train: Features and true labels reported under ``'train'``.
        X_test, Y_test: Features and true labels reported under ``'test'``.
            Labels may be 1-D or column vectors; they are flattened before
            the comparison.

    Returns:
        dict with ``'train_accuracy'`` and ``'test_accuracy'``, each the
        fraction of predictions equal to the corresponding label.

    Raises:
        ValueError: If the number of predictions differs from the number of labels.
    """
    performance = {}
    for phase, X, Y in [('train', X_train, Y_train), ('test', X_test, Y_test)]:
        # Flatten both sides: comparing an (n,) prediction vector with an
        # (n, 1) label column would broadcast to an n-by-n matrix and yield a
        # silently wrong accuracy.
        predicted = np.asarray(model.predict(X)).ravel()
        expected = np.asarray(Y).ravel()
        if predicted.shape != expected.shape:
            raise ValueError(
                f'{phase}: got {predicted.size} predictions for {expected.size} labels'
            )
        accuracy = np.mean(predicted == expected)
        performance[phase + '_accuracy'] = accuracy
        print(f'Accuracy of the {model.__class__.__name__} model on the {phase} set: {accuracy:.2f}')
    return performance



## Functions to generate Predictions

In [4]:
def map_predictions_to_genres(predictions, label_encoder):
    """Translate encoded class predictions back into their original genre labels.

    Delegates to ``label_encoder.inverse_transform`` and returns its result unchanged.
    """
    return label_encoder.inverse_transform(predictions)

def make_predictions_pytorch(model, X_test_tensor, device):
    """Predict a class index for every row of ``X_test_tensor``.

    The model is put into evaluation mode, the whole tensor is moved to
    ``device`` and scored in a single forward pass without gradient tracking.

    Returns:
        NumPy array holding, for each row, the index of the highest score
        along dimension 1 of the model output.
    """
    model.eval()
    features = X_test_tensor.to(device)
    with torch.no_grad():
        class_ids = torch.max(model(features), dim=1).indices
    # Bring the result back to host memory before converting to NumPy.
    return class_ids.cpu().numpy()

def make_predictions_sklearn(model, X_test_scaled):
    """Return ``model.predict(X_test_scaled)`` unchanged.

    Thin wrapper that gives scikit-learn style models the same entry point
    shape as the PyTorch prediction helper.
    """
    return model.predict(X_test_scaled)


## Initialize

In [6]:
# Pick the compute device: CUDA GPU if present, else Apple MPS, else CPU.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)