In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier  # Import XGBoost classifier
from sklearn.metrics import accuracy_score

In [2]:
# Lookup table from genre name to numerical label.
# The genres are listed in label order: a genre's position in the tuple is its label (0-9).
genre_label_mapping = {
    genre: label
    for label, genre in enumerate((
        'blues',
        'classical',
        'country',
        'disco',
        'hiphop',
        'jazz',
        'metal',
        'pop',
        'reggae',
        'rock',
    ))
}

In [3]:
# Step 1: Load the competition CSV files
# Both files are parsed the same way; the first path becomes train_data, the second test_data.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/pes-ec-mi-competition-a/train.csv",
        "/kaggle/input/pes-ec-mi-competition-a/test.csv",
    )
)


In [4]:
# Step 2: Data Preprocessing
# Encode the genre names in the "label" column of train_data as the numerical labels
# defined in genre_label_mapping.
#
# Series.map turns every value that is not a key of the mapping into NaN, so running this
# cell a second time (when the column already holds numerical labels) would wipe out every
# label. The guard below skips the encoding once it has been applied, which makes the cell
# safe to re-run.
if not train_data['label'].isin(list(genre_label_mapping.values())).all():
    encoded_labels = train_data['label'].map(genre_label_mapping)

    # Fail loudly on labels missing from the mapping instead of passing NaN targets on
    unmapped = encoded_labels.isna()
    if unmapped.any():
        raise ValueError(
            "Labels missing from genre_label_mapping: "
            f"{sorted(train_data.loc[unmapped, 'label'].astype(str).unique())}"
        )

    train_data['label'] = encoded_labels


In [5]:

# Separate the targets from the model inputs
y = train_data["label"]  # Target: the "label" column
X = train_data.drop(columns=["filename", "label"])  # Features: everything except filename/label

In [6]:

# Hold out 20% of the rows as a validation set (fixed seed keeps the split reproducible)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler's statistics are learned from the training split only,
# then the same transformation is applied to both the training and the validation split
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

In [7]:

# Step 3: Model Training using XGBoost Classifier
# Configure an XGBoost classifier (100 estimators, fixed seed) and fit it on the
# standardized training split
xgb_classifier = XGBClassifier(
    random_state=42,
    n_estimators=100,
)
xgb_classifier.fit(X_train, y_train)


In [8]:
# Step 4: Make Predictions
# Predict a label for every row of the standardized validation split with the fitted model
y_pred = xgb_classifier.predict(
    X_val,
)

In [9]:
# Step 5: Evaluate the Model
# Calculate accuracy on the validation set
accuracy = accuracy_score(y_val, y_pred)
print("Validation Set Accuracy:", accuracy)

Validation Set Accuracy: 0.8943857698721512


In [10]:

# Step 6: Model Inference
# Drop the non-feature "id" column from the test data and standardize the remaining columns
# with the scaler that was fitted on the training split
X_test = scaler.transform(test_data.drop(columns=["id"]))

# Predict a label for every test row with the trained model
test_predictions = xgb_classifier.predict(X_test)

In [11]:
# Step 7: Create a Submission File
# Pair each test row's id with its predicted label and write the table to submission.csv
# without the DataFrame index.
# NOTE(review): the "label" column holds the numerical codes from genre_label_mapping,
# not genre names — confirm this is the format the submission is expected to use.
submission_df = test_data[["id"]].assign(label=test_predictions)
submission_df.to_csv("submission.csv", index=False)