In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load your music dataset
# Replace 'train.csv' with the actual file path or URL of your dataset
music_data = pd.read_csv('train.csv')

# Drop non-numeric columns like 'Artist Name' and 'Track Name' for simplicity
music_data_numeric = music_data.drop(['Artist Name', 'Track Name'], axis=1)

# Handle categorical variables using Label Encoding
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
music_data_numeric['Class'] = label_encoder.fit_transform(music_data_numeric['Class'])

# Split the data into features (X) and target variable (y)
X = music_data_numeric.drop('Class', axis=1)
y = music_data_numeric['Class']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing values
imputer = SimpleImputer(strategy='mean')  # You can use other strategies like 'median' or 'most_frequent'
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Create a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier on the training set
rf_classifier.fit(X_train_imputed, y_train)

# Make predictions on the testing set
y_pred = rf_classifier.predict(X_test_imputed)

# Evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Accuracy: 0.50

Confusion Matrix:
[[108   0   0   8   9   0   0   6   0   4   1]
 [  0  10  10   0   0   8 102   0   4  18 134]
 [  0   0 113   0   0   4  26   0   1  24 113]
 [  9   0   0  63   2   0   0   2   0   2   0]
 [ 15   0   0   0  53   0   0   0   0   2   1]
 [  0   1   5   0   0 187   5   0   0  44  20]
 [  0  70  14   0   0  20 126   0   9  60 201]
 [  6   0   0   2   0   0   0  95   0   0   0]
 [  0   0   0   0   0   0  11   0 211   0 160]
 [ 10   7  12   1   5  43  49   0   2 270 132]
 [  3  51  44   3   8  13  93   0  94  81 580]]

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.79      0.75       136
           1       0.07      0.03      0.05       286
           2       0.57      0.40      0.47       281
           3       0.82      0.81      0.81        78
           4       0.69      0.75      0.72        71
           5       0.68      0.71      0.70       262
           6       0.31      0.25      0.28  