In [2]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

# Train and evaluate a decision-tree classifier on the music dataset.
# Steps: load CSV -> drop non-numeric text columns -> label-encode the target
# -> hold-out split -> mean-impute features -> fit tree -> print metrics.

# --- Configuration: everything a reader might want to tune lives here. ---
DATA_PATH = 'train.csv'  # replace with the actual file path or URL of your dataset
TEXT_COLUMNS = ['Artist Name', 'Track Name']  # non-numeric columns dropped for simplicity
TARGET_COLUMN = 'Class'
TEST_SIZE = 0.2  # fraction of rows held out for evaluation
SEED = 42        # shared by the split and the tree so a re-run reproduces the same numbers

# --- Load ---
music_data = pd.read_csv(DATA_PATH)

# --- Prepare features and target ---
# The mean imputer and the tree below need numeric inputs, so the text columns go.
music_data_numeric = music_data.drop(columns=TEXT_COLUMNS)

# Only the target column is label-encoded (mapped to integer codes);
# the feature columns are passed through unchanged.
label_encoder = LabelEncoder()
music_data_numeric[TARGET_COLUMN] = label_encoder.fit_transform(
    music_data_numeric[TARGET_COLUMN]
)

X = music_data_numeric.drop(columns=TARGET_COLUMN)
y = music_data_numeric[TARGET_COLUMN]

# --- Hold-out split ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# --- Impute missing values ---
# The imputer is fitted on the training split only and then applied to the
# test split, so the test rows do not influence the fill values.
imputer = SimpleImputer(strategy='mean')  # alternatives: 'median', 'most_frequent'
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# --- Fit the model and predict on the held-out rows ---
clf = DecisionTreeClassifier(random_state=SEED)
clf.fit(X_train_imputed, y_train)
y_pred = clf.predict(X_test_imputed)

# --- Evaluate ---
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# --- Report ---
print(f"Accuracy: {accuracy:.2f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Accuracy: 0.36

Confusion Matrix:
[[ 83   0   0  17  18   0   0   6   0   9   3]
 [  0  30  17   0   0  13 116   0   9  21  80]
 [  0  19  81   0   0  12  39   0   5  36  89]
 [ 13   0   0  49   3   0   0   8   0   2   3]
 [ 15   0   0   3  41   0   0   0   0   5   7]
 [  0  19   7   0   0 136  25   0   0  46  29]
 [  0 122  39   0   0  14  91   0  36  67 131]
 [  5   0   0   8   0   0   0  87   0   2   1]
 [  0  22  15   0   0   1  25   0 168   6 145]
 [ 10  43  37   4  10  47  61   0   7 204 108]
 [  2 146  79   7  14  29 133   0 146  96 318]]

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.61      0.63       136
           1       0.07      0.10      0.09       286
           2       0.29      0.29      0.29       281
           3       0.56      0.63      0.59        78
           4       0.48      0.58      0.52        71
           5       0.54      0.52      0.53       262
           6       0.19      0.18      0.18  