# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Train and evaluate a boosted decision-tree genre classifier (a C5.0-style
# setup: entropy-based trees combined with AdaBoost) on the audio features of
# the five most frequent genres in dataset.csv.

# Single seed shared by the split, the base tree and the booster so that the
# printed report is reproducible from run to run.
RANDOM_STATE = 42

# Load dataset
df = pd.read_csv("dataset.csv")

# Focus on top 5 genres (by number of rows)
top_genres = df['track_genre'].value_counts().nlargest(5).index
df = df[df['track_genre'].isin(top_genres)]

# Select relevant features
features = ['danceability', 'energy', 'acousticness', 'instrumentalness', 'valence', 'tempo']

# Drop rows with a missing feature value, then pick the labels of the rows
# that survived. `.loc` makes the label-based alignment explicit.
X = df[features].dropna()
y = df['track_genre'].loc[X.index]

# Train-test split; stratify so each genre keeps the same share in both parts
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

# Base estimator (Decision Tree with max depth to prevent overfitting)
base_tree = DecisionTreeClassifier(
    criterion='entropy', max_depth=5, random_state=RANDOM_STATE
)

# Boosted model (C5.0-like).
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, but it has always been the first positional parameter.
model = AdaBoostClassifier(
    base_tree, n_estimators=50, learning_rate=1.0, random_state=RANDOM_STATE
)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("C5.0-style (Boosted) Decision Tree Report:")
print(classification_report(y_test, y_pred))