In [22]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pickle

# Step 2: Load Dataset
df = pd.read_csv("spotifydataset.csv")

# Step 3: Drop optional columns if they exist
df = df.drop(columns=[col for col in ['id', 'name', 'artists', 'release_date'] if col in df.columns])

# Step 4: Convert non-numeric columns to numbers using LabelEncoder
le = LabelEncoder()
for column in df.columns:
    if df[column].dtype == 'object':
        df[column] = le.fit_transform(df[column])

# Step 5: Categorize popularity and rename the column to 'target'
def categorize_popularity(popularity):
    if popularity < 40:
        return 'Low'
    elif popularity < 70:
        return 'Medium'
    else:
        return 'High'

df['target'] = df['popularity'].apply(categorize_popularity)
df = df.drop('popularity', axis=1)

# Step 6: Show class distribution
print("Class Distribution:\n", df['target'].value_counts())

# Step 7: Encode target labels
df['target'] = le.fit_transform(df['target'])

# Step 8: Prepare input and target
X = df.drop('target', axis=1)
y = df['target']

# Step 9: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Step 10: Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 11: Predictions & Evaluation
y_pred = model.predict(X_test)

print("\n✅ Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\n✅ Classification Report:\n", classification_report(y_test, y_pred))
print("✅ Accuracy Score:", accuracy_score(y_test, y_pred))

# Step 12: Save the model
with open('spotify_popularity_model.pkl', 'wb') as f:
    pickle.dump(model, f)

print("\n✅ Model saved as 'spotify_popularity_model.pkl'")


KeyError: 'popularity'