# In [1]:
from flask import Flask, render_template, request
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

app = Flask(__name__)

# ================= LOAD DATA =================
# Songs whose popularity is at or above this value are labelled "recommended".
POPULARITY_THRESHOLD = 70

# The path is relative, so it is resolved against the current working directory.
data = pd.read_csv("songs_2000_2020_50k.csv")

# Remove duplicates: keep a single row per (track_name, artist_name) pair.
data.drop_duplicates(subset=['track_name', 'artist_name'], inplace=True)

# Create target variable: 1 when popularity >= threshold, otherwise 0.
# A vectorized comparison replaces the per-row Python lambda; missing
# popularity values compare as False and therefore map to 0, as before.
data['recommended'] = (
    data['popularity'] >= POPULARITY_THRESHOLD
).astype('int64')

# Encode artist: map every artist name to an integer id. The fitted encoder
# is kept at module level alongside the data it was fitted on.
artist_encoder = LabelEncoder()
data['artist_encoded'] = artist_encoder.fit_transform(data['artist_name'])

# Feature selection: the column order here is the order the scaler and the
# models are fitted with, so it must be reused unchanged at prediction time.
features = ['artist_encoded', 'year', 'danceability', 'energy', 'tempo', 'loudness']
X = data[features]
y = data['recommended']

# Train-test split: 80/20 with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling: fit on the training rows only, then apply the same transform to
# the held-out rows so no test-set statistics leak into the scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ================= TRAIN MODELS =================
# Both classifiers are fitted once at import time on the scaled training
# split and share the same seed for reproducibility.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# NOTE: `use_label_encoder` is intentionally not passed. It was deprecated in
# XGBoost and is no longer a recognised parameter in current releases, where
# passing it only produces an "unused parameter" warning. The target is
# already integer 0/1, so no label encoding is needed.
xgb = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
    eval_metric='logloss'
)
xgb.fit(X_train, y_train)

# ================= ROUTES =================
@app.route('/')
def index():
    """Render the landing page with the sorted list of distinct artist names."""
    distinct_artists = data['artist_name'].unique()
    return render_template("index.html", artists=sorted(distinct_artists))

@app.route('/recommend', methods=['POST'])
def recommend():
    """Render the top six songs of the submitted artist, ranked by model score.

    Reads the ``artist`` form field, scores every song of that artist with
    both classifiers, averages the two positive-class probabilities and
    renders ``result.html`` with the six highest-scoring songs as
    ``[track_name, album_name, year]`` rows.

    An artist that is not present in the dataset yields the same template
    with an empty song list and HTTP status 404 instead of an unhandled
    ``ValueError`` from the label encoder.
    """
    artist = request.form['artist']

    # Select by name rather than via ``artist_encoder.transform``: the encoder
    # raises ValueError for a label it was not fitted on, which would surface
    # as an HTTP 500. 'artist_encoded' is a one-to-one mapping of
    # 'artist_name', so both filters select the same rows for known artists.
    artist_songs = data[data['artist_name'] == artist].copy()

    if artist_songs.empty:
        # NOTE(review): assumes result.html tolerates an empty ``songs`` list.
        return render_template(
            "result.html",
            artist=artist,
            songs=[]
        ), 404

    # Apply the scaler fitted at start-up, using the same feature order.
    X_artist = scaler.transform(artist_songs[features])

    # Column 1 of predict_proba is taken as the "recommended" class score.
    rf_prob = rf.predict_proba(X_artist)[:, 1]
    xgb_prob = xgb.predict_proba(X_artist)[:, 1]

    # Ensemble score: unweighted mean of the two model probabilities.
    artist_songs['ensemble_score'] = (rf_prob + xgb_prob) / 2

    # Top 6 recommendations
    top6 = artist_songs.sort_values(
        by='ensemble_score', ascending=False
    ).head(6)

    songs = top6[['track_name', 'album_name', 'year']].values.tolist()

    return render_template(
        "result.html",
        artist=artist,
        songs=songs
    )

if __name__ == "__main__":
    import os

    # Debug mode turns on the interactive debugger and the auto-reloader.
    # The debugger allows code execution from the browser, so it is opt-in
    # through the FLASK_DEBUG environment variable instead of always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    app.run(debug=debug_enabled)


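# Captured cell output: ModuleNotFoundError: No module named 'flask'
# Flask is not installed in this environment; install it (e.g. `pip install flask`) and re-run.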