# Imports

In [2]:
# Mount Google Drive at /content/drive so the project files stored there
# can be read and written from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Project root on the mounted Drive, and the data folder directly beneath it.
# Both paths keep their trailing slash so file names can be appended directly.
project_dir = '/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/'
data_dir = f"{project_dir}data/"

In [4]:
#%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"

In [9]:
%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
import numpy as np
import os
import importlib
import sys

sys.path.append('/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton')

import user_manager
importlib.reload(user_manager)
from user_manager import UserManager  # ✅ import the new class

import artist_similarity_service
importlib.reload(artist_similarity_service)
from artist_similarity_service import ArtistSimilarityService

# Locations of the project folder and of the CSV inputs used by the service.
# Directory paths end with a slash so file names can simply be appended.
project_dir = '/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/'
data_dir = f"{project_dir}data/"

users_csv = data_dir + "users.csv"
songs_csv = data_dir + "songs.csv"
history_csv = data_dir + "listening_history.csv"

class RecommendationService:
    """Recommend joyful songs by the artists suggested for a user.

    The songs CSV is re-read on every call; it must provide at least the
    ``emotion`` and ``artist`` columns used for filtering.
    """

    def __init__(self, songs_csv, history_csv, users_csv):
        """Store the CSV paths and build the UserManager for ``users_csv``.

        Parameters:
        - songs_csv: Path to the songs CSV
        - history_csv: Path to the listening-history CSV (stored, not read here)
        - users_csv: Path to the users CSV, passed on to UserManager
        """
        self.songs_csv = songs_csv
        self.history_csv = history_csv
        self.users_csv = users_csv
        self.user_manager = UserManager(users_csv)

    def recommend_songs(self, user_id):
        """
        Return up to five random songs with emotion 'joy' by recommended artists

        Parameters:
        - user_id: ID of the user

        Returns:
        - DataFrame with at most 5 matching rows of the songs CSV, or an
          empty DataFrame when there are no recommended artists or no
          matching songs
        """
        songs_df = pd.read_csv(self.songs_csv)

        # 🔁 Get recommended artists via UserManager
        artist_list = self.user_manager.get_recommended_artists(user_id)

        if not artist_list:
            print(f"⚠️ No recommended artists for user {user_id}")
            return pd.DataFrame()

        # The artist column is lowercased below, so the recommended names
        # must be lowercased too or mixed-case names would never match.
        wanted_artists = [str(artist).lower() for artist in artist_list]

        # 🎯 Filter for songs with emotion='joy' from recommended artists
        is_joy = songs_df['emotion'].str.lower() == 'joy'
        by_wanted_artist = songs_df['artist'].str.lower().isin(wanted_artists)
        filtered = songs_df[is_joy & by_wanted_artist]

        if filtered.empty:
            print("⚠️ No joyful songs found for the recommended artists.")
            return pd.DataFrame()

        # Unseeded random sample (rather than top-by-popularity) so the
        # recommendations vary from call to call.
        return filtered.sample(n=min(5, len(filtered)), random_state=None)

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [8]:
#%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
import numpy as np
import os
import importlib
import sys

sys.path.append('/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton')

import user_manager
importlib.reload(user_manager)
from user_manager import UserManager  # ✅ import the new class

import artist_similarity_service
importlib.reload(artist_similarity_service)
from artist_similarity_service import ArtistSimilarityService

# Project folder on the mounted Drive and the CSV files the service reads.
# Directory paths end with a slash so file names can simply be appended.
project_dir = '/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/'
data_dir = f"{project_dir}data/"

users_csv = data_dir + "users.csv"
songs_csv = data_dir + "songs.csv"
history_csv = data_dir + "listening_history.csv"

class RecommendationService:
    """Score and rank candidate songs for a user.

    Candidates are unheard songs with the requested emotion by the artists
    UserManager recommends, topped up with songs from the user's most-played
    genres when there are too few. Each candidate is scored on artist match,
    popularity, optional age/gender/release-year columns and closeness to the
    audio features of the user's listening history. All CSVs are re-read on
    every call.
    """

    # Reference year for the "recent" / "classic" release bonuses.
    CURRENT_YEAR = 2025  # Adjust based on your data

    # Audio features profiled from the songs the user already listened to.
    AUDIO_FEATURES = ('tempo', 'energy', 'danceability', 'acousticness',
                      'instrumentalness', 'valence', 'speechiness')
    # A match on a primary feature is worth 2 points, on a secondary one 1.
    PRIMARY_FEATURES = ('tempo', 'energy', 'valence')
    SECONDARY_FEATURES = ('danceability', 'acousticness',
                          'instrumentalness', 'speechiness')

    def __init__(self, songs_csv, history_csv, users_csv):
        """Store the CSV paths and create the collaborating services.

        Parameters:
        - songs_csv: Path to the songs CSV
        - history_csv: Path to the listening-history CSV
        - users_csv: Path to the users CSV, also passed to UserManager
        """
        self.songs_csv = songs_csv
        self.history_csv = history_csv
        self.users_csv = users_csv
        self.user_manager = UserManager(users_csv)
        self.artist_similarity = ArtistSimilarityService()

    def recommend_songs(self, user_id, emotion='joy', limit=5):
        """
        Recommend songs for a user based on their profile, listening history, and audio preferences

        Parameters:
        - user_id: ID of the user
        - emotion: Target emotion for recommendations (default: 'joy')
        - limit: Maximum number of recommendations to return

        Returns:
        - DataFrame with recommended songs (empty when the user is unknown,
          has no recommended artists, or no candidate song is found)
        """
        # Load necessary data
        songs_df = pd.read_csv(self.songs_csv)
        history_df = pd.read_csv(self.history_csv)
        users_df = pd.read_csv(self.users_csv)

        # Get user information
        user_info = users_df[users_df['user_id'] == user_id]
        if user_info.empty:
            print(f"⚠️ User {user_id} not found")
            return pd.DataFrame()

        # Optional profile attributes; None when the column is absent
        user_age = user_info['age'].iloc[0] if 'age' in user_info.columns else None
        user_gender = user_info['gender'].iloc[0] if 'gender' in user_info.columns else None

        # Get recommended artists via UserManager
        artist_list = self.user_manager.get_recommended_artists(user_id)
        if not artist_list:
            print(f"⚠️ No recommended artists for user {user_id}")
            return pd.DataFrame()

        # Lowercase once; reused by the filter and by every score computation
        artist_names = {str(artist).lower() for artist in artist_list}

        # Listening history, the songs already heard, and their song rows
        user_history = history_df[history_df['user_id'] == user_id]
        already_listened = set(user_history['song_id'].unique())
        user_songs = songs_df[songs_df['song_id'].isin(already_listened)]

        emotion_match = songs_df['emotion'].str.lower() == emotion.lower()
        unheard = ~songs_df['song_id'].isin(already_listened)

        # Primary filter: songs by targeted emotion from recommended artists
        by_artist = songs_df['artist'].str.lower().isin(artist_names)
        primary_filter = songs_df[emotion_match & by_artist & unheard].copy()

        # Too few songs: top up with the same emotion from the user's three
        # most-played genres (needs a genre column and some known history)
        filtered = primary_filter
        if (len(primary_filter) < limit
                and 'genre' in songs_df.columns
                and not user_songs.empty):
            preferred_genres = list(
                user_songs['genre'].value_counts().nlargest(3).index
            )
            secondary_filter = songs_df[
                emotion_match
                & songs_df['genre'].isin(preferred_genres)
                & unheard
                & ~songs_df['song_id'].isin(primary_filter['song_id'])
            ].copy()
            filtered = pd.concat([primary_filter, secondary_filter])

        if filtered.empty:
            print(f"⚠️ No suitable {emotion} songs found for user {user_id}")
            return pd.DataFrame()

        audio_preferences = self._audio_preferences(user_songs)

        # Apply scoring and sort; popularity breaks ties only when the
        # column exists (scoring already treats it as optional)
        scores = filtered.apply(
            lambda song: self._score_song(
                song, artist_names, user_age, user_gender, audio_preferences
            ),
            axis=1,
        )
        filtered = filtered.assign(rec_score=scores)
        sort_keys = ['rec_score']
        if 'popularity' in filtered.columns:
            sort_keys.append('popularity')
        filtered = filtered.sort_values(by=sort_keys, ascending=False, kind='stable')

        # Final selection - mix of top songs and diversity
        if len(filtered) > limit:
            # Take 70% from top scored songs (at least one)
            top_count = max(1, int(limit * 0.7))
            top_picks = filtered.head(top_count)

            # Take the remaining 30% randomly from the rest for diversity
            remaining = filtered.iloc[top_count:]
            diverse_count = limit - top_count
            if diverse_count > 0 and len(remaining) > 0:
                diverse_picks = remaining.sample(min(diverse_count, len(remaining)))
                result = pd.concat([top_picks, diverse_picks])
            else:
                result = top_picks
        else:
            result = filtered

        # Return recommendations without the scoring column
        if 'rec_score' in result.columns:
            result = result.drop(columns=['rec_score'])

        return result.head(limit)

    def _audio_preferences(self, user_songs):
        """Return the preferred range per audio feature for the heard songs.

        For every feature column present, the range is mean ± 0.5 * std of
        ``user_songs``. Returns an empty dict when ``user_songs`` is empty.
        """
        preferences = {}
        if user_songs.empty:
            return preferences

        for feature in self.AUDIO_FEATURES:
            if feature not in user_songs.columns:
                continue
            mean_val = user_songs[feature].mean()
            std_val = user_songs[feature].std()

            # Handle case where std is 0 or NaN (e.g. a single song)
            if pd.isna(std_val) or std_val == 0:
                std_val = 0.15  # Default range

            preferences[feature] = {
                'mean': mean_val,
                'std': std_val,
                'min': mean_val - (0.5 * std_val),  # Narrower range for better matching
                'max': mean_val + (0.5 * std_val),
            }
        return preferences

    @staticmethod
    def _in_preferred_range(song, feature, audio_preferences):
        """Return True when the song's feature value lies in the preferred range."""
        if feature not in song or feature not in audio_preferences:
            return False
        bounds = audio_preferences[feature]
        return bool(bounds['min'] <= song[feature] <= bounds['max'])

    def _score_song(self, song, artist_names, user_age, user_gender, audio_preferences):
        """Return the recommendation score for one song row (higher is better)."""
        score = 0

        # Artist match bonus; a missing (NaN) artist simply earns no bonus
        artist = song.get('artist')
        if isinstance(artist, str) and artist.lower() in artist_names:
            score += 5

        # Popularity factor (moderate influence, capped at 3). A missing
        # value is skipped so it cannot turn the whole score into NaN.
        popularity = song.get('popularity')
        if popularity is not None and not pd.isna(popularity):
            score += min(popularity / 25, 3)

        # Age-appropriate content bonus
        if user_age is not None and 'maturity_rating' in song:
            rating = song['maturity_rating']
            if (rating == 'E' or
                    (rating == 'T' and user_age >= 13) or
                    (rating == 'M' and user_age >= 17)):
                score += 2

        # Gender preference patterns (if available)
        if user_gender is not None and 'appeal_gender' in song:
            if song['appeal_gender'] == user_gender or song['appeal_gender'] == 'all':
                score += 1

        # Balance for recency vs classics
        if 'release_year' in song:
            song_age = self.CURRENT_YEAR - song['release_year']
            if song_age <= 2:  # Recent songs
                score += 2
            elif song_age >= 20:  # Classics
                score += 1

        # Audio feature matching (up to 10 points: 3 primary x 2 + 4 secondary x 1)
        if audio_preferences:
            for feature in self.PRIMARY_FEATURES:
                if self._in_preferred_range(song, feature, audio_preferences):
                    score += 2
            for feature in self.SECONDARY_FEATURES:
                if self._in_preferred_range(song, feature, audio_preferences):
                    score += 1

        return score

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


# Drafts

In [None]:
# Main draft for now (probably a placeholder); kept disabled inside a string literal.
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os
import importlib
import sys

sys.path.append('/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton')
import artist_similarity_service
importlib.reload(artist_similarity_service)
from artist_similarity_service import ArtistSimilarityService

class RecommendationService:
    def __init__(self, songs_csv, history_csv, users_csv=None):
        self.songs_csv = songs_csv
        self.history_csv = history_csv
        self.users_csv = users_csv
        self.artist_similarity = ArtistSimilarityService()

    def get_user_favorite_artists(self, user_id):
        if not self.users_csv:
            return []

        users_df = pd.read_csv(self.users_csv)
        user_row = users_df[users_df['user_id'] == user_id]
        if user_row.empty:
            return []

        fav_str = user_row.iloc[0].get('favorite_artists', '')
        return [a.strip().lower() for a in fav_str.split(',') if a.strip()]

    def get_user_history(self, user_id):
        df = pd.read_csv(self.history_csv)
        return df[df['user_id'] == user_id]

    def recommend_songs(self, user_id, threshold=5):
        history_df = pd.read_csv(self.history_csv)
        songs_df = pd.read_csv(self.songs_csv)
        user_history = history_df[history_df['user_id'] == user_id]
        num_logs = user_history.shape[0]

        # For new users: recommend songs based on similar artists
        if num_logs < threshold:
            favorite_artists = self.get_user_favorite_artists(user_id)
            if not favorite_artists:
                return songs_df.sort_values('popularity', ascending=False).head(5)

            expanded_artist_list = self.artist_similarity.recommend_from_favorites(favorite_artists)
            top_artist_names = [artist for artist, score in expanded_artist_list]

            # Ensure favorites are included in final artist list
            top_artist_names += [a for a in favorite_artists if a not in top_artist_names]

            filtered = songs_df[songs_df['artist'].str.lower().isin(top_artist_names)]
            if filtered.empty:
                return songs_df.sample(min(5, len(songs_df)))
            return filtered.sort_values('popularity', ascending=False).head(5)

        # Experienced users: recommend songs by audio and emotion similarity
        return self._recommend_by_audio_emotion(user_history, songs_df)

    def _recommend_by_audio_emotion(self, user_history, songs_df):
        improving = user_history.copy()
        improving['improvement'] = improving['mood_after'].map(self._mood_score) - improving['mood_before'].map(self._mood_score)
        improving = improving[improving['improvement'] > 0]

        if improving.empty:
            return songs_df.sort_values('popularity', ascending=False).head(5)

        positive_song_ids = improving['song_id'].unique()
        feature_cols = [
            'tempo', 'loudness', 'energy', 'danceability', 'positiveness',
            'speechiness', 'liveness', 'acousticness', 'instrumentalness',
            'cos_pitch', 'sin_pitch', 'mode_int'
        ]

        good_songs = songs_df[songs_df['song_id'].isin(positive_song_ids)].dropna(subset=feature_cols)
        target_features = good_songs[feature_cols].values

        if target_features.shape[0] == 0:
            return songs_df.sample(min(5, len(songs_df)))

        scaler = StandardScaler()
        scaled_all = scaler.fit_transform(songs_df[feature_cols].fillna(0))
        scaled_target = scaler.transform(target_features)

        similarity = cosine_similarity(scaled_all, scaled_target).mean(axis=1)
        songs_df['similarity_score'] = similarity
        top_songs = songs_df.sort_values('similarity_score', ascending=False)

        return top_songs.head(5)

    def _mood_score(self, mood):
        mood_map = {'sad': 0, 'anger': 1, 'relaxed': 2, 'happy': 3}
        return mood_map.get(mood, 1)'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
# Probably the base version; kept disabled inside a string literal.
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"

import pandas as pd

class RecommendationService:
    def __init__(self, songs_csv, history_csv):
        self.songs_csv = songs_csv
        self.history_csv = history_csv

    def get_user_history_count(self, user_id):
        df = pd.read_csv(self.history_csv)
        return df[df['user_id'] == user_id].shape[0]

    def recommend_songs(self, user_id, threshold=5):
        """
        Recommend songs based on user history.
        For new users (< threshold), recommend popular songs.
        For existing users, recommend based on their past liked moods.
        """
        history_df = pd.read_csv(self.history_csv)
        songs_df = pd.read_csv(self.songs_csv)

        user_history = history_df[history_df['user_id'] == user_id]

        if user_history.shape[0] < threshold:
            # New user → recommend top 5 popular songs
            return songs_df.sort_values('popularity', ascending=False).head(5)
        else:
            # Experienced user → recommend similar mood songs
            recent_moods = user_history['mood_after'].value_counts().idxmax()
            recommended = songs_df[songs_df['emotion'] == recent_moods]
            if recommended.empty:
                recommended = songs_df.sample(5)
            return recommended.head(5)'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

class RecommendationService:
    def __init__(self, songs_csv, history_csv, users_csv):
        self.songs_csv = songs_csv
        self.history_csv = history_csv
        self.users_csv = users_csv
        self.user_manager = UserManager(users_csv)

    def recommend_songs(self, user_id):
        songs_df = pd.read_csv(self.songs_csv)
        history_df = pd.read_csv(self.history_csv)

        # Get user improvement examples
        user_hist = history_df[history_df['user_id'] == user_id]
        user_hist['before'] = user_hist['mood_before'].map(self._mood_score)
        user_hist['after'] = user_hist['mood_after'].map(self._mood_score)
        improved = user_hist[user_hist['after'] > user_hist['before']]
        liked_ids = improved['song_id'].unique()

        liked_songs = songs_df[songs_df['song_id'].isin(liked_ids)]
        if liked_songs.empty:
            return self._fallback_recommendations(songs_df, user_id)

        feature_cols = [
            'positiveness', 'danceability', 'energy',
            'acousticness', 'liveness', 'speechiness',
            'tempo', 'loudness', 'mode_int'
        ]
        liked_features = liked_songs[feature_cols].dropna()
        if liked_features.empty:
            return self._fallback_recommendations(songs_df, user_id)

        # 🎯 Joyful songs by recommended artists
        artist_list = self.user_manager.get_recommended_artists(user_id)
        candidates = songs_df[
            (songs_df['emotion'].str.lower() == 'joy') &
            (songs_df['artist'].str.lower().isin(artist_list))
        ].dropna(subset=feature_cols)

        if candidates.empty:
            return pd.DataFrame()

        # 🔍 Similarity
        scaler = StandardScaler()
        X_liked = scaler.fit_transform(liked_features)
        X_candidates = scaler.transform(candidates[feature_cols])

        sim = cosine_similarity(X_candidates, X_liked).mean(axis=1)
        candidates['similarity'] = sim

        return candidates.sort_values(by='similarity', ascending=False).head(5)

    def _mood_score(self, mood):
        mood_map = {'sad': 0, 'anger': 1, 'relaxed': 2, 'happy': 3}
        return mood_map.get(mood, 1)

    def _fallback_recommendations(self, songs_df, user_id):
        artist_list = self.user_manager.get_recommended_artists(user_id)
        fallback = songs_df[
            (songs_df['emotion'].str.lower() == 'joy') &
            (songs_df['artist'].str.lower().isin(artist_list))
        ]
        if fallback.empty:
            return pd.DataFrame()
        return fallback.sample(n=min(5, len(fallback)))'''

In [None]:
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"

import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

class RecommendationService:
    def __init__(self, songs_csv, history_csv, users_csv, scaler_path='scaler.joblib'):
        self.songs_df = pd.read_csv(songs_csv)
        self.users_df = pd.read_csv(users_csv)
        self.history_df = pd.read_csv(history_csv)
        self.scaler = joblib.load(scaler_path)

        self.songs_df['artist'] = self.songs_df['artist'].astype(str).str.strip().str.lower()
        self.songs_df['song'] = self.songs_df['song'].astype(str).str.strip().str.lower()
        self.users_df.columns = self.users_df.columns.str.strip()
        if 'favorite_artists' not in self.users_df.columns:
            self.users_df['favorite_artists'] = ''

        ignore_cols = ['song_id', 'artist', 'song', 'title', 'genre', 'emotion']
        self.feature_cols = [c for c in self.songs_df.columns if c not in ignore_cols and self.songs_df[c].dtype in [np.float64, np.int64]]
        self.songs_df[self.feature_cols] = self.songs_df[self.feature_cols].fillna(0)
        self.songs_df_std = self.scaler.transform(self.songs_df[self.feature_cols])
        self.mood_score = {'anger': 0, 'sad': 1, 'relaxed': 2, 'happy': 3}

    def get_user_favorites(self, user_id):
        row = self.users_df[self.users_df['user_id'] == user_id]
        if row.empty:
            return []
        fav_str = row.iloc[0].get('favorite_artists', '')
        return [a.strip().lower() for a in fav_str.split(',') if a.strip()]

    def recommend_songs(self, user_id, top_n=5):
        user_hist = self.history_df[self.history_df['user_id'] == user_id].copy()
        if user_hist.empty:
            return self._recommend_by_favorites(user_id, top_n)

        for mood_col in ['mood_before', 'mood_after']:
            if mood_col not in user_hist.columns:
                print(f"❌ Missing column: {mood_col}")
                return pd.DataFrame()

        user_hist['before_score'] = user_hist['mood_before'].map(self.mood_score)
        user_hist['after_score'] = user_hist['mood_after'].map(self.mood_score)
        user_hist['delta'] = user_hist['after_score'] - user_hist['before_score']
        improved = user_hist[user_hist['delta'] > 0]

        if improved.empty:
            return self._recommend_by_favorites(user_id, top_n)

        good_song_ids = improved['song_id'].unique()
        good_songs = self.songs_df[self.songs_df['song_id'].isin(good_song_ids)].copy()
        if good_songs.empty:
            return self._recommend_by_favorites(user_id, top_n)

        profile_vec = good_songs[self.feature_cols].mean().values.reshape(1, -1)
        profile_df = pd.DataFrame(profile_vec, columns=self.feature_cols)
        profile_scaled = self.scaler.transform(profile_df)
        similarity = cosine_similarity(self.songs_df_std, profile_scaled).flatten()

        artist_profile = good_songs['artist'].value_counts().to_dict()
        self.songs_df['similarity'] = similarity
        self.songs_df['artist_boost'] = self.songs_df['artist'].map(artist_profile).fillna(0)
        self.songs_df['final_score'] = self.songs_df['similarity'] + 0.1 * self.songs_df['artist_boost']
        self.songs_df['random_noise'] = np.random.uniform(-0.02, 0.02, size=len(self.songs_df))
        self.songs_df['score'] = self.songs_df['final_score'] + self.songs_df['random_noise']

        already_played = user_hist['song_id'].unique()
        candidates = self.songs_df[~self.songs_df['song_id'].isin(already_played)]

        return candidates.sort_values('score', ascending=False).head(top_n)[
            ['song_id', 'artist', 'song', 'score'] + self.feature_cols
        ]

    def _recommend_by_favorites(self, user_id, top_n=5):
        fav_artists = self.get_user_favorites(user_id)
        if not fav_artists:
            return self.songs_df.sample(top_n)[['song_id', 'artist', 'song'] + self.feature_cols]

        fav_songs = self.songs_df[self.songs_df['artist'].isin(fav_artists)].copy()
        fav_vec = fav_songs[self.feature_cols].mean().values.reshape(1, -1)
        fav_df = pd.DataFrame(fav_vec, columns=self.feature_cols)
        vec_scaled = self.scaler.transform(fav_df)
        similarity = cosine_similarity(self.songs_df_std, vec_scaled).flatten()

        self.songs_df['similarity'] = similarity
        self.songs_df['score'] = self.songs_df['similarity'] + np.random.uniform(-0.02, 0.02, size=len(self.songs_df))

        return self.songs_df.sort_values('score', ascending=False).head(top_n)[
            ['song_id', 'artist', 'song', 'score'] + self.feature_cols
        ]
'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

class RecommendationService:
    def __init__(self, songs_csv, history_csv, users_csv, scaler_path='scaler.joblib'):
        self.songs_df = pd.read_csv(songs_csv)
        self.users_df = pd.read_csv(users_csv)
        self.history_df = pd.read_csv(history_csv)
        self.scaler = joblib.load(scaler_path)

        # Normalize text fields
        self.songs_df['artist'] = self.songs_df['artist'].astype(str).str.strip().str.lower()
        self.songs_df['song'] = self.songs_df['song'].astype(str).str.strip().str.lower()
        self.users_df.columns = self.users_df.columns.str.strip()
        if 'favorite_artists' not in self.users_df.columns:
            self.users_df['favorite_artists'] = ''

        # Prepare features
        ignore_cols = ['song_id', 'artist', 'song', 'title', 'genre', 'emotion']
        self.feature_cols = [c for c in self.songs_df.columns if c not in ignore_cols and self.songs_df[c].dtype in [np.float64, np.int64]]
        self.songs_df[self.feature_cols] = self.songs_df[self.feature_cols].fillna(0)
        self.songs_df_std = self.scaler.transform(self.songs_df[self.feature_cols])

        self.mood_score = {'anger': 0, 'sad': 1, 'relaxed': 2, 'happy': 3}

    def get_user_favorites(self, user_id):
        row = self.users_df[self.users_df['user_id'] == user_id]
        if row.empty:
            return []
        fav_str = row.iloc[0].get('favorite_artists', '')
        return [a.strip().lower() for a in fav_str.split(',') if a.strip()]

    def recommend_songs(self, user_id, top_n=5):
        user_hist = self.history_df[self.history_df['user_id'] == user_id].copy()
        if user_hist.empty:
            return self._recommend_by_favorites(user_id, top_n)

        # Mood delta
        for mood_col in ['mood_before', 'mood_after']:
            if mood_col not in user_hist.columns:
                print(f"❌ Missing column: {mood_col}")
                return pd.DataFrame()
        user_hist['before_score'] = user_hist['mood_before'].map(self.mood_score)
        user_hist['after_score'] = user_hist['mood_after'].map(self.mood_score)
        user_hist['delta'] = user_hist['after_score'] - user_hist['before_score']
        improved = user_hist[user_hist['delta'] > 0]

        if improved.empty:
            return self._recommend_by_favorites(user_id, top_n)

        # Use improved songs as base
        good_song_ids = improved['song_id'].unique()
        good_songs = self.songs_df[self.songs_df['song_id'].isin(good_song_ids)].copy()
        if good_songs.empty:
            return self._recommend_by_favorites(user_id, top_n)

        profile_vec = good_songs[self.feature_cols].mean().values.reshape(1, -1)
        artist_profile = good_songs['artist'].value_counts().to_dict()

        all_scaled = self.songs_df_std
        profile_scaled = self.scaler.transform(profile_vec)
        similarity = cosine_similarity(all_scaled, profile_scaled).flatten()

        self.songs_df['similarity'] = similarity
        self.songs_df['artist_boost'] = self.songs_df['artist'].map(artist_profile).fillna(0)
        self.songs_df['final_score'] = self.songs_df['similarity'] + 0.1 * self.songs_df['artist_boost']
        self.songs_df['random_noise'] = np.random.uniform(-0.02, 0.02, size=len(self.songs_df))
        self.songs_df['score'] = self.songs_df['final_score'] + self.songs_df['random_noise']

        already_played = user_hist['song_id'].unique()
        candidates = self.songs_df[~self.songs_df['song_id'].isin(already_played)]

        return candidates.sort_values('score', ascending=False).head(top_n)[
            ['artist', 'song', 'score'] + self.feature_cols
        ]

    def _recommend_by_favorites(self, user_id, top_n=5):
        fav_artists = self.get_user_favorites(user_id)
        if not fav_artists:
            return self.songs_df.sample(top_n)[['artist', 'song'] + self.feature_cols]

        fav_songs = self.songs_df[self.songs_df['artist'].isin(fav_artists)].copy()
        fav_vec = fav_songs[self.feature_cols].mean().values.reshape(1, -1)
        all_scaled = self.songs_df_std
        vec_scaled = self.scaler.transform(fav_vec)
        similarity = cosine_similarity(all_scaled, vec_scaled).flatten()

        self.songs_df['similarity'] = similarity
        self.songs_df['score'] = self.songs_df['similarity'] + np.random.uniform(-0.02, 0.02, size=len(self.songs_df))

        return self.songs_df.sort_values('score', ascending=False).head(top_n)[
            ['artist', 'song', 'score'] + self.feature_cols
        ]'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

class RecommendationService:
    def __init__(self, songs_csv, users_csv, history_csv, scaler_path='scaler.joblib'):
        self.songs_df = pd.read_csv(songs_csv)
        self.users_df = pd.read_csv(users_csv)
        self.history_df = pd.read_csv(history_csv)
        self.scaler = joblib.load(scaler_path)

        # Normalize text fields
        self.songs_df['artist'] = self.songs_df['artist'].astype(str).str.strip().str.lower()
        self.songs_df['song'] = self.songs_df['song'].astype(str).str.strip().str.lower()
        self.users_df.columns = self.users_df.columns.str.strip()
        if 'favorite_artists' not in self.users_df.columns:
            self.users_df['favorite_artists'] = ''

        # Prepare features
        ignore_cols = ['song_id', 'artist', 'song', 'title', 'genre', 'emotion']
        self.feature_cols = [c for c in self.songs_df.columns if c not in ignore_cols and self.songs_df[c].dtype in [np.float64, np.int64]]
        self.songs_df[self.feature_cols] = self.songs_df[self.feature_cols].fillna(0)
        self.songs_df_std = self.scaler.transform(self.songs_df[self.feature_cols])

        self.mood_score = {'anger': 0, 'sad': 1, 'relaxed': 2, 'happy': 3}

    def get_user_favorites(self, user_id):
        row = self.users_df[self.users_df['user_id'] == user_id]
        if row.empty:
            return []
        fav_str = row.iloc[0].get('favorite_artists', '')
        return [a.strip().lower() for a in fav_str.split(',') if a.strip()]

    def recommend_mood_aware(self, user_id, top_n=5):
        user_hist = self.history_df[self.history_df['user_id'] == user_id].copy()
        if user_hist.empty:
            print("⚠ No history found for user.")
            return pd.DataFrame()

        # Calculate mood delta
        user_hist['mood_before_score'] = user_hist['mood_before'].map(self.mood_score)
        user_hist['mood_after_score'] = user_hist['mood_after'].map(self.mood_score)
        user_hist['delta'] = user_hist['mood_after_score'] - user_hist['mood_before_score']
        improved = user_hist[user_hist['delta'] > 0]
        if improved.empty:
            print("⚠ No improving sessions found.")
            return pd.DataFrame()

        # Songs that improved mood
        good_song_ids = improved['song_id'].unique()
        good_songs = self.songs_df[self.songs_df['song_id'].isin(good_song_ids)].copy()
        if good_songs.empty:
            print("⚠ No matching song info for improved sessions.")
            return pd.DataFrame()

        # Build profile from improved songs
        feature_profile = good_songs[self.feature_cols].mean().values.reshape(1, -1)
        artist_boost = good_songs['artist'].value_counts().to_dict()

        # Similarity to all songs
        X_all = self.songs_df_std
        profile_std = self.scaler.transform(feature_profile)
        similarities = cosine_similarity(X_all, profile_std).flatten()

        # Build candidate set
        self.songs_df['similarity'] = similarities
        self.songs_df['artist_boost'] = self.songs_df['artist'].map(artist_boost).fillna(0)
        self.songs_df['final_score'] = self.songs_df['similarity'] + 0.1 * self.songs_df['artist_boost']

        already_played = user_hist['song_id'].unique()
        candidates = self.songs_df[~self.songs_df['song_id'].isin(already_played)]

        return candidates.sort_values('final_score', ascending=False).head(top_n)[
            ['artist', 'song', 'final_score'] + self.feature_cols
        ]
    def recommend_songs(self, user_id, top_n=5):
      return self.recommend_mood_aware(user_id, top_n)'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

class RecommendationService:
    def __init__(self, songs_csv, users_csv, history_csv, scaler_path='scaler.joblib'):
        self.songs_df = pd.read_csv(songs_csv)
        self.users_df = pd.read_csv(users_csv)
        self.history_df = pd.read_csv(history_csv)
        self.scaler = joblib.load(scaler_path)

        # Normalize text fields
        self.songs_df['artist'] = self.songs_df['artist'].astype(str).str.strip().str.lower()
        self.songs_df['song'] = self.songs_df['song'].astype(str).str.strip().str.lower()
        self.users_df.columns = self.users_df.columns.str.strip()
        if 'favorite_artists' not in self.users_df.columns:
            self.users_df['favorite_artists'] = ''

        # Prepare features
        ignore_cols = ['song_id', 'artist', 'song', 'title', 'genre', 'emotion']
        self.feature_cols = [c for c in self.songs_df.columns if c not in ignore_cols and self.songs_df[c].dtype in [np.float64, np.int64]]
        self.songs_df[self.feature_cols] = self.songs_df[self.feature_cols].fillna(0)
        self.songs_df_std = self.scaler.transform(self.songs_df[self.feature_cols])

        self.mood_score = {'anger': 0, 'sad': 1, 'relaxed': 2, 'happy': 3}

    def get_user_favorites(self, user_id):
        row = self.users_df[self.users_df['user_id'] == user_id]
        if row.empty:
            return []
        fav_str = row.iloc[0].get('favorite_artists', '')
        return [a.strip().lower() for a in fav_str.split(',') if a.strip()]

    def recommend_mood_aware(self, user_id, top_n=5):
        user_hist = self.history_df[self.history_df['user_id'] == user_id].copy()
        if user_hist.empty:
            print("⚠ No history found for user.")
            return pd.DataFrame()

        # Calculate mood delta
        user_hist['mood_before_score'] = user_hist['mood_before'].map(self.mood_score)
        user_hist['mood_after_score'] = user_hist['mood_after'].map(self.mood_score)
        user_hist['delta'] = user_hist['mood_after_score'] - user_hist['mood_before_score']
        improved = user_hist[user_hist['delta'] > 0]
        if improved.empty:
            print("⚠ No improving sessions found.")
            return pd.DataFrame()

        # Songs that improved mood
        good_song_ids = improved['song_id'].unique()
        good_songs = self.songs_df[self.songs_df['song_id'].isin(good_song_ids)].copy()
        if good_songs.empty:
            print("⚠ No matching song info for improved sessions.")
            return pd.DataFrame()

        # Build profile from improved songs
        feature_profile = good_songs[self.feature_cols].mean().values.reshape(1, -1)
        artist_boost = good_songs['artist'].value_counts().to_dict()

        # Similarity to all songs
        X_all = self.songs_df_std
        profile_std = self.scaler.transform(feature_profile)
        similarities = cosine_similarity(X_all, profile_std).flatten()

        # Build candidate set
        self.songs_df['similarity'] = similarities
        self.songs_df['artist_boost'] = self.songs_df['artist'].map(artist_boost).fillna(0)
        self.songs_df['final_score'] = self.songs_df['similarity'] + 0.1 * self.songs_df['artist_boost']

        already_played = user_hist['song_id'].unique()
        candidates = self.songs_df[~self.songs_df['song_id'].isin(already_played)]

        return candidates.sort_values('final_score', ascending=False).head(top_n)[
            ['artist', 'song', 'final_score'] + self.feature_cols
        ]'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
class RecommendationService:
    def __init__(self, songs_csv, history_csv, users_csv=None):
        self.songs_csv = songs_csv
        self.history_csv = history_csv
        self.users_csv = users_csv
        self.artist_similarity = ArtistSimilarityService()

    def get_user_favorite_artists(self, user_id):
        if not self.users_csv:
            return []
        users_df = pd.read_csv(self.users_csv)
        row = users_df[users_df['user_id'] == user_id]
        if row.empty:
            return []
        return [a.strip().lower() for a in str(row.iloc[0].get('favorite_artists', '')).split(',') if a.strip()]

    def get_user_history(self, user_id):
        if not os.path.exists(self.history_csv):
            return pd.DataFrame()
        df = pd.read_csv(self.history_csv)
        return df[df['user_id'] == user_id]

    def recommend_songs(self, user_id, min_logs_for_personal=5):
      self.history_df = pd.read_csv(self.history_csv)  # 💡 force fresh read
      self.songs_df = pd.read_csv(self.songs_csv)      # (optional) refresh song list
      user_history = self.history_df[self.history_df['user_id'] == user_id]
      num_logs = len(user_history)
      if num_logs < min_logs_for_personal:
        return self._recommend_by_artist(user_id, self.songs_df)
      else:
        return self._recommend_by_emotional_similarity(user_id, user_history, self.songs_df)

    def _recommend_by_artist(self, user_id, songs_df):
        favorite_artists = self.get_user_favorite_artists(user_id)
        if not favorite_artists:
            return songs_df.sort_values("popularity", ascending=False).head(5)

        similar_artists = self.artist_similarity.recommend_from_favorites(favorite_artists)
        expanded_artists = [a for a, _ in similar_artists]

        filtered = songs_df[songs_df['artist'].str.lower().isin(expanded_artists)]
        if filtered.empty:
            return songs_df.sample(min(5, len(songs_df)))
        return filtered.sort_values("popularity", ascending=False).head(5)

    def _recommend_by_emotional_similarity(self, user_id, user_history, songs_df):
        feature_cols = [
            'tempo', 'loudness', 'energy', 'danceability', 'positiveness',
            'speechiness', 'liveness', 'acousticness', 'instrumentalness',
            'cos_pitch', 'sin_pitch', 'mode_int'
        ]
        improving = user_history.copy()
        improving['score_before'] = improving['mood_before'].map(self._mood_score)
        improving['score_after'] = improving['mood_after'].map(self._mood_score)
        improving['delta'] = improving['score_after'] - improving['score_before']
        improving = improving[improving['delta'] > 0]

        if improving.empty:
            return songs_df.sort_values('popularity', ascending=False).head(5)

        uplift_songs = improving['song_id'].unique()
        base = songs_df[songs_df['song_id'].isin(uplift_songs)].dropna(subset=feature_cols)

        if base.empty:
            return songs_df.sample(min(5, len(songs_df)))

        scaler = StandardScaler()
        all_features = songs_df[feature_cols].fillna(0)
        good_features = base[feature_cols]

        X_all = scaler.fit_transform(all_features)
        X_good = scaler.transform(good_features)

        similarity_scores = cosine_similarity(X_all, X_good).mean(axis=1)
        songs_df['similarity_score'] = similarity_scores

        already_heard = set(user_history['song_id'])
        candidate_pool = songs_df[~songs_df['song_id'].isin(already_heard)].copy()
        if candidate_pool.empty:
            return songs_df.sample(min(5, len(songs_df)))

        candidate_pool['final_score'] = (
            0.5 * candidate_pool['similarity_score'] +
            0.3 * candidate_pool['popularity'].fillna(0) +
            0.2 * np.random.rand(len(candidate_pool))  # slight randomness for diversity
        )

        return candidate_pool.sort_values('final_score', ascending=False).head(5)

    def _mood_score(self, mood):
        mood_map = {'sad': 0, 'anger': 1, 'relaxed': 2, 'happy': 3}
        return mood_map.get(mood, 1)'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
import requests
from collections import Counter
import json
from sklearn.metrics.pairwise import cosine_similarity

# === CONFIG ===
X_LOG_THRESHOLD = 20  # adjust as needed

# === Deezer API helpers ===
def get_artist_id(artist_name):
    """Return the Deezer id of the first search hit for *artist_name*.

    Returns None when the search yields no hits or the response carries no
    ``data`` entry (for example an API error payload).
    """
    # Passing the name through params lets requests URL-encode spaces, '&', '#', ...
    response = requests.get(
        "https://api.deezer.com/search/artist",
        params={"q": artist_name},
        timeout=10,  # without a timeout a stalled connection blocks forever
    )
    hits = response.json().get('data')
    if hits:
        return hits[0]['id']
    return None

def get_similar_artists(artist_id):
    """Return the lower-cased names of the artists Deezer relates to *artist_id*.

    Returns an empty list when the response carries no ``data`` entry
    (for example an API error payload).
    """
    url = f"https://api.deezer.com/artist/{artist_id}/related"
    # without a timeout a stalled connection blocks forever
    response = requests.get(url, timeout=10)
    related = response.json().get('data') or []
    return [artist['name'].lower() for artist in related]

def recommend_artists(favorite_artists, save_to_file=False):
    """Expand a list of favourite artists with the artists related to them.

    Each favourite is looked up through ``get_artist_id``; for every artist
    found, the names returned by ``get_similar_artists`` are collected.

    Args:
        favorite_artists: artist names to start from.
        save_to_file: when true, also dump the result to
            ``recommended_artists.json`` in the working directory.

    Returns:
        List (in no particular order) of the lower-cased favourites plus all
        related artist names, without duplicates.
    """
    related_names = []
    for fav in favorite_artists:
        deezer_id = get_artist_id(fav)
        if not deezer_id:
            print(f"Could not find artist: {fav}")
            continue
        print(f"Found artist: {fav} — fetching similar artists...")
        related_names.extend(get_similar_artists(deezer_id))

    # Favourites are lower-cased here; related names arrive already normalised
    artist_pool = {fav.lower() for fav in favorite_artists}
    artist_pool.update(related_names)

    if save_to_file:
        with open("recommended_artists.json", "w") as f:
            json.dump(list(artist_pool), f, indent=2)

    return list(artist_pool)

# === History filtering ===
def filter_history_by_artists(history_df, songs_df, artist_list):
    """Keep only the history rows whose song is by one of the given artists.

    The history is left-joined to the catalogue on ``song_id`` to obtain the
    artist of each play; the comparison is case-insensitive.

    Returns:
        The joined frame (history columns plus ``artist``) restricted to the
        matching rows.
    """
    wanted = [name.lower() for name in artist_list]
    song_artists = songs_df[['song_id', 'artist']]

    # Left join: plays of songs missing from the catalogue get a NaN artist
    plays = history_df.merge(song_artists, on='song_id', how='left')

    by_wanted_artist = plays['artist'].str.lower().isin(wanted)
    return plays[by_wanted_artist]

# === Find top improving songs ===
def find_best_improving_songs(filtered_history, songs_df, top_n=5):
    """Return the catalogue rows of the songs with the best mean mood improvement.

    Improvement is ``mood_after - mood_before`` per play, averaged per song.
    The input history frame is not modified.

    Args:
        filtered_history: plays with ``song_id``, ``mood_before`` and
            ``mood_after`` columns; the mood columns must support
            subtraction (i.e. be numeric).
        songs_df: song catalogue with a ``song_id`` column.
        top_n: number of best-improving songs to select.

    Returns:
        The rows of ``songs_df`` for the selected songs, in catalogue order.
    """
    # Compute on a standalone Series rather than writing a column into the
    # caller's frame, which is usually a filtered slice of the full history.
    improvement = filtered_history['mood_after'] - filtered_history['mood_before']
    mean_improvement = (
        improvement.groupby(filtered_history['song_id']).mean()
        .sort_values(ascending=False)
    )

    top_song_ids = mean_improvement.head(top_n).index.tolist()
    return songs_df[songs_df['song_id'].isin(top_song_ids)]

# === Find similar songs using audio features ===
def find_similar_songs(song_id, songs_df, feature_cols):
    """Return the five catalogue songs most similar to *song_id*.

    Similarity is the cosine similarity over ``feature_cols``. The target
    song itself is part of the ranking. ``songs_df`` is not modified.

    Args:
        song_id: id of the reference song.
        songs_df: song catalogue with ``song_id`` and the feature columns.
        feature_cols: names of the numeric columns to compare.

    Returns:
        Up to five rows of ``songs_df`` with an added ``similarity`` column,
        best match first; an empty frame when *song_id* is not in the catalogue.
    """
    target = songs_df.loc[songs_df['song_id'] == song_id, feature_cols]
    if target.empty:
        # Unknown song: nothing to compare against, so return no rows
        # and let the caller apply its own fallback.
        return songs_df.head(0).assign(similarity=pd.Series(dtype='float64'))

    # Use a single reference row even if the id is duplicated in the catalogue,
    # so the result always has one similarity value per catalogue row.
    target_vec = target.values[:1]
    all_songs = songs_df[feature_cols].values
    similarities = cosine_similarity(target_vec, all_songs).flatten()

    # assign() works on a copy, leaving the caller's catalogue untouched
    ranked = songs_df.assign(similarity=similarities)
    return ranked.sort_values('similarity', ascending=False).head(5)

# === Main recommendation function ===
def recommend_for_user(user_id, users_df, history_df, songs_df):
    user_row = users_df[users_df['user_id'] == user_id]
    if user_row.empty:
        print(f"⚠ User ID {user_id} not found.")
        return songs_df.sample(min(5, len(songs_df)))

    fav_artists_str = str(user_row.iloc[0].get('favorite_artists', ''))
    fav_artists = [artist.strip().lower() for artist in fav_artists_str.split(',') if artist.strip()]
    print(f"🎯 User {user_id} favorite artists: {fav_artists}")

    user_logs = history_df[history_df['user_id'] == user_id]
    num_logs = len(user_logs)
    print(f"📖 User has {num_logs} past logs")

    if num_logs < X_LOG_THRESHOLD:
        print("🆕 Low-activity user — using similar artist recommendations.")
        artist_pool = recommend_artists(fav_artists)
        filtered_history = filter_history_by_artists(history_df, songs_df, artist_pool)
        if filtered_history.empty:
            print("⚠ No historical plays found for similar artists — fallback to random.")
            recommendations = songs_df.sample(min(5, len(songs_df)))
        else:
            recommendations = find_best_improving_songs(filtered_history, songs_df)
    else:
        print("🔍 Experienced user — using personal improvement history + audio features.")
        user_logs['improvement'] = user_logs['mood_after'] - user_logs['mood_before']
        top_user_songs = (
            user_logs.groupby('song_id')['improvement'].mean()
            .sort_values(ascending=False)
            .head(1)
            .index.tolist()
        )

        if not top_user_songs:
            print("⚠ No strong improvement songs found — fallback to random.")
            recommendations = songs_df.sample(min(5, len(songs_df)))
        else:
            feature_cols = [
                'tempo', 'loudness', 'energy', 'danceability', 'positiveness',
                'speechiness', 'liveness', 'acousticness', 'instrumentalness',
                'speechiness_log', 'cos_pitch', 'sin_pitch'
            ]
            recommendations = find_similar_songs(top_user_songs[0], songs_df, feature_cols)

    if recommendations.empty:
        print("⚠ No recommendations found — fallback to random.")
        recommendations = songs_df.sample(min(5, len(songs_df)))

    print("🎵 Final Recommendations:")
    print(recommendations[['artist', 'song']])
    return recommendations'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''#from recommendation_service import RecommendationService
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
import os

def test_recommendation_works():
    songs = pd.DataFrame({
        'song_id': [1, 2],
        'artist': ['adele', 'coldplay'],
        'song': ['hello', 'yellow'],
        'tempo': [100, 110],
        'energy': [0.8, 0.7],
        'danceability': [0.5, 0.4]
    })
    songs.to_csv("songs.csv", index=False)

    users = pd.DataFrame({
        'user_id': [1],
        'favorite_artists': ['adele, coldplay']
    })
    users.to_csv("users.csv", index=False)

    history = pd.DataFrame({
        'user_id': [1],
        'song_id': [1],
        'mood_before': ['sad'],
        'mood_after': ['happy']
    })
    history.to_csv("history.csv", index=False)

    scaler = StandardScaler()
    scaler.fit(songs[['tempo', 'energy', 'danceability']])
    joblib.dump(scaler, 'scaler.joblib')

    recommender = RecommendationService("songs.csv", "history.csv", "users.csv", "scaler.joblib")
    result = recommender.recommend_songs(1)
    assert isinstance(result, pd.DataFrame)
    assert 'song_id' in result.columns
    assert not result.empty

    for f in ["songs.csv", "users.csv", "history.csv", "scaler.joblib"]:
        os.remove(f)'''

In [None]:
#with open("test_simple.py", "w") as f:
#    f.write('''\
'''import sys
sys.path.append("/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton")
from recommendation_service import RecommendationService

import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
import os

def test_recommendation_works():
    """End-to-end smoke test: build tiny fixtures, ask for recommendations.

    Writes two songs, one user and one mood-improving play to the working
    directory together with a scaler fitted on the songs, then checks that
    ``recommend_songs`` returns a non-empty DataFrame containing ``song_id``.
    """
    fixture_files = ["songs.csv", "users.csv", "history.csv", "scaler.joblib"]
    try:
        songs = pd.DataFrame({
            'song_id': [1, 2],
            'artist': ['adele', 'coldplay'],
            'song': ['hello', 'yellow'],
            'tempo': [100, 110],
            'energy': [0.8, 0.7],
            'danceability': [0.5, 0.4]
        })
        songs.to_csv("songs.csv", index=False)

        users = pd.DataFrame({
            'user_id': [1],
            'favorite_artists': ['adele, coldplay']
        })
        users.to_csv("users.csv", index=False)

        history = pd.DataFrame({
            'user_id': [1],
            'song_id': [1],
            'mood_before': ['sad'],
            'mood_after': ['happy']
        })
        history.to_csv("history.csv", index=False)

        scaler = StandardScaler()
        scaler.fit(songs[['tempo', 'energy', 'danceability']])
        joblib.dump(scaler, 'scaler.joblib')

        recommender = RecommendationService("songs.csv", "history.csv", "users.csv", "scaler.joblib")
        result = recommender.recommend_songs(1)

        assert isinstance(result, pd.DataFrame)
        assert 'song_id' in result.columns
        assert not result.empty
    finally:
        # Runs even when an assertion fails, so no fixture is left behind;
        # the existence check covers failures before a file was written.
        for f in fixture_files:
            if os.path.exists(f):
                os.remove(f)
'''#)'''

In [None]:
'''%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

class RecommendationService:
    def __init__(self, songs_csv, history_csv, users_csv, scaler_path='scaler.joblib'):
        self.songs_df = pd.read_csv(songs_csv)
        self.users_df = pd.read_csv(users_csv)
        self.history_df = pd.read_csv(history_csv)
        self.scaler = joblib.load(scaler_path)

        # Normalize
        self.songs_df['artist'] = self.songs_df['artist'].astype(str).str.strip().str.lower()
        self.songs_df['song'] = self.songs_df['song'].astype(str).str.strip().str.lower()
        self.users_df.columns = self.users_df.columns.str.strip()

        if 'favorite_artists' not in self.users_df.columns:
            self.users_df['favorite_artists'] = ''

        ignore_cols = ['song_id', 'artist', 'song', 'title', 'genre', 'emotion']
        self.feature_cols = [col for col in self.songs_df.columns if col not in ignore_cols and self.songs_df[col].dtype in [np.float64, np.int64]]

        self.songs_df[self.feature_cols] = self.songs_df[self.feature_cols].fillna(0)
        self.songs_df_std = self.scaler.transform(self.songs_df[self.feature_cols])
        self.mood_score = {'anger': 0, 'sad': 1, 'relaxed': 2, 'happy': 3}

    def get_user_favorites(self, user_id):
        row = self.users_df[self.users_df['user_id'] == user_id]
        if row.empty:
            return []
        fav_str = row.iloc[0].get('favorite_artists', '')
        if pd.isna(fav_str):
            fav_str = ''
        return [a.strip().lower() for a in fav_str.split(',') if a.strip()]

    def recommend_songs(self, user_id, top_n=5):
        user_hist = self.history_df[self.history_df['user_id'] == user_id].copy()
        if user_hist.empty or 'mood_before' not in user_hist.columns or 'mood_after' not in user_hist.columns:
            return self._recommend_by_favorites(user_id, top_n)

        user_hist['before_score'] = user_hist['mood_before'].map(self.mood_score)
        user_hist['after_score'] = user_hist['mood_after'].map(self.mood_score)
        user_hist['delta'] = user_hist['after_score'] - user_hist['before_score']
        improved = user_hist[user_hist['delta'] > 0]

        if improved.empty:
            return self._recommend_by_favorites(user_id, top_n)

        good_song_ids = improved['song_id'].unique()
        good_songs = self.songs_df[self.songs_df['song_id'].isin(good_song_ids)].copy()
        if good_songs.empty:
            return self._recommend_by_favorites(user_id, top_n)

        profile_vec = good_songs[self.feature_cols].mean().values.reshape(1, -1)
        profile_df = pd.DataFrame(profile_vec, columns=self.feature_cols)
        profile_scaled = self.scaler.transform(profile_df)
        similarity = cosine_similarity(self.songs_df_std, profile_scaled).flatten()

        artist_profile = good_songs['artist'].value_counts().to_dict()
        self.songs_df['similarity'] = similarity
        self.songs_df['artist_boost'] = self.songs_df['artist'].map(artist_profile).fillna(0)
        self.songs_df['final_score'] = self.songs_df['similarity'] + 0.1 * self.songs_df['artist_boost']
        self.songs_df['random_noise'] = np.random.uniform(-0.02, 0.02, size=len(self.songs_df))
        self.songs_df['score'] = self.songs_df['final_score'] + self.songs_df['random_noise']

        already_played = user_hist['song_id'].unique()
        candidates = self.songs_df[~self.songs_df['song_id'].isin(already_played)]

        return candidates.sort_values('score', ascending=False).head(top_n)[
            ['song_id', 'artist', 'song', 'score'] + self.feature_cols
        ]

    def _recommend_by_favorites(self, user_id, top_n=5):
        fav_artists = self.get_user_favorites(user_id)
        if not fav_artists:
            # Fallback to popular/random songs
            self.songs_df['score'] = np.random.uniform(0, 1, len(self.songs_df))
            return self.songs_df.sort_values('score', ascending=False).head(top_n)[
                ['song_id', 'artist', 'song', 'score'] + self.feature_cols
            ]

        fav_songs = self.songs_df[self.songs_df['artist'].isin(fav_artists)].copy()
        fav_vec = fav_songs[self.feature_cols].mean().values.reshape(1, -1)
        fav_df = pd.DataFrame(fav_vec, columns=self.feature_cols)
        vec_scaled = self.scaler.transform(fav_df)
        similarity = cosine_similarity(self.songs_df_std, vec_scaled).flatten()

        self.songs_df['similarity'] = similarity
        self.songs_df['score'] = self.songs_df['similarity'] + np.random.uniform(-0.02, 0.02, size=len(self.songs_df))

        return self.songs_df.sort_values('score', ascending=False).head(top_n)[
            ['song_id', 'artist', 'song', 'score'] + self.feature_cols
        ]'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''# Write the full adjusted recommendation_service.py with blend support
%%writefile "/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py"
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

class RecommendationService:
    def __init__(self, songs_csv, history_csv, users_csv, scaler_path='scaler.joblib'):
        self.songs_df = pd.read_csv(songs_csv)
        self.users_df = pd.read_csv(users_csv)
        self.history_df = pd.read_csv(history_csv)
        self.scaler = joblib.load(scaler_path)

        self.feature_cols = [col for col in self.songs_df.columns if col not in [
            'song_id', 'artist', 'song', 'title', 'genre', 'emotion'
        ] and self.songs_df[col].dtype in [np.float64, np.int64]]

        self.songs_df[self.feature_cols] = self.songs_df[self.feature_cols].fillna(0)
        self.songs_df_std = self.scaler.transform(self.songs_df[self.feature_cols])

    def get_user_favorites(self, user_id):
        row = self.users_df[self.users_df['user_id'] == user_id]
        if row.empty:
            return []
        fav_str = row.iloc[0].get('favorite_artists', '')
        if isinstance(fav_str, float):  # in case it's NaN
            return []
        return [a.strip().lower() for a in fav_str.split(',') if a.strip()]

    def _mood_score(self, mood):
        mood_map = {'sad': 0, 'anger': 1, 'relaxed': 2, 'happy': 3}
        return mood_map.get(mood, 1)

    def _recommend_by_history(self, user_id, top_n=5):
        history_df = self.history_df[self.history_df['user_id'] == user_id]
        history_df = history_df.copy()
        if history_df.empty:
            return pd.DataFrame()

        history_df['score'] = history_df['mood_after'].map(self._mood_score) - history_df['mood_before'].map(self._mood_score)
        improved_df = history_df[history_df['score'] > 0]
        if improved_df.empty:
            return pd.DataFrame()

        good_song_ids = improved_df['song_id'].unique()
        good_songs = self.songs_df[self.songs_df['song_id'].isin(good_song_ids)]
        if good_songs.empty:
            return pd.DataFrame()

        mean_vec = good_songs[self.feature_cols].mean().values.reshape(1, -1)
        mean_std = self.scaler.transform(mean_vec)
        similarities = cosine_similarity(self.songs_df_std, mean_std).flatten()

        df = self.songs_df.copy()
        df['score'] = similarities
        df = df[~df['song_id'].isin(good_song_ids)]
        df = df.sort_values(by='score', ascending=False)
        return df[['song_id', 'artist', 'song', 'score']].head(top_n)

    def _recommend_by_favorites(self, user_id, top_n=5):
        fav_artists = self.get_user_favorites(user_id)
        if not fav_artists:
            return pd.DataFrame()

        fav_songs = self.songs_df[self.songs_df['artist'].str.lower().isin(fav_artists)]
        if fav_songs.empty:
            return pd.DataFrame()

        mean_vec = fav_songs[self.feature_cols].mean().values.reshape(1, -1)
        mean_std = self.scaler.transform(mean_vec)
        similarities = cosine_similarity(self.songs_df_std, mean_std).flatten()

        df = self.songs_df.copy()
        df['score'] = similarities + np.random.uniform(-0.05, 0.05, size=len(df))
        df = df[~df['artist'].str.lower().isin(fav_artists)]  # skip exact same artist songs
        df = df.sort_values(by='score', ascending=False)
        return df[['song_id', 'artist', 'song', 'score']].head(top_n)

    def _recommend_random(self, top_n=5):
        df = self.songs_df.copy()
        df['score'] = np.random.rand(len(df))
        return df[['song_id', 'artist', 'song', 'score']].sort_values(by='score', ascending=False).head(top_n)

    def recommend_songs(self, user_id, top_n=5):
        hist = self._recommend_by_history(user_id, top_n)
        if not hist.empty:
            return hist
        fav = self._recommend_by_favorites(user_id, top_n)
        if not fav.empty:
            return fav
        return self._recommend_random(top_n)

    def recommend_blended(self, user_id, top_n=5):
        h = self._recommend_by_history(user_id, top_n*2)
        f = self._recommend_by_favorites(user_id, top_n*2)
        r = self._recommend_random(top_n*2)

        df = pd.concat([h, f, r], ignore_index=True).drop_duplicates(subset='song_id')
        df = df.sort_values(by='score', ascending=False)
        return df[['song_id', 'artist', 'song', 'score']].head(top_n)'''

Overwriting /content/drive/MyDrive/Colab Notebooks/project/Music/skeleton/recommendation_service.py


In [None]:
'''import importlib
import sys
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
import os
sys.path.append('/content/drive/MyDrive/Colab Notebooks/project/Music/skeleton')
import recommendation_service
importlib.reload(recommendation_service)
from recommendation_service import RecommendationService


def setup_test_data():
    """Create the fixture files used by the demo run in the working directory.

    Produces ``songs.csv`` (three songs with three numeric features),
    ``users.csv`` (three users, the last without favourites), ``history.csv``
    (one mood-improving play by user 1) and ``scaler.joblib`` (a
    StandardScaler fitted on the song features).
    """
    songs = pd.DataFrame({
        'song_id': [1, 2, 3],
        'artist': ['adele', 'coldplay', 'daft punk'],
        'song': ['hello', 'yellow', 'get lucky'],
        'tempo': [100, 110, 120],
        'energy': [0.8, 0.7, 0.9],
        'danceability': [0.5, 0.4, 0.8]
    })
    users = pd.DataFrame({
        'user_id': [1, 2, 3],
        'favorite_artists': ['adele, coldplay', 'coldplay', '']
    })
    history = pd.DataFrame({
        'user_id': [1],
        'song_id': [1],
        'mood_before': ['sad'],
        'mood_after': ['happy']
    })

    # Write the three tables in one pass, without the index column
    tables = (("songs.csv", songs), ("users.csv", users), ("history.csv", history))
    for filename, frame in tables:
        frame.to_csv(filename, index=False)

    # fit() returns the scaler itself, so it can be dumped directly
    fitted = StandardScaler().fit(songs[['tempo', 'energy', 'danceability']])
    joblib.dump(fitted, 'scaler.joblib')

def cleanup_files():
    """Delete the fixture files from the working directory, skipping absent ones."""
    fixtures = ("songs.csv", "users.csv", "history.csv", "scaler.joblib")
    # filter() is lazy, so each path is checked right before it is removed
    for path in filter(os.path.exists, fixtures):
        os.remove(path)

def show_recommendations_for_user(user_id, label):
    """Print the recommendations for one user under a descriptive heading.

    A fresh RecommendationService is built from the fixture files on every
    call; an empty result prints a warning line instead of a table.
    """
    service = RecommendationService("songs.csv", "history.csv", "users.csv", "scaler.joblib")
    recs = service.recommend_songs(user_id)
    print(f"\n📊 Recommendations for {label} (user_id={user_id}):")
    if recs.empty:
        print("⚠️ No results.")
    else:
        print(recs[['artist', 'song', 'score']])

# Demo run: create the fixtures, then print recommendations for three user
# profiles - one with mood-improving history, one with favourites only and
# one with neither.
setup_test_data()

try:
    show_recommendations_for_user(1, "user with mood improvement history")
    show_recommendations_for_user(2, "user with no history, but favorites")
    show_recommendations_for_user(3, "user with no history and no favorites")
finally:
    # Remove the fixture files even if one of the calls above raises.
    cleanup_files()
'''


📊 Recommendations for user with mood improvement history (user_id=1):
      artist       song     score
1   coldplay     yellow  0.187387
2  daft punk  get lucky -0.699881

📊 Recommendations for user with no history, but favorites (user_id=2):
      artist       song     score
1   coldplay     yellow  0.985192
0      adele      hello  0.180323
2  daft punk  get lucky -0.811464

📊 Recommendations for user with no history and no favorites (user_id=3):
      artist       song     score
0      adele      hello  0.803297
2  daft punk  get lucky  0.799568
1   coldplay     yellow  0.510078


In [None]:
#!pytest test_simple.py

platform linux -- Python 3.11.12, pytest-8.3.5, pluggy-1.5.0
rootdir: /content
plugins: langsmith-0.3.39, anyio-4.9.0, typeguard-4.4.2
collected 1 item                                                               [0m

test_simple.py [32m.[0m[32m                                                         [100%][0m

