In [214]:
from typing import Tuple
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

from adv_ml_music_recommendation.recommenders.collaborativerecommender import CollaborativeRecommender
from adv_ml_music_recommendation.recommenders.contentbasedrecommender import ContentRecommender
from adv_ml_music_recommendation.recommenders.hybridrecommender import HybridRecommender
from adv_ml_music_recommendation.recommenders.popularityrecommender import PopularityRecommender
from adv_ml_music_recommendation.recommenders.abstractrecommender import AbstractSongRecommender
from adv_ml_music_recommendation.util.data_functions import get_tracks_by_playlist_associate

class RecommenderEvaluator:
    def __init__(self, df_playlist: pd.DataFrame, df_tracks: pd.DataFrame, type: str = 'collaborative'):
        self.df_train_playlist = []
        self.df_test_playlist = []
        self.df_tracks = df_tracks

        # Group by playlist_id and perform train-test split for each playlist
        for playlist_id, group in df_playlist.groupby('playlist_id'):
            if playlist_id % 1000 == 0:
                print(playlist_id)

            if group.shape[0] > 150:
                # Extract track_uris for the current playlist
                track_uris = group['track_uri'].tolist()

                # Perform train-test split
                train_uris, test_uris = train_test_split(track_uris, test_size=0.2, random_state=42)

                # Append the results to the train and test lists
                for uri in train_uris:
                     self.df_train_playlist.append({'playlist_id': playlist_id, 'track_uri': uri})

                for uri in test_uris:
                     self.df_test_playlist.append({'playlist_id': playlist_id, 'track_uri': uri})


        self.df_train_playlist = pd.DataFrame(self.df_train_playlist)
        self.df_test_playlist = pd.DataFrame(self.df_test_playlist)

         # Renumber the filtered playlist_ids to remove gaps
        self.df_train_playlist['playlist_id'] = list(range(1, len(self.df_train_playlist) + 1))
        self.df_test_playlist['playlist_id'] = list(range(1, len(self.df_test_playlist) + 1))

        # Filter the tracks dataframe to keep only tracks that appear in the playlists dataframe:
        # & operator feels like questionable design
        print(self.df_train_playlist)
        #self.df_tracks = self.df_tracks[self.df_tracks['track_uri'].isin(self.df_train_data['track_uri']) & self.df_tracks['track_uri'].isin(self.df_test_data['track_uri'])]
        self.df_tracks = self.df_tracks[self.df_tracks['track_uri'].isin(self.df_train_playlist['track_uri'])]

        if type == 'hybrid':
            self.train_data_model = HybridRecommender(df_playlist=self.df_train_playlist, df_tracks=self.df_tracks)
            self.test_data_model = HybridRecommender(df_playlist=self.df_test_playlist, df_tracks=self.df_tracks)
        elif type == 'collaborative':
            self.train_data_model = CollaborativeRecommender(df_playlist=self.df_train_playlist, df_tracks=self.df_tracks)
            self.test_data_model = CollaborativeRecommender(df_playlist=self.df_test_playlist, df_tracks=self.df_tracks)
        elif type == 'content':
            self.train_data_model = ContentRecommender(df_playlist=self.df_train_playlist, df_tracks=self.df_tracks)
            self.test_data_model = ContentRecommender(df_playlist=self.df_test_playlist, df_tracks=self.df_tracks)
        elif type == 'popularity':
            self.train_data_model = PopularityRecommender(df_playlist=self.df_train_playlist, df_tracks=self.df_tracks)
            self.test_data_model = PopularityRecommender(df_playlist=self.df_test_playlist, df_tracks=self.df_tracks)
        else:
            raise ValueError(
                f"Invalid type: {type}. Must be one of 'hybrid', 'collaborative', 'content', 'popularity'.")


    def evaluate_recommender_for_playlist(self, playlist_id):
        # Get recommendations from the train model
        ranked_recommendations_df = self.train_data_model.recommend_tracks(playlist_id, 100)

        # Extract the recommended track URIs
        recommended_track_uris = ranked_recommendations_df['track_uri'].tolist()

        # Get the ground truth: test track URIs for the playlist
        test_track_uris = get_tracks_by_playlist_associate(self.df_train_playlist, playlist_id)
        #test_track_uris = self.df_test_playlist[self.df_test_playlist['playlist_id'] == playlist_id]['track_uri'].iloc[0]

        # Create binary vectors for precision and recall calculation
        print(len(set(recommended_track_uris) & set(test_track_uris)))
        y_true = [1 if uri in test_track_uris else 0 for uri in recommended_track_uris]
        y_pred = [1] * len(recommended_track_uris)  # All recommendations are predicted as relevant

        # Compute precision and recall
        precision = precision_score(y_true, y_pred, zero_division=0)
        recall = recall_score(y_true, y_pred, zero_division=0)
        return precision, recall


    def evaluate_model(self):
        total_precision = 0
        total_recall = 0
        num_playlists = len(self.df_test_playlist)

        # Iterate over all playlists in the test_data
        for playlist_id in self.df_test_playlist['playlist_id'].unique():
            if playlist_id % 100 == 0:
                print(playlist_id)

            precision, recall = self.evaluate_recommender_for_playlist(playlist_id)
            total_precision += precision
            total_recall += recall

        # Compute average precision and recall
        avg_precision = total_precision / num_playlists
        avg_recall = total_recall / num_playlists

        return {
            'average_precision': avg_precision,
            'average_recall': avg_recall
        }


In [202]:
df_playlist = pd.read_csv("../../../data/track_playlist_association.csv")
df_tracks = pd.read_csv("../../../data/matched_songs.csv")
print(df_tracks)

        Unnamed: 0  pos        artist_name  \
0                0    1     Britney Spears   
1                0    3  Justin Timberlake   
2                0    8    Destiny's Child   
3                0    9            OutKast   
4                0   19    Destiny's Child   
...            ...  ...                ...   
134707         829   24        The Outlaws   
134708         829  131        The Outlaws   
134709         870   69    History Invades   
134710         887    5          Limi-T 21   
134711         935   32        Rod Stewart   

                                   track_uri  \
0       spotify:track:6I9VzXrHxO9rA9A5euc8Ak   
1       spotify:track:1AWQoqb9bSvzTjaLralEkT   
2       spotify:track:7H6ev70Weq6DdpZyyTmUXk   
3       spotify:track:2PpruBYCo4H7WOBJ7Q2EwM   
4       spotify:track:4pmc2AxSEq6g7hPVlJCPyP   
...                                      ...   
134707  spotify:track:38Zq6XbsyIcwI0Kn1ikPlV   
134708  spotify:track:68nYzKdsOUiiVAuyAc5y3U   
134709  spotify

In [215]:
evaluator = RecommenderEvaluator(df_playlist=df_playlist, df_tracks=df_tracks)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
14000
15000
16000
17000
18000
19000
21000
22000
23000
25000
26000
27000
28000
29000
31000
32000
33000
34000
35000
36000
38000
39000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
64000
65000
66000
67000
68000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
93000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
111000
112000
115000
116000
117000
119000
120000
121000
122000
123000
124000
125000
126000
127000
128000
129000
130000
131000
133000
135000
136000
138000
140000
141000
142000
143000
144000
145000
146000
147000
148000
150000
152000
153000
154000
155000
156000
157000
158000
159000
160000
161000
162000
163000
164000
167000
168000
169000
170000
171000
172000
174000
175000
176000
177000
178000
179000
180

In [210]:
evaluator.evaluate_model()

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
100
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
200
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
300
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
400
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


{'average_precision': 0.0, 'average_recall': 0.0}