In [1]:
import pandas as pd

# Relative paths to the CSV inputs read by the cells below.
# The two playlist files (ds1 / ds2) are loaded separately and then combined.
TRAIN_DATASET_PATH = "./dataset/2023_spotify_ds1.csv"
TEST_DATASET_PATH = "./dataset/2023_spotify_ds2.csv"
# Song catalogue file.
SONGS_DATASET_PATH = "./dataset/2023_spotify_songs.csv"

Input: list of songs the user likes

Output: list of song recommendations

In [2]:
# Load the song catalogue; the rendered frame shows artist_name and track_name columns.
songs_dataset = pd.read_csv(SONGS_DATASET_PATH)

songs_dataset

Unnamed: 0,artist_name,track_name
0,Sam Hunt,Bottle It Up - Acoustic Mixtape
1,Ryan Gosling,"City Of Stars - From ""La La Land"" Soundtrack"
2,Knife Party,Bonfire
3,Avenged Sevenfold,Welcome to the Family
4,Cheat Codes,Follow You
...,...,...
6969,Pink Floyd,Hey You
6970,Lifehouse,Halfway Gone
6971,Eminem,Won't Back Down
6972,Miley Cyrus,4x4


In [116]:
# Load both playlist CSVs, discarding every row that contains a missing value.
spotify_playlists_1 = pd.read_csv(TRAIN_DATASET_PATH).dropna()
spotify_playlists_2 = pd.read_csv(TEST_DATASET_PATH).dropna()

In [None]:
# Stack the two playlist frames row-wise into a single frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported equivalent and, like append's default, keeps the
# original row labels of both inputs.
spotify_playlists = pd.concat([spotify_playlists_1, spotify_playlists_2])

In [20]:
# Collapse the track-level rows into one row per playlist id (pid): each
# playlist keeps its track URIs and its track names as Python lists.
playlist_groups_df = (
    spotify_playlists
    .groupby('pid')
    .agg(dict.fromkeys(('track_uri', 'track_name'), list))
    .reset_index()  # turn the pid index back into a regular column
)

playlist_groups_df

Unnamed: 0,pid,track_uri,track_name
0,115,"[spotify:track:06iMqWThw4w8fTFyccvOwr, spotify...","[Ride Wit Me, Red Solo Cup, Sweet Emotion, Buy..."
1,124,"[spotify:track:3ZAMtgYJFoHwJjFkhkXqKr, spotify...","[Dance (A$$) Remix, Weekend, Clique, Good Girl..."
2,161,"[spotify:track:5sUlaoVcLPk2Wr695jp6dd, spotify...","[Bartender, Just the Way You Are, Smack That -..."
3,362,"[spotify:track:1VCHNbal0VtE2plMRzvoho, spotify...","[Weekend, Misery, Die A Happy Man, The Only Ex..."
4,403,"[spotify:track:06iMqWThw4w8fTFyccvOwr, spotify...","[Ride Wit Me, Weekend, She Knows, Runaway, Bew..."
...,...,...,...
2257,169750,"[spotify:track:6ltPEsP4edATzvinHOzvk2, spotify...","[Still D.R.E., OOOUUU, Still Here, Tuesday (fe..."
2258,169886,"[spotify:track:5dNGORzxFb5z3v7vSAFkTU, spotify...","[Beachin', American Country Love Song, Who I A..."
2259,169931,"[spotify:track:5dNGORzxFb5z3v7vSAFkTU, spotify...","[Beachin', American Country Love Song, It Don'..."
2260,169955,"[spotify:track:0iA1unTbTbDOWUSlbwJ1pS, spotify...","[It Don't Hurt Like It Used To, Fix, Hard To L..."


# FPGrowth

In [70]:
from fpgrowth_py import fpgrowth

# Alias (not a copy) of the per-playlist frame; the FP-Growth input is built from it.
sample_df = playlist_groups_df
sample_df

Unnamed: 0,pid,track_uri,track_name
0,115,"[spotify:track:06iMqWThw4w8fTFyccvOwr, spotify...","[Ride Wit Me, Red Solo Cup, Sweet Emotion, Buy..."
1,124,"[spotify:track:3ZAMtgYJFoHwJjFkhkXqKr, spotify...","[Dance (A$$) Remix, Weekend, Clique, Good Girl..."
2,161,"[spotify:track:5sUlaoVcLPk2Wr695jp6dd, spotify...","[Bartender, Just the Way You Are, Smack That -..."
3,362,"[spotify:track:1VCHNbal0VtE2plMRzvoho, spotify...","[Weekend, Misery, Die A Happy Man, The Only Ex..."
4,403,"[spotify:track:06iMqWThw4w8fTFyccvOwr, spotify...","[Ride Wit Me, Weekend, She Knows, Runaway, Bew..."
...,...,...,...
2257,169750,"[spotify:track:6ltPEsP4edATzvinHOzvk2, spotify...","[Still D.R.E., OOOUUU, Still Here, Tuesday (fe..."
2258,169886,"[spotify:track:5dNGORzxFb5z3v7vSAFkTU, spotify...","[Beachin', American Country Love Song, Who I A..."
2259,169931,"[spotify:track:5dNGORzxFb5z3v7vSAFkTU, spotify...","[Beachin', American Country Love Song, It Don'..."
2260,169955,"[spotify:track:0iA1unTbTbDOWUSlbwJ1pS, spotify...","[It Don't Hurt Like It Used To, Fix, Hard To L..."


In [71]:
# One transaction per playlist: the list of that playlist's track names.
# Selecting the column directly avoids converting every other column of the
# frame to a Python list, which to_dict('list') did only to discard them.
sample = sample_df['track_name'].tolist()

In [72]:
# Mine frequent item sets and association rules from the playlist transactions.
# minSupRatio / minConf are presumably the minimum support-ratio and confidence
# thresholds (going by fpgrowth_py's parameter names; confirm against its docs).
freq_itemset_sample, rules_sample = fpgrowth(sample, minSupRatio=0.05, minConf=0.15)

# support ratio of an item set = (transactions containing the item set) / (all transactions)
# confidence of a rule A -> C  = support(A ∪ C) / support(A)

In [73]:
from typing import Dict, List

class Preprocess:
    """Convert raw FP-Growth output into lists of plain dictionaries.

    The instance stores a collection of frequent item sets and a collection of
    association rules; the two ``preprocess_*`` methods turn either the stored
    data or an explicitly supplied collection into records that can be passed
    straight to ``pd.DataFrame``.
    """

    def __init__(self, freqItemSet, rules):
        """Store the data to be converted.

        Args:
            freqItemSet: Iterable of item sets; each item set is itself an
                iterable of items.
            rules: Iterable of ``(antecedent, consequent, confidence)``
                triples, where antecedent and consequent are iterables.
        """
        self.freqItemSet = freqItemSet
        self.rules = rules

    def preprocess_itemset(
        self,
        sample_list: List = None
    ) -> List[Dict]:
        """Return one ``{'frequent_item_sets': [...]}`` record per item set.

        Args:
            sample_list: Item sets to convert. When omitted (``None``), the
                item sets stored on the instance are used.

        Returns:
            A list of dicts, each holding the item set as a list.
        """
        # Test against None instead of truthiness: an explicitly supplied
        # empty list must produce an empty result rather than silently
        # falling back to the stored item sets.
        item_sets = self.freqItemSet if sample_list is None else sample_list

        return [
            {'frequent_item_sets': list(item)}
            for item in item_sets
        ]

    def preprocess_rules(
        self,
        rules: List = None
    ) -> List[Dict]:
        """Return one antecedent/consequent/confidence record per rule.

        Args:
            rules: ``(antecedent, consequent, confidence)`` triples to
                convert. When omitted (``None``), the rules stored on the
                instance are used.

        Returns:
            A list of dicts with keys ``'antecedent'`` and ``'consequent'``
            (both lists) and ``'confidence'`` (passed through unchanged).
        """
        # Same None check as preprocess_itemset, so an empty list stays empty.
        source_rules = self.rules if rules is None else rules

        return [
            {
                'antecedent': list(antecedent),
                'consequent': list(consequent),
                'confidence': confidence
            }
            for antecedent, consequent, confidence in source_rules
        ]

In [74]:
# Wrap the raw FP-Growth output, then convert the stored item sets and rules
# (no arguments are passed, so the values given to the constructor are used)
# into lists of dicts.
pre_process_entity = Preprocess(freq_itemset_sample, rules_sample)

freq_itemset = pre_process_entity.preprocess_itemset()
rules = pre_process_entity.preprocess_rules()

In [76]:
# Build one DataFrame per record list: rules (antecedent, consequent, confidence)
# and frequent item sets (single 'frequent_item_sets' column).
rules_df = pd.DataFrame(rules)
freq_itemset_df = pd.DataFrame(freq_itemset)

In [77]:
# Inspect the mined association rules.
rules_df

Unnamed: 0,antecedent,consequent,confidence
0,[Crash And Burn],[Night's On Fire],0.402715
1,[Night's On Fire],[Crash And Burn],0.760684
2,[Lose My Mind],[Crash And Burn],0.822581
3,[Crash And Burn],[Lose My Mind],0.461538
4,[I Love This Life],[Crash And Burn],0.743802
...,...,...,...
30798,[One Dance],[Broccoli (feat. Lil Yachty)],0.514874
30799,[Broccoli (feat. Lil Yachty)],[Closer],0.339130
30800,[Closer],[Broccoli (feat. Lil Yachty)],0.320988
30801,[T-Shirt],[Broccoli (feat. Lil Yachty)],0.347826


In [90]:
# Keep only the frequent item sets that contain the chosen song.
song_name = 'Broccoli (feat. Lil Yachty)'

mask = freq_itemset_df['frequent_item_sets'].apply(
    lambda item_set: song_name in item_set
)
filtered_df = freq_itemset_df.loc[mask]

filtered_df

Unnamed: 0,frequent_item_sets
647,"[Broccoli (feat. Lil Yachty), Cut It (feat. Yo..."
809,"[Broccoli (feat. Lil Yachty), Down In the DM]"
812,"[DEVASTATED, Broccoli (feat. Lil Yachty)]"
838,"[Broccoli (feat. Lil Yachty), Champions]"
890,"[Broccoli (feat. Lil Yachty), OOOUUU]"
...,...
5252,"[Broccoli (feat. Lil Yachty), HUMBLE.]"
5256,"[Broccoli (feat. Lil Yachty), One Dance]"
5257,[Broccoli (feat. Lil Yachty)]
5258,"[Broccoli (feat. Lil Yachty), Closer]"


In [99]:
# Keep only the rules whose antecedent contains the chosen song.
song_name = 'Crash And Burn'

mask = rules_df['antecedent'].apply(
    lambda antecedent_items: song_name in antecedent_items
)
filtered_df = rules_df.loc[mask]

filtered_df

Unnamed: 0,antecedent,consequent,confidence
0,[Crash And Burn],[Night's On Fire],0.402715
3,[Crash And Burn],[Lose My Mind],0.461538
5,[Crash And Burn],[I Love This Life],0.407240
16,[Crash And Burn],[Wasted Time],0.429864
20,[Crash And Burn],[Sleep Without You],0.429864
...,...,...,...
7124,[Crash And Burn],[Leave The Night On],0.678733
7127,[Crash And Burn],"[Die A Happy Man, Leave The Night On]",0.524887
7129,"[Die A Happy Man, Crash And Burn]",[Leave The Night On],0.711656
7131,"[Crash And Burn, Leave The Night On]",[Die A Happy Man],0.773333


In [115]:
# Consequents of the rules selected above, as a list of lists; the same
# consequent can appear more than once because several rules may share it.
filtered_df['consequent'].to_list()

[["Night's On Fire"],
 ['Lose My Mind'],
 ['I Love This Life'],
 ['Wasted Time'],
 ['Sleep Without You'],
 ['Die A Happy Man', 'Confession'],
 ['Confession'],
 ['Die A Happy Man'],
 ['Confession'],
 ['Leave The Night On', 'Sippin’ On Fire'],
 ['Sippin’ On Fire'],
 ['Leave The Night On'],
 ['Sippin’ On Fire'],
 ['Gonna Wanna Tonight'],
 ['Gonna Wanna Tonight', 'Die A Happy Man'],
 ['Die A Happy Man'],
 ['Gonna Wanna Tonight'],
 ["Burnin' It Down"],
 ['Drunk On Your Love'],
 ['Ready Set Roll', 'Leave The Night On'],
 ['Leave The Night On'],
 ['Ready Set Roll'],
 ['Ready Set Roll'],
 ['Die A Happy Man', 'Backroad Song'],
 ['Backroad Song'],
 ['Die A Happy Man'],
 ['Backroad Song'],
 ["Whatever She's Got"],
 ["Whatever She's Got", 'It Goes Like This'],
 ['It Goes Like This'],
 ["Whatever She's Got"],
 ['American Country Love Song'],
 ["Where It's At"],
 ['Hurricane'],
 ['How Country Feels'],
 ['Beat Of The Music'],
 ['Sun Daze', 'Leave The Night On'],
 ['Leave The Night On'],
 ['Sun Daze']