In [None]:
import pandas as pd
import pickle
from fpgrowth_py import fpgrowth
from pathlib import Path
from datetime import datetime
import yaml

# --- Configuration -----------------------------------------------------------
# Paths and the dataset name are read from config.yaml. The keys used below are
# output.model_path, dataset.file_path and dataset.name.
CONFIG_FILE = Path("config.yaml")

# FP-Growth thresholds, named so they can be tuned in one place and stored
# alongside the model they produced.
MIN_SUPPORT_RATIO = 0.02  # forwarded to fpgrowth as minSupRatio
MIN_CONFIDENCE = 0.3      # forwarded to fpgrowth as minConf

# Explicit UTF-8 so the config parses the same way regardless of the
# platform's default locale encoding.
with open(CONFIG_FILE, "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

MODEL_FILE = Path(cfg["output"]["model_path"])
MODEL_FILE.parent.mkdir(parents=True, exist_ok=True)

# --- Load data ---------------------------------------------------------------
df = pd.read_csv(cfg["dataset"]["file_path"])

# One transaction per playlist id: the distinct track names it contains.
# `unique()` keeps first-appearance order, so the transactions are identical
# from run to run (iterating a set of strings is not).
playlists_songs = [
    list(tracks) for tracks in df.groupby("pid")["track_name"].unique()
]

# --- Mine frequent itemsets and association rules ----------------------------
mining_result = fpgrowth(
    playlists_songs,
    minSupRatio=MIN_SUPPORT_RATIO,
    minConf=MIN_CONFIDENCE,
)
# NOTE(review): fpgrowth_py appears to return None (instead of a tuple) when no
# item reaches the support threshold -- confirm against the library. Failing
# here with a clear message beats an opaque unpacking TypeError.
if mining_result is None:
    raise ValueError(
        "fpgrowth found no frequent itemsets; "
        f"try lowering MIN_SUPPORT_RATIO (currently {MIN_SUPPORT_RATIO})"
    )
freq_items_set, rules = mining_result

# --- Persist the model -------------------------------------------------------
# The thresholds are saved with the rules so the artifact is self-describing.
with open(MODEL_FILE, "wb") as f:
    pickle.dump({
        "freq": freq_items_set,
        "rules": rules,
        "datetime": datetime.now().isoformat(),
        "dataset": cfg["dataset"]["name"],
        "min_sup_ratio": MIN_SUPPORT_RATIO,
        "min_conf": MIN_CONFIDENCE,
        }, f)

print(f"Model saved in {MODEL_FILE}")

Model saved in models\model.pkl


In [15]:
# Inspect the frequent itemsets returned by fpgrowth (the recorded output
# shows a list of sets of track names).
freq_items_set

[{'Ayy Ladies'},
 {'Weak'},
 {'Pony'},
 {'The Fix (feat. Jeremih)'},
 {'Ultralight Beam'},
 {'Call Me Maybe'},
 {'Weekend (feat. Miguel)'},
 {'Do I Wanna Know?'},
 {'S&M'},
 {'Right Thurr'},
 {'Still D.R.E.'},
 {'Take It Easy'},
 {'Hall of Fame'},
 {'No Flex Zone'},
 {'Grenade'},
 {'Glad You Came'},
 {'Worth It'},
 {'Everybody'},
 {'Record Year'},
 {'Country Grammar (Hot Shit)'},
 {'Close'},
 {'Stolen Dance'},
 {'Fast Car'},
 {'Come a Little Closer'},
 {'Bang Bang'},
 {'Crooked Smile'},
 {'God, Your Mama, And Me'},
 {'Pipe It Up'},
 {'Beware'},
 {"Night's On Fire"},
 {'Hey Girl'},
 {'Jump - 2015 Remastered Version'},
 {'Thunderstruck'},
 {"I Don't Want This Night to End"},
 {'Safe And Sound'},
 {'I Fall Apart'},
 {'Rude'},
 {'Cyclone - Main'},
 {'Firework'},
 {'Kelly Price (feat. Travis Scott)'},
 {'Wish You Were Here'},
 {'Simple Man'},
 {'Free Bird'},
 {'Ophelia'},
 {'God Gave Me You'},
 {'Some Nights'},
 {'Shots'},
 {'Midnight City'},
 {'Thnks fr th Mmrs'},
 {'Love Me Like You Do - 

In [10]:
# Number of association rules returned by fpgrowth.
len(rules)

8148

In [11]:
# Inspect the mined rules; per the recorded output each entry is
# [antecedent set, consequent set, confidence].
rules

[[{'ELEMENT.'}, {'HUMBLE.'}, 0.8769230769230769],
 [{'Gonna Wanna Tonight'}, {'Die A Happy Man'}, 0.8275862068965517],
 [{'Die A Happy Man'}, {'Gonna Wanna Tonight'}, 0.46153846153846156],
 [{'Die A Happy Man'}, {'Drunk On Your Love'}, 0.45],
 [{'Drunk On Your Love'}, {'Die A Happy Man'}, 0.7748344370860927],
 [{'American Country Love Song'}, {'T-Shirt'}, 0.756578947368421],
 [{'Leave The Night On'}, {"Where It's At"}, 0.47520661157024796],
 [{"Where It's At"}, {'Leave The Night On'}, 0.7516339869281046],
 [{'Get Right Witcha'},
  {'Bad and Boujee (feat. Lil Uzi Vert)'},
  0.7516339869281046],
 [{'Get Right Witcha'}, {'T-Shirt'}, 0.803921568627451],
 [{'Slippery (feat. Gucci Mane)'}, {'Get Right Witcha'}, 0.45955882352941174],
 [{'Get Right Witcha'}, {'Slippery (feat. Gucci Mane)'}, 0.8169934640522876],
 [{'Hurricane'}, {'Die A Happy Man'}, 0.7727272727272727],
 [{'Die A Happy Man'}, {'Hurricane'}, 0.4576923076923077],
 [{'Hurricane'}, {'Body Like A Back Road'}, 0.7792207792207793],
 [

In [7]:
def recommend(rules: list, songs: list[str], verbose: bool = False) -> list[str]:
    """Rank tracks suggested by association rules for a set of known songs.

    A rule fires when every item of its antecedent is present in ``songs``.
    Each consequent item that is not already in ``songs`` accumulates the
    confidence of every firing rule that proposes it.

    Args:
        rules: Sequence of ``(antecedent, consequent, confidence)`` triples,
            where antecedent and consequent are collections of track names
            and confidence is a number.
        songs: Track names already present (e.g. the tracks of a playlist).
        verbose: When True, print every rule that fires. Off by default so a
            large rule list does not flood the output.

    Returns:
        Recommended track names ordered by accumulated confidence, highest
        first; tracks with equal scores keep the order in which they were
        first proposed. Empty when no rule fires.
    """
    # Build the lookup set once instead of re-scanning the list for every rule.
    known = set(songs)
    scores: dict[str, float] = {}

    for antecedent, consequent, confidence in rules:
        if not known.issuperset(antecedent):
            continue
        if verbose:
            print(antecedent, consequent, confidence)
        for track in consequent:
            if track not in known:
                scores[track] = scores.get(track, 0) + confidence

    return sorted(scores, key=scores.get, reverse=True)

In [9]:
# Example query with a single known track. The result is an empty list when
# no rule's antecedent is fully contained in the given songs.
recommend(rules, ["Just the Way You Are"])

[]