# Import Library

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load Dataset

In [2]:
# Load dataset
df = pd.read_csv("spotify-2023.csv", encoding='latin1')

In [3]:
df

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
948,My Mind & Me,Selena Gomez,1,2022,11,3,953,0,91473363,61,...,144,A,Major,60,24,39,57,0,8,3
949,Bigger Than The Whole Sky,Taylor Swift,1,2022,10,21,1180,0,121871870,4,...,166,F#,Major,42,7,24,83,1,12,6
950,A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,...,92,C#,Major,80,81,67,4,0,8,6
951,En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,...,97,C#,Major,82,67,77,8,0,12,5


# Pre-Processing

In [4]:
# Tambahkan ID unik untuk setiap lagu
df['track_id'] = range(len(df))

In [5]:
# Simulasi 50 pengguna, masing-masing menyukai 10-30 lagu secara acak
num_users = 50
np.random.seed(42)
user_ids = [f'user_{i+1}' for i in range(num_users)]
user_track_matrix = pd.DataFrame(0, index=user_ids, columns=df['track_id'])

for user in user_track_matrix.index:
    liked_ids = np.random.choice(df['track_id'], size=np.random.randint(10, 30), replace=False)
    user_track_matrix.loc[user, liked_ids] = 1

# Hitung Similarity

In [6]:
# Hitung cosine similarity antar pengguna
user_similarity = cosine_similarity(user_track_matrix.values)
user_similarity_df = pd.DataFrame(user_similarity, index=user_track_matrix.index, columns=user_track_matrix.index)

def recommend_collaborative(user_id, top_n=5):
    if user_id not in user_track_matrix.index:
        return f"User {user_id} tidak ditemukan."

    sim_scores = user_similarity_df[user_id]
    other_users_matrix = user_track_matrix.drop(index=user_id)
    sim_scores_others = sim_scores.drop(index=user_id)

    scores = other_users_matrix.T.dot(sim_scores_others)
    scores = scores / sim_scores_others.sum()

    user_interactions = user_track_matrix.loc[user_id]
    unseen_scores = scores[user_interactions == 0]

    top_recs = unseen_scores.sort_values(ascending=False).head(top_n).index
    return df[df['track_id'].isin(top_recs)][['track_name', 'artist(s)_name']]

# Pengujian

In [7]:
# Contoh penggunaan
if __name__ == "__main__":
    print("Sistem Rekomendasi Lagu - Collaborative Filtering")
    user_input = input("Masukkan user ID (contoh: user_1): ")
    results = recommend_collaborative(user_input)
    print("\nRekomendasi:")
    print(results)

Sistem Rekomendasi Lagu - Collaborative Filtering

Rekomendasi:
                                track_name artist(s)_name
215                            Eyes Closed     Ed Sheeran
225                                Bite Me        ENHYPEN
276      Shoong! (feat. LISA of BLACKPINK)  TAEYANG, Lisa
795  That That (prod. & feat. SUGA of BTS)      PSY, Suga
843                             Grapejuice   Harry Styles
