<a href="https://colab.research.google.com/github/raeven57/songs/blob/main/Music_Recommendation_content_based.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import Image, display

In [4]:
# Load the song catalogue from the CSV file.
csv_file_path = 'spotify_songs_of_different_genres.csv'
df = pd.read_csv(csv_file_path)

# Each row stores its genres as one ', '-separated string; turn it into a
# list of genre labels so it can be multi-hot encoded.
df['genres'] = [genre_text.split(', ') for genre_text in df['genres']]

# Multi-hot encode the genre lists: one row per song, one column per genre.
mlb = MultiLabelBinarizer()
genre_matrix = mlb.fit_transform(df['genres'])
genre_df = pd.DataFrame(
    data=genre_matrix,
    index=df['track_id'],
    columns=mlb.classes_,
)

# Song-by-song cosine similarity of the genre vectors, labelled on both axes
# by track_id (rows stay in the same order as `df`).
cosine_sim = cosine_similarity(genre_df, genre_df)
cosine_sim_df = pd.DataFrame(
    data=cosine_sim,
    index=genre_df.index,
    columns=genre_df.index,
)

def recommend_songs(track_id, cosine_sim_df, df, num_recommendations=10):
    """Print and return the songs most similar to ``track_id``.

    Args:
        track_id: Value to look up in ``df['track_id']``.
        cosine_sim_df: Square similarity table whose rows and columns follow
            the same order as the rows of ``df`` (it is addressed by
            position, not by label).
        df: Song table providing the ``track_id``, ``track_name``,
            ``artist_name``, ``genres`` (iterable of strings) and
            ``album_cover_image`` columns.
        num_recommendations: Maximum number of songs to return.

    Returns:
        The recommended rows of ``df`` (the five columns listed above),
        ordered from most to least similar. Rows carrying the queried
        ``track_id`` are never included.

    Raises:
        IndexError: If ``track_id`` does not occur in ``df['track_id']``.
    """
    # Resolve the query to row *positions*; index labels only coincide with
    # positions when ``df`` carries a default 0..n-1 index.
    query_positions = (df['track_id'] == track_id).to_numpy().nonzero()[0].tolist()
    if not query_positions:
        raise IndexError(f"track_id {track_id!r} not found in df['track_id']")
    idx = query_positions[0]
    excluded = set(query_positions)

    # Remove the query explicitly instead of dropping "the first" result:
    # songs with an identical genre set tie with the query at the top score,
    # so the query is not guaranteed to sort first.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim_df.iloc[idx])
        if position not in excluded
    ]
    # list.sort is stable, so tied songs keep their original row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    song_indices = [position for position, _ in sim_scores[:num_recommendations]]

    recommended_songs = df.iloc[song_indices][['track_id', 'track_name', 'artist_name', 'genres', 'album_cover_image']]

    for _, row in recommended_songs.iterrows():
        print(f"Track ID: {row['track_id']}")
        print(f"Track Name: {row['track_name']}")
        print(f"Artist Name: {row['artist_name']}")
        print(f"Genres: {', '.join(row['genres'])}")
        print(f"Album Cover Image: {row['album_cover_image']}")
        display_album_cover(row['album_cover_image'])
        print("\n")

    return recommended_songs

# Function to display album cover images
def display_album_cover(url):
    """Show the image at ``url`` inline, requested at 150x150.

    Falls back to printing the URL if building or displaying the image
    raises ``ImportError``.
    """
    try:
        cover = Image(url=url, width=150, height=150)
        display(cover)
    except ImportError:
        print(f"Album Cover: {url}")

# Example query: print and collect the songs most similar to one track.
track_id_example = '0i5QVxsK3IvEDbUjTA64Li'
recommended_songs = recommend_songs(
    track_id=track_id_example,
    cosine_sim_df=cosine_sim_df,
    df=df,
)

Track ID: 2mKz0wGscPw1xnTvrTlQUi
Track Name: Orange Blood
Artist Name: Mt. Joy
Genres: pov: indie, stomp and holler
Album Cover Image: https://i.scdn.co/image/ab67616d0000b273538afc814477092a0781be50




Track ID: 2fpDrL2Vpee0JnM6AoUFvl
Track Name: Silver Lining
Artist Name: Mt. Joy
Genres: pov: indie, stomp and holler
Album Cover Image: https://i.scdn.co/image/ab67616d0000b273f40e5c4f60e287cd1ca6f3c8




Track ID: 0SnCZci0wyVj0RZXknNaWq
Track Name: Lemon Tree
Artist Name: Mt. Joy
Genres: pov: indie, stomp and holler
Album Cover Image: https://i.scdn.co/image/ab67616d0000b2739f9fc9a902efb92818af8fdd




Track ID: 6eLUwUJbXC9E94RuP5Qg4Y
Track Name: Don't Let It Bring You Down - Recorded at Spotify Studios NYC
Artist Name: Mt. Joy
Genres: pov: indie, stomp and holler
Album Cover Image: https://i.scdn.co/image/ab67616d0000b273f361b162009bad99d85db417




Track ID: 3E7z1rLE9rR2rhTW3TeXGo
Track Name: Highway Queen
Artist Name: Mt. Joy
Genres: pov: indie, stomp and holler
Album Cover Image: https://i.scdn.co/image/ab67616d0000b2735abe308e64e9713d45fc2658




Track ID: 3BIBnyt40zWjPCNqRyGwPp
Track Name: This Side of Paradise - slowed
Artist Name: Coyote Theory
Genres: pov: indie
Album Cover Image: https://i.scdn.co/image/ab67616d0000b273c2d3005800e3a80004c10c9b




Track ID: 3xtiXNDbSKxy20I7D6vFUg
Track Name: Soldier, Poet, King
Artist Name: The Oh Hellos
Genres: folk-pop, pov: indie, san marcos tx indie, stomp and holler
Album Cover Image: https://i.scdn.co/image/ab67616d0000b27339521e60d8d11783d9f5e3b1




Track ID: 3IznIgmXtrUaoPWpQTy5jB
Track Name: Not Allowed
Artist Name: TV Girl
Genres: pov: indie
Album Cover Image: https://i.scdn.co/image/ab67616d0000b27332f5fec7a879ed6ef28f0dfd




Track ID: 22FniXvTKV9IC6IhxCpYve
Track Name: Hello Kitty
Artist Name: Jazmin Bean
Genres: pov: indie
Album Cover Image: https://i.scdn.co/image/ab67616d0000b273eea4d8ef4ad2b9b0a7f00d37




Track ID: 474uVhyGgK5MtY9gMcDgGl
Track Name: It's Called: Freefall
Artist Name: Rainbow Kitten Surprise
Genres: pov: indie
Album Cover Image: https://i.scdn.co/image/ab67616d0000b2739529217e48fc8bab89c8a1f9






In [5]:
!pip install pandas scikit-learn matplotlib surprise

import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.neighbors import NearestNeighbors
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split as surprise_train_test_split
from surprise.model_selection import cross_validate, GridSearchCV
import matplotlib.pyplot as plt

# Reload the catalogue so this cell does not depend on earlier cells.
df = pd.read_csv('spotify_songs_of_different_genres.csv')

# Split the ', '-separated genre string of every song into a list of labels.
df['genres'] = df['genres'].map(lambda genre_text: genre_text.split(', '))

# Multi-hot genre matrix: one row per song (labelled by track_id), one column
# per genre.
mlb = MultiLabelBinarizer()
genre_matrix = mlb.fit_transform(df['genres'])
genre_df = pd.DataFrame(
    data=genre_matrix,
    index=df['track_id'],
    columns=mlb.classes_,
)

# Hold out 20% of the songs for evaluation (fixed seed for reproducibility).
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Song-by-song cosine similarity over the genre vectors.
cosine_sim = cosine_similarity(genre_df, genre_df)
cosine_sim_df = pd.DataFrame(
    data=cosine_sim,
    index=genre_df.index,
    columns=genre_df.index,
)

# Defining the Content-Based Filtering recommendation function
def recommend_songs_content_based(track_id, cosine_sim_df, df, num_recommendations=10):
    """Return the songs whose similarity to ``track_id`` is highest.

    Args:
        track_id: Value to look up in ``df['track_id']``.
        cosine_sim_df: Square similarity table whose rows and columns follow
            the same order as the rows of ``df`` (it is addressed by
            position, not by label).
        df: Song table providing the ``track_id``, ``track_name``,
            ``artist_name``, ``genres`` and ``album_cover_image`` columns.
        num_recommendations: Maximum number of songs to return.

    Returns:
        The recommended rows of ``df`` (the five columns listed above),
        ordered from most to least similar. Rows carrying the queried
        ``track_id`` are never included.

    Raises:
        IndexError: If ``track_id`` does not occur in ``df['track_id']``.
    """
    # Resolve the query to row *positions*; index labels only coincide with
    # positions when ``df`` carries a default 0..n-1 index.
    query_positions = (df['track_id'] == track_id).to_numpy().nonzero()[0].tolist()
    if not query_positions:
        raise IndexError(f"track_id {track_id!r} not found in df['track_id']")
    idx = query_positions[0]
    excluded = set(query_positions)

    # Remove the query explicitly instead of dropping "the first" result:
    # songs with an identical genre set tie with the query at the top score,
    # so the query is not guaranteed to sort first.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim_df.iloc[idx])
        if position not in excluded
    ]
    # list.sort is stable, so tied songs keep their original row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    song_indices = [position for position, _ in sim_scores[:num_recommendations]]

    recommended_songs = df.iloc[song_indices][['track_id', 'track_name', 'artist_name', 'genres', 'album_cover_image']]
    return recommended_songs

# Collaborative Filtering using SVD
# Fail early with a clear message when the dataset lacks the columns this
# section needs (the rating column in particular).
svd_columns = ['track_id', 'artist_uri', 'play_counts']
missing_columns = [column for column in svd_columns if column not in df.columns]
if missing_columns:
    raise KeyError(
        f"Collaborative filtering needs columns {missing_columns} "
        "which are not present in the dataset"
    )

# The rating scale spans the observed play counts.
# NOTE(review): Surprise reads the three columns as (user, item, rating), so
# track_id plays the "user" role and artist_uri the "item" role here --
# confirm this is the intended setup.
reader = Reader(rating_scale=(df['play_counts'].min(), df['play_counts'].max()))
data = Dataset.load_from_df(df[svd_columns], reader)

# Train and test split. This has to happen before any model is fitted so that
# `trainset` / `testset` exist on a fresh kernel.
trainset, testset = surprise_train_test_split(data, test_size=0.2, random_state=42)

# Baseline SVD with default hyper-parameters.
svd = SVD()
svd.fit(trainset)

# Hyper-parameter search. `GridSearchCV` is Surprise's implementation here:
# its import comes after (and therefore shadows) the scikit-learn one.
param_grid = {'n_factors': [50, 100, 150], 'lr_all': [0.002, 0.005, 0.01], 'reg_all': [0.02, 0.05, 0.1]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)
best_svd = gs.best_estimator['rmse']

# Train the best SVD model
best_svd.fit(trainset)
svd_predictions = best_svd.test(testset)

# K-Nearest Neighbors (KNN)
# Brute-force neighbour search with cosine distance over the genre vectors.
knn = NearestNeighbors(algorithm='brute', metric='cosine')
knn.fit(genre_df)

# For every song, look up its 11 nearest rows of `genre_df` (distances and
# row positions).
knn_distances, knn_indices = knn.kneighbors(
    genre_df,
    n_neighbors=11,
)

# Define the KNN recommendation function
def recommend_songs_knn(track_id, knn_indices, df, num_recommendations=10):
    """Return the nearest-neighbour songs of ``track_id``.

    Args:
        track_id: Value to look up in ``df['track_id']``.
        knn_indices: Per-row sequences of neighbour row positions, nearest
            first, where row ``i`` belongs to the ``i``-th row of ``df``.
        df: Song table providing the ``track_id``, ``track_name``,
            ``artist_name``, ``genres`` and ``album_cover_image`` columns.
        num_recommendations: Maximum number of songs to return.

    Returns:
        The recommended rows of ``df`` (the five columns listed above) in
        neighbour order. Rows carrying the queried ``track_id`` are never
        included.

    Raises:
        IndexError: If ``track_id`` does not occur in ``df['track_id']``.
    """
    # Resolve the query to row *positions*; index labels only coincide with
    # positions when ``df`` carries a default 0..n-1 index.
    query_positions = (df['track_id'] == track_id).to_numpy().nonzero()[0].tolist()
    if not query_positions:
        raise IndexError(f"track_id {track_id!r} not found in df['track_id']")
    idx = query_positions[0]
    excluded = set(query_positions)

    # Filter the query out by position rather than skipping the first entry:
    # songs with identical genre vectors tie at distance 0, so the query is
    # not guaranteed to be listed first among its own neighbours.
    song_indices = [
        int(position)
        for position in knn_indices[idx]
        if int(position) not in excluded
    ][:num_recommendations]

    recommended_songs = df.iloc[song_indices][['track_id', 'track_name', 'artist_name', 'genres', 'album_cover_image']]
    return recommended_songs

# Evaluate the models
def evaluate_model(model_name, recommend_func, test_data, knn_indices=None):
    """Score a recommender by whether it suggests a song by the same artist.

    For every row of ``test_data`` the recommender is asked for songs similar
    to the row's track; the row counts as a hit (``y_true == 1``) when the
    row's own artist appears among the recommended artists.

    Args:
        model_name: ``'Content-Based'``, ``'KNN'`` or ``'SVD'``. ``'SVD'``
            rows are skipped here (that model is evaluated separately), so
            the metrics are then computed on empty label lists.
        recommend_func: Called as ``recommend_func(track_id, lookup, df)``
            where ``lookup`` is the module-level ``cosine_sim_df`` for
            ``'Content-Based'`` and ``knn_indices`` for ``'KNN'``. Must
            return a table with an ``artist_name`` column.
        test_data: Table with ``track_id`` and ``artist_name`` columns.
        knn_indices: Neighbour lookup handed to ``recommend_func`` for
            ``'KNN'``.

    Returns:
        ``(precision, recall, f1)`` as computed by scikit-learn.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.

    Note:
        The module-level ``df`` and ``cosine_sim_df`` are read directly.
        ``y_pred`` is always 1, so precision is the hit rate and recall
        cannot fall below 1.0 once there is at least one hit.
    """
    supported_models = ('Content-Based', 'KNN', 'SVD')
    if model_name not in supported_models:
        # Without this guard the loop below would reach an unassigned
        # `recommended_songs` and fail with an unrelated-looking error.
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of {supported_models}"
        )

    y_true = []
    y_pred = []

    if model_name != 'SVD':  # SVD evaluation is handled separately
        for track_id, actual_artist in zip(test_data['track_id'], test_data['artist_name']):
            if model_name == 'Content-Based':
                recommended_songs = recommend_func(track_id, cosine_sim_df, df)
            else:  # 'KNN'
                recommended_songs = recommend_func(track_id, knn_indices, df)

            recommended_artists = recommended_songs['artist_name'].tolist()

            y_true.append(1 if actual_artist in recommended_artists else 0)
            y_pred.append(1)

    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    return precision, recall, f1

# Evaluate the Content-Based model
precision_content, recall_content, f1_content = evaluate_model(
    model_name='Content-Based',
    recommend_func=recommend_songs_content_based,
    test_data=test_data,
)

# Evaluate the KNN model
precision_knn, recall_knn, f1_knn = evaluate_model(
    model_name='KNN',
    recommend_func=recommend_songs_knn,
    test_data=test_data,
    knn_indices=knn_indices,
)

# Evaluate the SVD model
# Binarize the actual and the estimated rating of every test prediction:
# 1 when the value is positive, 0 otherwise.
svd_true = [int(prediction.r_ui > 0) for prediction in svd_predictions]
svd_pred = [int(prediction.est > 0) for prediction in svd_predictions]

# First pass of macro-averaged metrics on the binarized values. These three
# results are overwritten by the second pass below.
precision_svd, recall_svd, f1_svd = (
    metric(svd_true, svd_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

# Threshold = share of positive actual ratings.
# NOTE(review): `svd_pred` already holds 0/1 values, so this compares 0/1
# against the threshold rather than the raw estimates -- confirm intent.
threshold = sum(svd_true) / len(svd_true)
svd_pred_binary = [int(flag >= threshold) for flag in svd_pred]

# Second pass: the macro-averaged metrics that are actually reported.
precision_svd, recall_svd, f1_svd = (
    metric(svd_true, svd_pred_binary, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

# Collect the per-model metrics in plotting order.
models = ['Content-Based', 'KNN', 'SVD']
precisions = [precision_content, precision_knn, precision_svd]
recalls = [recall_content, recall_knn, recall_svd]
f1_scores = [f1_content, f1_knn, f1_svd]

x = range(len(models))
bar_width = 0.2

# Grouped bar chart: shift each metric by one bar width around the model's
# tick so the three bars sit side by side instead of covering each other.
plt.figure(figsize=(12, 6))
plt.bar([pos - bar_width for pos in x], precisions, width=bar_width, label='Precision')
plt.bar(list(x), recalls, width=bar_width, label='Recall')
plt.bar([pos + bar_width for pos in x], f1_scores, width=bar_width, label='F1-Score')
plt.xlabel('Models')
plt.ylabel('Scores')
plt.title('Model Comparison')
plt.xticks(x, models)
plt.legend()
plt.show()



Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357264 sha256=1d5e54e636484c8ac58c65464ca6b33ef6a3d06892798239141d976e5316a513
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Install

KeyError: 'play_counts'