In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

try:
    df = pd.read_csv('spotify_top_songs_audio_features.csv')
except FileNotFoundError:
    print("Error: File not found.")

# Preprocessing the data

# Removing Missing Values
missing_values = df.isnull().sum()
print("Missing values:")
print(missing_values)

df.dropna(inplace=True)

print("\nShape of DataFrame after removing missing values:", df.shape)

# Removing Duplicates
df.drop_duplicates(subset=['track_name', 'artist_names'], keep='first', inplace=True)

print("Shape of DataFrame before removing duplicates:", df.shape)

Missing values:
id                  0
artist_names        0
track_name          0
source              0
key                 0
mode                0
time_signature      0
danceability        0
energy              0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
loudness            0
tempo               0
duration_ms         0
weeks_on_chart      0
streams             0
dtype: int64

Shape of DataFrame after removing missing values: (6513, 19)
Shape of DataFrame before removing duplicates: (5501, 19)


In [2]:
# TF-IDF Matrix
selected_features = ['danceability', 'speechiness', 'valence', 'tempo', 'weeks_on_chart', 'streams']
df['combined_features'] = df[selected_features].astype(str).apply(' '.join, axis=1)
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'].values.astype('U'))

# Cosine Similarity Matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)


In [3]:
# Music Recommender
def recommend_music(title=None, artist=None, num_recommendations=10, cosine_sim=cosine_sim):
    if title is None and artist is None:
        return "Please provide either the track name, the artist name, or both."
    
    if title is not None and artist is not None:
        # Recommend based on both track and artist
        mask = (df['track_name'] == title) & (df['artist_names'] == artist)
    elif title is None and artist is not None:
        # Recommend based on artist alone
        mask = df['artist_names'] == artist
    else:
        # Recommend based on track alone
        mask = df['track_name'] == title

    if not mask.any():
        return "Track or artist not found in the dataset."

    idx = df[mask].index
    if len(idx) == 0:
        return "Track or artist not found in the dataset."

    idx = idx[0]  # Get the first index if multiple matches found
    if idx >= len(cosine_sim):
        return "Index out of bounds."

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:num_recommendations+1]

    # Filter Out-of-Bounds Indices
    valid_indices = [i[0] for i in sim_scores if i[0] < len(df)]
    recommended_tracks = df.iloc[valid_indices]['track_name']
    recommended_artists = df.iloc[valid_indices]['artist_names']
    similarity_scores = [i[1] for i in sim_scores if i[0] < len(df)]
    return list(zip(recommended_tracks, recommended_artists, similarity_scores))

# Test (for both track and artist)
recommended_music = recommend_music(title='IDOL', artist='BTS', num_recommendations=10)
if isinstance(recommended_music, str):
    print(recommended_music)
else:
    for i, (track, artist, score) in enumerate(recommended_music, 1):
        print(f"{i}. {track} by {artist}, Similarity Score: {score:.2f}")

# Test (for just the artist)
recommended_music = recommend_music(artist='BTS', num_recommendations=10)
if isinstance(recommended_music, str):
    print(recommended_music)
else:
    for i, (track, artist, score) in enumerate(recommended_music, 1):
        print(f"{i}. {track} by {artist}, Average Similarity Score: {score:.2f}")

1. Blah Blah Blah by Armin van Buuren, Similarity Score: 0.26
2. Am I Dreaming (Metro Boomin & A$AP Rocky, Roisee) by Metro Boomin, A$AP Rocky, Roisee, Similarity Score: 0.18
3. Hasta el Amanecer by Nicky Jam, Similarity Score: 0.18
4. Magic In The Hamptons (feat. Lil Yachty) by Social House, Lil Yachty, Similarity Score: 0.18
5. Told You So by Paramore, Similarity Score: 0.17
6. Sharing Locations (feat. Lil Baby & Lil Durk) by Meek Mill, Lil Durk, Lil Baby, Similarity Score: 0.17
7. I.F.L.Y. by Bazzi, Similarity Score: 0.17
8. Rudolph The Red-Nosed Reindeer by Burl Ives, Similarity Score: 0.17
9. Freak by Doja Cat, Similarity Score: 0.17
10. A Different Way (with Lauv) by DJ Snake, Lauv, Similarity Score: 0.17
1. GOTTI by 6ix9ine, Average Similarity Score: 0.19
2. ON OFF (feat. Maître Gims) by Shirin David, GIMS, Average Similarity Score: 0.19
3. Quit (feat. Ariana Grande) by Cashmere Cat, Ariana Grande, Average Similarity Score: 0.18
4. Hurt You by The Weeknd, Gesaffelstein, Average 

In [4]:
recommended_music = recommend_music(artist='Ariana Grande', num_recommendations=10)
if isinstance(recommended_music, str):
    print(recommended_music)
else:
    for i, (track, artist, score) in enumerate(recommended_music, 1):
        print(f"{i}. {track} by {artist}, Average Similarity Score: {score:.2f}")

1. Best Of Me by BTS, Average Similarity Score: 0.24
2. America by Logic, Black Thought, Chuck D, Big Lenbo, No ID, Average Similarity Score: 0.17
3. Over The Top (feat. Drake) by Smiley, Drake, Average Similarity Score: 0.17
4. Nummer 1 by Capital Bra, Samra, Average Similarity Score: 0.16
5. POPOPOP by Gambi, Average Similarity Score: 0.16
6. Danger (Spider) (Offset & JID) by Offset, JID, Average Similarity Score: 0.16
7. Angst by Loredana, Rymez, Average Similarity Score: 0.16
8. La_Original.mp3 by Emilia, TINI, Average Similarity Score: 0.16
9. Remember (with David Guetta) by Becky Hill, David Guetta, Average Similarity Score: 0.16
10. The Heart Part 5 by Kendrick Lamar, Average Similarity Score: 0.16


In [5]:
recommended_music = recommend_music(artist='BLACKPINK', num_recommendations=10)
if isinstance(recommended_music, str):
    print(recommended_music)
else:
    for i, (track, artist, score) in enumerate(recommended_music, 1):
        print(f"{i}. {track} by {artist}, Similarity Score: {score:.2f}")

1. Parce (feat. Justin Quiles) by Maluma, Lenny Tavárez, Justin Quiles, Similarity Score: 0.24
2. Lonely (with Jonas Brothers) by Diplo, Jonas Brothers, Similarity Score: 0.21
3. Gatita by Bellakath, Similarity Score: 0.20
4. Light by San Holo, Similarity Score: 0.19
5. You Are In Love (Taylor's Version) by Taylor Swift, Similarity Score: 0.19
6. Deu Onda by MC G15, Similarity Score: 0.19
7. Before You Go by Lewis Capaldi, Similarity Score: 0.19
8. LOYALTY. FEAT. RIHANNA. by Kendrick Lamar, Rihanna, Similarity Score: 0.18
9. Ella - Remix by Boza, Lunay, Lenny Tavárez, Juhn, Beéle, Similarity Score: 0.16
10. Big Body Benz by Bonez MC, Similarity Score: 0.16


In [6]:
recommended_music = recommend_music(title='Peaches (feat. Daniel Caesar & Giveon)', artist='Justin Bieber, Daniel Caesar, Giveon', num_recommendations=10)
if isinstance(recommended_music, str):
    print(recommended_music)
else:
    for i, (track, artist, score) in enumerate(recommended_music, 1):
        print(f"{i}. {track} by {artist}, Similarity Score: {score:.2f}")

1. Younger Now by Miley Cyrus, Similarity Score: 0.22
2. You & Me by JENNIE, Similarity Score: 0.22
3. Signs by Drake, Similarity Score: 0.20
4. B.S. (feat. H.E.R.) by Jhené Aiko, H.E.R., Similarity Score: 0.17
5. IGOR'S THEME by Tyler, The Creator, Similarity Score: 0.17
6. GIVENCHY by Duki, Similarity Score: 0.17
7. Tesla (feat. Sfera Ebbasta & DrefGold) by Capo Plaza, Sfera Ebbasta, DrefGold, Similarity Score: 0.16
8. Mood Ring by Lorde, Similarity Score: 0.16
9. Overtime by Zach Bryan, Similarity Score: 0.16
10. Jiwa Yang Bersedih by Ghea Indrawari, Similarity Score: 0.16


In [9]:
def get_user_input():
    title = input("Enter the track name (or press Enter to skip): ").strip()
    artist = input("Enter the artist name (or press Enter to skip): ").strip()
    return title, artist

def recommend_music_from_input(num_recommendations=10, cosine_sim=cosine_sim):
    title, artist = get_user_input()

    if not title and not artist:
        print("Please provide either the track name, the artist name, or both.")
        return

    recommended_music = recommend_music(title=title, artist=artist, num_recommendations=num_recommendations, cosine_sim=cosine_sim)
    if isinstance(recommended_music, str):
        print(recommended_music)
    else:
        if not recommended_music:
            print("No recommendations found.")
            return
        for i, (track, artist, score) in enumerate(recommended_music, 1):
            print(f"{i}. {track} by {artist}, Similarity Score: {score:.2f}")

# Test
recommend_music_from_input(num_recommendations=10)

Enter the track name (or press Enter to skip):  IDOL
Enter the artist name (or press Enter to skip):  BTS


1. Blah Blah Blah by Armin van Buuren, Similarity Score: 0.26
2. Am I Dreaming (Metro Boomin & A$AP Rocky, Roisee) by Metro Boomin, A$AP Rocky, Roisee, Similarity Score: 0.18
3. Hasta el Amanecer by Nicky Jam, Similarity Score: 0.18
4. Magic In The Hamptons (feat. Lil Yachty) by Social House, Lil Yachty, Similarity Score: 0.18
5. Told You So by Paramore, Similarity Score: 0.17
6. Sharing Locations (feat. Lil Baby & Lil Durk) by Meek Mill, Lil Durk, Lil Baby, Similarity Score: 0.17
7. I.F.L.Y. by Bazzi, Similarity Score: 0.17
8. Rudolph The Red-Nosed Reindeer by Burl Ives, Similarity Score: 0.17
9. Freak by Doja Cat, Similarity Score: 0.17
10. A Different Way (with Lauv) by DJ Snake, Lauv, Similarity Score: 0.17


In [11]:
recommend_music_from_input(num_recommendations=10)


Enter the track name (or press Enter to skip):  enough for you
Enter the artist name (or press Enter to skip):  Olivia Rodrigo


1. Owe Me by Big Sean, Similarity Score: 0.18
2. Lost In Japan by Shawn Mendes, Similarity Score: 0.17
3. Escape From LA by The Weeknd, Similarity Score: 0.17
4. Empty Space by James Arthur, Similarity Score: 0.17
5. United In Grief by Kendrick Lamar, Similarity Score: 0.17
6. What's Free (feat. Rick Ross & Jay Z) by Meek Mill, JAY-Z, Rick Ross, Similarity Score: 0.17
7. Mios mit Bars by Luciano, Similarity Score: 0.17
8. a p p l y i n g . p r e s s u r e by J. Cole, Similarity Score: 0.16
9. p u n c h i n ‘ . t h e . c l o c k by J. Cole, Similarity Score: 0.16
10. Matt Hardy 999 by Trippie Redd, Juice WRLD, Similarity Score: 0.16


In [12]:
recommend_music_from_input(num_recommendations=10)

recommend_music_from_input(num_recommendations=10)

recommend_music_from_input(num_recommendations=10)

recommend_music_from_input(num_recommendations=10)

recommend_music_from_input(num_recommendations=10)

Enter the track name (or press Enter to skip):  ON
Enter the artist name (or press Enter to skip):  BTS


1. Oh Juliana by Niack, Similarity Score: 0.23
2. Bandido by Zé Felipe, MC Mari, Similarity Score: 0.22
3. Bad Reputation (feat. Joe Janiak) by Avicii, Joe Janiak, Similarity Score: 0.21
4. Mbappe by Eladio Carrion, Similarity Score: 0.21
5. Dark Horse by Katy Perry, Juicy J, Similarity Score: 0.21
6. Confessions of a Dangerous Mind by Logic, Similarity Score: 0.17
7. March 14 by Drake, Similarity Score: 0.16
8. INTERNATIONAL GANGSTAS by Farid Bang, CAPO, 6ix9ine, SCH, Similarity Score: 0.15
9. Shining (feat. Beyoncé & Jay-Z) by DJ Khaled, Beyoncé, JAY-Z, Similarity Score: 0.15
10. DJ Got Us Fallin' In Love (feat. Pitbull) by USHER, Pitbull, Similarity Score: 0.15


Enter the track name (or press Enter to skip):  Dark Horse
Enter the artist name (or press Enter to skip):  Katy Perry, Juicy J


1. Ring (feat. Kehlani) by Cardi B, Kehlani, Similarity Score: 0.28
2. blazed (feat. Pharrell Williams) by Ariana Grande, Pharrell Williams, Similarity Score: 0.21
3. In The Stars by Benson Boone, Similarity Score: 0.21
4. RUNNING OUT OF TIME by Tyler, The Creator, Similarity Score: 0.21
5. Rich Baby Daddy (feat. Sexyy Red & SZA) by Drake, Sexyy Red, SZA, Similarity Score: 0.21
6. Into You by Ariana Grande, Similarity Score: 0.21
7. xanny by Billie Eilish, Similarity Score: 0.20
8. Louder than bombs by BTS, Similarity Score: 0.16
9. VULTURES by ¥$, Kanye West, Ty Dolla $ign, Similarity Score: 0.16
10. KEYS TO MY LIFE by ¥$, Kanye West, Ty Dolla $ign, Similarity Score: 0.16


Enter the track name (or press Enter to skip):  Into You
Enter the artist name (or press Enter to skip):  Ariana Grande


Index out of bounds.


Enter the track name (or press Enter to skip):  Louder than bombs
Enter the artist name (or press Enter to skip):  BTS


1. Some Kind Of Drug (feat. Marc E. Bassy) by G-Eazy, Marc E. Bassy, Similarity Score: 0.25
2. Nummer unterdrückt by Bonez MC, RAF Camora, Similarity Score: 0.21
3. IDOL by BTS, Nicki Minaj, Similarity Score: 0.21
4. High Infidelity by Taylor Swift, Similarity Score: 0.21
5. China by Anuel AA, Daddy Yankee, KAROL G, Ozuna, J Balvin, Similarity Score: 0.20
6. Zero - From the Original Motion Picture "Ralph Breaks The Internet" by Imagine Dragons, Similarity Score: 0.17
7. AP - Music from the film Boogie by Pop Smoke, Similarity Score: 0.17
8. Tommy Lee (feat. Post Malone) by Tyla Yaweh, Post Malone, Similarity Score: 0.17
9. Tragic Endings (feat. Skylar Grey) by Eminem, Skylar Grey, Similarity Score: 0.17
10. This Christmas by Donny Hathaway, Similarity Score: 0.16


Enter the track name (or press Enter to skip):  DDU-DU DDU-DU
Enter the artist name (or press Enter to skip):  BLACKPINK


1. Nicht verdient by Capital Bra, Loredana, Similarity Score: 0.32
2. Parasite Eve by Bring Me The Horizon, Similarity Score: 0.19
3. Astronauts by Future, Juice WRLD, Similarity Score: 0.19
4. Perfect Strangers by Lil Wayne, Similarity Score: 0.18
5. UFF by Veysel, Gzuz, Similarity Score: 0.18
6. Alarm by Anne-Marie, Similarity Score: 0.18
7. Top Off (feat. JAY Z, Future & Beyoncé) by DJ Khaled, JAY-Z, Future, Beyoncé, Similarity Score: 0.18
8. Sad Beautiful Tragic (Taylor's Version) by Taylor Swift, Similarity Score: 0.18
9. The Business by Tiësto, Similarity Score: 0.17
10. Boyfriend by Dove Cameron, Similarity Score: 0.17
