In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from pathlib import Path


In [54]:
# we originally tried to load this into google co-lab, however to access the csv, we were going to have to give google co-lab access to our google drive, so we went the jupyter notebook/VS Code route instead

# Load the dataset
file_path = Path('Resources/spotify_dataset.csv')
songs_df = pd.read_csv(file_path)
songs_df.shape

(41099, 20)

In [55]:
# Remove duplicate songs
songs_df = songs_df.drop_duplicates(subset=['track', 'artist'], keep='first')

print(f"DataFrame after removing duplicates: {songs_df.shape}")

DataFrame after removing duplicates: (39996, 20)


In [56]:
column_names = songs_df.columns.tolist()
column_names

['track',
 'artist',
 'uri',
 'danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo',
 'duration_ms',
 'time_signature',
 'chorus_hit',
 'sections',
 'popularity',
 'decade']

In [57]:
songs_df['decade'] = songs_df['decade'].replace({
    '60s': 1960.0,
    '70s': 1970.0,
    '80s': 1980.0,
    '90s': 1990.0,
    '00s': 2000.0, 
    '10s': 2010.0
}).astype(float)

  songs_df['decade'] = songs_df['decade'].replace({


In [58]:
# Select relevant features for the model

features = [
# 'track',
#  'artist',
#  'uri',
 'danceability',
 'energy',
#  'key',
#  'loudness',
#  'mode',
#  'speechiness',
 'acousticness',
 'instrumentalness',
#  'liveness',
#  'valence',
 'tempo',
#  'duration_ms',
#  'time_signature',
#  'chorus_hit',
#  'sections',
 'popularity',
#  'decade'
 ]
X = songs_df[features]


In [59]:
# Scale the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


# # Split the data (not strictly needed for KNN, but good practice)
# X_train, X_test, _, _ = train_test_split(X_scaled, songs_df['track'], test_size=0.2, random_state=42)

In [60]:
# Train a k-Nearest Neighbors model
model = NearestNeighbors(n_neighbors=15, algorithm='ball_tree') 
model.fit(X_scaled)

In [61]:
def recommend_songs(song_title, artist_name, num_recommendations=5):

    # Filter the dataset for the input song and artist
    song_row = songs_df[(songs_df['track'] == song_title) & (songs_df['artist'] == artist_name)]
    
    if song_row.empty:
        print(f"Error: Song '{song_title}' by '{artist_name}' not found in the dataset.")
        return

    # Get the index and features of the input song
    song_index = song_row.index[0]
    song_features = X_scaled[song_index].reshape(1, -1)

    # Find nearest neighbors
    distances, indices = model.kneighbors(song_features)
    
    # Exclude the input song and filter by different artists
    recommendations = []
    for i in indices.flatten():
        if songs_df.loc[i, 'artist'] != artist_name:
            recommendations.append((songs_df.loc[i, 'track'], songs_df.loc[i, 'artist']))
        if len(recommendations) >= num_recommendations:
            break

    # Print recommendations
    print(f"Recommendations for '{song_title}' by {artist_name}:")
    for track, artist in recommendations:
        print(f"- {track} by {artist}")

recommend_songs("Take It Easy", "Eagles", 8)

Recommendations for 'Take It Easy' by Eagles:
- Mary Jane by Rick James
- Hace Sonar la Giralda (Tientos-Tangos) by Chiquetete
- The Beehive State by Randy Newman
- Boa Me (feat. Ed Sheeran & Mugeez) by Fuse ODG
- You Make Me Wanna... by Usher
- R.O.C.K. In The U.S.A. (A Salute to 60's Rock) by John Mellencamp
- Looks Like We Made It by Barry Manilow
- Eisteddfod by Anweledig


In [69]:
recommend_songs("Your Body Is A Wonderland", "John Mayer", 8)

Recommendations for 'Your Body Is A Wonderland' by John Mayer:
- Freaks by French Montana Featuring Nicki Minaj
- Different by Egypt Central
- Our Last Night Together by Arthur Russell
- Thrash Till Death by Destruction
- Metal Drums (Regis remix) by Black Asteroid
- Willst du mich für immer - Dance Remix by Michelle
- Closer by Ne-Yo
- Baby I'm Back by Baby Bash Featuring Akon


In [66]:
recommend_songs('Classical Gas','Mason Williams',8)

Recommendations for 'Classical Gas' by Mason Williams:
- Main Title (1966 Album) by Jerry Goldsmith
- I Stand Accused by Isaac Hayes
- One Heartbeat by Smokey Robinson
- Rubber Bullets by 10cc
- The Bleeding Baroness by Candlemass
- Love Cries by Stage Dolls
- There Goes The Fear by Doves
- Ojalá by Silvio Rodríguez


In [65]:
recommend_songs('Subterranean Homesick Blues','Bob Dylan',8)

Recommendations for 'Subterranean Homesick Blues' by Bob Dylan:
- Feel So Fine by Johnny Preston
- Pick Up The Phone by Young Thug And Travis Scott Featuring Quavo
- Ophelia by The Lumineers
- Talk To Me by Sunny & The Sunglows
- A Moment Like This by Kelly Clarkson
- Please Don't Ever Leave Me by The Cyrkle
- Chattanooga Shoe Shine Boy by Freddy Cannon
- We Built This City by Starship


In [64]:
recommend_songs('Yesterday','The Beatles',8)

Recommendations for 'Yesterday' by The Beatles:
- A World Of Our Own by The Seekers
- Ela É Carioca by MPB4
- I Can't Let Go by The Hollies
- De No Olvidar by Alfredo Zitarrosa
- International Harvester by Craig Morgan
- Red Roses For A Blue Lady by Vic Dana
- Rawhide - Live by Lester Flatt
- Secretary by Betty Wright
