## Imports

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances

## Dataframe Loading and Song Selection

In [None]:
# Read the song dataset from the CSV file into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer='data (3).csv')

In [None]:
# Preview the first rows of the freshly loaded dataframe
df.head()

In [None]:
# Normalize every title in the dataset once (lowercase, surrounding whitespace
# removed) so each retry is a cheap set lookup instead of re-deriving the
# whole column on every loop iteration.
known_song_titles = set(df['name'].str.lower().str.strip())

# Enter a song title
song_title = input("Enter a song title to find similar songs: ")

# Normalize the input the same way as the dataset titles
target_song_title = song_title.lower().strip()

# Keep asking until the normalized title matches a song in the dataset
while target_song_title not in known_song_titles:
    print("- - - - - ")
    print(f"Error: Song '{target_song_title}' not found in the dataset.")
    target_song_title = input("Enter a song title to find similar songs: ")
    target_song_title = target_song_title.lower().strip()

In [None]:
# Collect every distinct artist entry for the chosen song title, sorted in place
available_artists = df.loc[df['name'].str.lower().str.strip() == target_song_title, 'artists'].unique()
available_artists.sort(axis=0)
print("Available artists:")
for i, artist in enumerate(available_artists):
    print(f"{i}. {artist}")

# Keep prompting until the user enters a number that indexes into available_artists
while True:
    artist_choice = input("Select the artist (enter the corresponding number): ")

    # A non-numeric entry is the only failure expected here; catching just
    # ValueError lets anything else (e.g. an interrupt) propagate normally.
    try:
        artist_choice = int(artist_choice)
    except ValueError:
        artist_choice = None

    if artist_choice is not None and 0 <= artist_choice < len(available_artists):
        break

    print("- - - - - ")
    print("Invalid choice. Please select a valid artist.")

    # Show the menu again so the valid numbers are visible next to the prompt
    for i, artist in enumerate(available_artists):
        print(f"{i}. {artist}")

# Assign artist choice to variable
target_artist = available_artists[artist_choice]

In [None]:
# Retrieve all years for the chosen artist and song title, sorted in ascending order
available_years = df.loc[(df['name'].str.lower().str.strip() == target_song_title) & (df['artists'] == target_artist), 'year'].unique()
available_years.sort(axis=0)

print(f"Available song years for {target_artist}:")
for i, year in enumerate(available_years):
    print(f"{i}. {year}")

# Keep prompting until the user enters a number that indexes into available_years
while True:
    year_choice = input("Select the song year (enter the corresponding number): ")

    # A non-numeric entry is the only failure expected here; catching just
    # ValueError lets anything else (e.g. an interrupt) propagate normally.
    try:
        year_choice = int(year_choice)
    except ValueError:
        year_choice = None

    if year_choice is not None and 0 <= year_choice < len(available_years):
        break

    print("- - - - - ")
    print(f"Invalid choice. Please select a valid song year for {target_artist}.")
    # Show the menu again so the valid numbers are visible next to the prompt
    for i, year in enumerate(available_years):
        print(f"{i}. {year}")

# Assign year choice to variable
target_year = available_years[year_choice]

In [None]:
# Build one boolean mask per criterion, then keep only the rows matching
# the chosen title, artist, and year together
title_matches = df['name'].str.lower().str.strip() == target_song_title
artist_matches = df['artists'] == target_artist
year_matches = df['year'] == target_year
target_song = df.loc[title_matches & artist_matches & year_matches]

In [None]:
# Display the row(s) matching the selected title, artist, and year
target_song

In [None]:
# Retrieve index for selected song
# NOTE: these are index *labels* of df. The scaled-features lookup further down
# uses them with `.iloc` (positions), which lines up only because df still has
# the default 0..n-1 index from read_csv at this point.
target_song_index = target_song.index
# Display the index for inspection
target_song_index

## Preprocessing

In [None]:
# Label rows by song name and remove the columns that are not passed to the scaler
non_feature_columns = ['artists', 'id', 'release_date']
df = df.set_index(keys='name')
df = df.drop(labels=non_feature_columns, axis='columns')

# Standardize every remaining column and wrap the result back into a
# dataframe that keeps the same row labels and column names as df
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df)
df_scaled = pd.DataFrame(data=scaled_values, index=df.index, columns=df.columns)

# Preview the scaled features
df_scaled.head()

In [None]:
# Retrieve specific song features via index
# target_song_index is used positionally here; from this point on target_song
# holds the scaled feature row(s) rather than the raw dataset row(s).
target_song = df_scaled.iloc[target_song_index]
# Display the scaled features of the selected song
target_song

## Calculate Euclidean Distances

In [None]:
# Measure how far the selected song's scaled features are from those of
# every song in the scaled dataframe
distances = euclidean_distances(X=target_song, Y=df_scaled)

In [None]:
# Display the raw distance array
distances

In [None]:
# Length of the outer dimension of the distance array
# NOTE(review): presumably this counts target-song rows, not compared songs
# (distances is indexed with [0] below to get the per-song values) — confirm.
len(distances)

In [None]:
# Take the first row of the distance array and place it in the unscaled
# dataframe as the leading 'distance' column
df_distance_col = distances[0]
df.insert(loc=0, column='distance', value=df_distance_col)
# Display the dataframe with the new column
df

## Sort Dataframe by Distance to Selected Song

In [None]:
# Order songs from closest to farthest from the selected song
df_sorted = df.sort_values(by='distance', ascending=True)

# Preview the closest matches
df_sorted.head()

In [None]:
# Summary statistics for the sorted dataframe, including the distance column
df_sorted.describe()

In [None]:
# Show the n songs closest to the selected song. One extra row is included
# because the selected song is itself part of the compared set.
n = 5
df_sorted.iloc[:n + 1]

## Play Song on Spotify in the Browser

In [None]:
import json
import spotipy
import webbrowser

import os

## Get user information if they have Spotify Premium

# Read the app credentials from the environment instead of hard-coding them,
# so the client secret is never stored in the notebook. These are the
# variable names spotipy itself documents for its OAuth configuration.
# NOTE: the secret that was previously committed here should be rotated in
# the Spotify developer dashboard.
clientID = os.environ.get('SPOTIPY_CLIENT_ID')
clientSecret = os.environ.get('SPOTIPY_CLIENT_SECRET')
# Falls back to the redirect value this notebook has always used.
# NOTE(review): presumably this must match the redirect URI registered for
# the Spotify app — confirm.
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'youtube.com')

In [None]:
# Build the OAuth helper from the configured app credentials
o_object = spotipy.SpotifyOAuth(
    client_id=clientID,
    client_secret=clientSecret,
    redirect_uri=redirect_uri,
)
# Request the token payload and pull the access token string out of it
token1 = o_object.get_access_token()
token = token1['access_token']
# Create the Spotify client that sends this token with its requests
s_Object = spotipy.Spotify(auth=token)

In [None]:
# Ask Spotify for the profile of the user the client is authenticated as
user = s_Object.current_user()

In [None]:
# Ask the user which track to look up
track_name = input("Enter Track Name: ")
artist_name = input("Enter Artist Name: ")

# Query Spotify for the single best match on track and artist
search_query = f"track:{track_name} artist:{artist_name}"
results = s_Object.search(q=search_query, limit=1, offset=0, type="track")

# Pull the track list out of the JSON response
tracks1 = results['tracks']
if not tracks1['items']:
    print("No matching tracks found.")
else:
    # Open the first match's Spotify link in the default web browser
    song_url = tracks1['items'][0]['external_urls']['spotify']
    webbrowser.open(song_url)