# Import libraries

In [None]:
import os

import pandas as pd
import psycopg2
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Retrieve data from Spotify, 3 songs for each genre

In [None]:
# Read the Spotify API credentials from the environment instead of hard-coding
# them: secrets committed to a notebook leak to everyone who can read the file.
# SECURITY: the credentials that were previously committed here must be
# considered compromised and should be rotated in the Spotify dashboard.
# The variable names are the ones spotipy itself falls back to, so a missing
# value (None) results in spotipy's own "no client_id / client_secret" error
# when the client is created.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")

In [None]:
# Build the Spotipy client; authentication is delegated to a
# client-credentials manager created from the id/secret pair above.
credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=credentials_manager)

In [None]:
# Genres used as recommendation seeds; one recommendations request is made
# per entry, in this order.
seed_genres = [
    'anime',
    'ambient',
    'classical',
    'country',
    'chill',
    'edm',
    'folk',
    'gospel',
    'hip-hop',
    'jazz',
    'k-pop',
    'latin',
    'metal',
    'pop',
    'r-n-b',
    'rock',
    'video-game-music',
    'rap',
    'lofi',
]

In [None]:
# Accumulator for the per-track dictionaries collected from Spotify;
# starts out empty and is filled by the retrieval loop.
all_track_data = list()

In [None]:
# For every seed genre, request 3 recommended tracks (market "SG") and build
# one flat dictionary per track: basic metadata, popularity, and the audio
# features returned by Spotify. Each finished dictionary is appended to
# all_track_data.
for seed_genre in seed_genres:
    recommended_tracks = sp.recommendations(seed_genres=[seed_genre], limit=3, country="SG")
    for track in recommended_tracks["tracks"]:
        album = track["album"]
        # An album may come back without artwork; store None instead of
        # raising IndexError on an empty image list.
        images = album.get("images") or []
        track_data = {
            "track_name": track["name"],
            "artist": ", ".join(artist["name"] for artist in track["artists"]),
            "album": album["name"],
            "genre": seed_genre,
            "image": images[0]["url"] if images else None,
            "preview_url": track["preview_url"],
            # NOTE: this is the Spotify URL of the track's album, not of the
            # track itself.
            "url": album["external_urls"]["spotify"],
        }

        # Search for individual track by ID to get "popularity".
        track_search = sp.track(track["id"])
        track_data["popularity"] = track_search["popularity"]

        # Search for audio features of track by ID. The response is a list
        # with one entry per requested track; the entry can be None when no
        # features are available, in which case the feature keys are simply
        # left out of this row (they become NaN in the DataFrame).
        features_list = sp.audio_features(track["id"])
        audio_features = features_list[0] if features_list else None
        if audio_features is not None:
            track_data.update(audio_features)

        # Append only once the row is fully populated.
        all_track_data.append(track_data)

# Convert Spotify data to dataframe and inspect data

In [None]:
# Turn the list of per-track dictionaries into a table: one row per track,
# one column per dictionary key.
df = pd.DataFrame(data=all_track_data)

In [None]:
# Disable column-width truncation so long values (e.g. URLs) print in full,
# then show the first three rows.
pd.options.display.max_colwidth = None

first_rows = df.head(n=3)
print(first_rows)

In [None]:
# Report the table dimensions as a (rows, columns) tuple.
row_count, column_count = df.shape
print((row_count, column_count))

In [None]:
# List the distinct values present in the 'genre' column.
distinct_genres = df['genre'].unique()
print(distinct_genres)

In [None]:
# Count missing values per column.
# (For percentages instead of counts: total_nulls * 100 / len(df).)
missing_mask = df.isna()
total_nulls = missing_mask.sum()
print(total_nulls)

In [None]:
# Show rows that are exact duplicates of an earlier row
# (an empty result means there are none).
duplicate_mask = df.duplicated()
print(df.loc[duplicate_mask])

In [None]:
# Remove the audio-feature bookkeeping columns that are not loaded into the
# database; the DataFrame is modified in place.
columns_to_drop = ['type', 'uri', 'track_href', 'analysis_url']
df.drop(columns=columns_to_drop, inplace=True)
print(df.head(n=2))

# Connect to the database, create the table, and load the data into it

In [None]:
# Connect to the PostgreSQL database.
# Connection settings are read from the environment instead of being
# hard-coded, so no password lives in the notebook. The variable names are
# the standard libpq ones (PGHOST, PGDATABASE, PGUSER, PGPASSWORD).
# A missing variable raises KeyError here, before any connection attempt.
# SECURITY: the password that was previously committed in this cell must be
# considered compromised and should be rotated.
db_params = {
    "host": os.environ["PGHOST"],
    "dbname": os.environ["PGDATABASE"],
    "user": os.environ["PGUSER"],
    "password": os.environ["PGPASSWORD"],
}

# Open the connection and a cursor used by the cells below.
conn = psycopg2.connect(**db_params)
cur = conn.cursor()

In [None]:
# DDL for the recommended_tracks table: one column per DataFrame column that
# is loaded. IF NOT EXISTS makes the statement safe to re-run.
create_table_sql = """
    CREATE TABLE IF NOT EXISTS recommended_tracks (
        track_name VARCHAR,
        artist VARCHAR,
        album VARCHAR,
        genre VARCHAR,
        image VARCHAR,
        preview_url VARCHAR,
        url VARCHAR,
        popularity SMALLINT,
        danceability FLOAT,
        energy FLOAT,
        key SMALLINT,
        loudness FLOAT,
        mode SMALLINT,
        speechiness FLOAT,
        acousticness FLOAT,
        liveness FLOAT, 
        valence FLOAT,
        tempo FLOAT,
        id VARCHAR,
        duration_ms INT,
        time_signature SMALLINT
    )
"""

# Run the DDL and make it permanent.
cur.execute(create_table_sql)
conn.commit()

In [None]:
# Insert every DataFrame row into recommended_tracks.
# The column list is declared once; the column clause and the placeholder
# list of the INSERT are both derived from it so they cannot drift apart.
insert_columns = [
    "track_name", "artist", "album", "genre", "image", "preview_url", "url",
    "popularity", "danceability", "energy", "key", "loudness", "mode",
    "speechiness", "acousticness", "liveness", "valence", "tempo", "id",
    "duration_ms", "time_signature",
]
insert_sql = "INSERT INTO recommended_tracks ({}) VALUES ({})".format(
    ", ".join(insert_columns),
    ", ".join(["%s"] * len(insert_columns)),
)

# Cast to object and replace missing values (NaN) with None so they are
# stored as SQL NULL rather than being sent as NaN to integer columns.
insert_frame = df[insert_columns]
insert_frame = insert_frame.astype(object).where(insert_frame.notna(), None)
insert_rows = list(insert_frame.itertuples(index=False, name=None))

try:
    # Values are passed as parameters, never interpolated into the SQL text.
    cur.executemany(insert_sql, insert_rows)
except Exception:
    # Leave the connection usable (not stuck in an aborted transaction)
    # and surface the original error.
    conn.rollback()
    raise

conn.commit()

In [None]:
# Release database resources: close the cursor first, then the connection
# it belongs to.
cur.close()
conn.close()