# Initialization & Imports
This section initializes the Spotify client using the provided client ID and secret, which are necessary for accessing the Spotify API. These credentials allow secure communication with the Spotify servers.


In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
import re
import csv

In [None]:
# Uses the Spotify API to get the valence, energy, and genre of each song.

# Credentials are taken from the environment (using the variable names that
# spotipy documents) so real secrets do not have to be written into the
# notebook. When a variable is unset the original placeholder is kept, which
# then has to be replaced by hand with a real value.
SPOTIFY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', 'INPUT_CLIENT_ID')
SPOTIFY_CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', 'INPUT_CLIENT_SECRET')

# Spotify client setup: `spotify` is the shared client used by the lookup
# functions defined in the following cells.
auth_manager = SpotifyClientCredentials(client_id=SPOTIFY_CLIENT_ID, client_secret=SPOTIFY_CLIENT_SECRET)
spotify = spotipy.Spotify(auth_manager=auth_manager)

# Cleaning Song Titles
This function takes a song title as input and removes any content enclosed in parentheses or brackets, cleaning up the title for a more accurate search. This step ensures that extraneous information doesn't interfere with metadata retrieval.

In [None]:
def clean_song_title(song_title):
    """Strip bracketed annotations from a song title.

    Removes every ``(...)`` or ``[...]`` group (for example "(feat. X)" or
    "[Live]") and then collapses the whitespace left behind, so a group
    removed from the middle of a title does not leave a double space.

    Args:
        song_title: Raw title text.

    Returns:
        The title without bracketed groups, with single spaces between words
        and no leading or trailing whitespace.
    """
    # Non-greedy match: each bracketed group is removed on its own rather
    # than swallowing everything between the first opener and last closer.
    without_brackets = re.sub(r"[\(\[].*?[\)\]]", "", song_title)
    # split() + join normalizes interior whitespace and trims both ends.
    return " ".join(without_brackets.split())


# Get Spotify Genre
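This function searches Spotify for the specified song and retrieves genre information for it. It constructs a query using the song's title and, optionally, the artist's name. The genres come from the artist record rather than from the track itself: after retrieving the best-matching track, the function looks up the first artist listed on that track and returns that artist's list of genres. If no track matches the query, it returns `None`.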


In [None]:
# Look up genre metadata on Spotify for a given track
def get_spotify_genre(track_name, artist_name=None):
    """Return the genres of the first artist on the best-matching track.

    Args:
        track_name: Title used for the ``track:`` part of the search query.
        artist_name: Optional artist used for the ``artist:`` part of the
            query; left out of the query when empty or ``None``.

    Returns:
        The ``genres`` value from the artist lookup (an empty list when the
        key is absent), or ``None`` when the search returns no tracks.
    """
    search_terms = f"track:{track_name}"
    if artist_name:
        search_terms = f"{search_terms} artist:{artist_name}"

    # Only the top hit is requested; everything below relies on that one match.
    response = spotify.search(search_terms, type='track', limit=1)
    matches = response['tracks']['items']
    if not matches:
        return None

    # Genres are read from the artist record of the first credited artist.
    primary_artist_id = matches[0]['artists'][0]['id']
    artist_record = spotify.artist(primary_artist_id)
    return artist_record.get('genres', [])

# Obtain Valence
This function searches for a song and retrieves its valence, which measures the song's mood. The query is constructed similarly to the previous function. After retrieving the track's data, it returns the valence value.

In [None]:
# Path to your spectrograms
audio_spect_dir = "/content/audio_spects_2"


def get_spotify_valence(song_title, artist_name=None):
    """Return the valence of the best-matching Spotify track.

    Args:
        song_title: Title used for the ``track:`` part of the search query.
        artist_name: Optional artist used for the ``artist:`` part of the
            query; left out of the query when empty or ``None``.

    Returns:
        The ``valence`` value from the track's audio features, or ``None``
        when no track matches, no audio features are available for the
        track, or the features carry no ``valence`` key.
    """
    # Build the query with the song title first, then the optional artist.
    query = f"track:{song_title}"
    if artist_name:
        query += f" artist:{artist_name}"

    # Search for the track; only the top hit is requested.
    result = spotify.search(query, type='track', limit=1)
    items = result['tracks']['items']
    if not items:
        return None

    # The audio-features response is a list with one entry per requested
    # track; that entry can be None when Spotify has no analysis for the
    # track. Guard against it so this function returns None instead of
    # raising AttributeError on `.get`.
    features = spotify.audio_features(items[0]['id'])
    if not features or features[0] is None:
        return None

    return features[0].get('valence')

# Obtain Energy
This function retrieves a song's energy value, representing its dynamism or liveliness. The function uses a similar query pattern to previous functions. The energy value is then extracted from the track's metadata and returned.

In [None]:
# Path to your spectrograms
audio_spect_dir = "/content/audio_spects_2"


def get_spotify_energy(song_title, artist_name=None):
    """Return the energy of the best-matching Spotify track.

    Args:
        song_title: Title used for the ``track:`` part of the search query.
        artist_name: Optional artist used for the ``artist:`` part of the
            query; left out of the query when empty or ``None``.

    Returns:
        The ``energy`` value from the track's audio features, or ``None``
        when no track matches, no audio features are available for the
        track, or the features carry no ``energy`` key.
    """
    query = f"track:{song_title}"
    if artist_name:
        query += f" artist:{artist_name}"

    # Search for the track; only the top hit is requested.
    result = spotify.search(query, type='track', limit=1)
    items = result['tracks']['items']
    if not items:
        return None

    # The audio-features response is a list with one entry per requested
    # track; that entry can be None when Spotify has no analysis for the
    # track. Guard against it so this function returns None instead of
    # raising AttributeError on `.get`.
    features = spotify.audio_features(items[0]['id'])
    if not features or features[0] is None:
        return None

    return features[0].get('energy')

# Main Loop
This section reads the directory where spectrogram images of songs are stored (`audio_spect_dir`, set in an earlier cell). It then creates an output CSV file to store Spotify metadata for each song. The script iterates over each PNG file in the directory. For each file:

* The artist name and song title are extracted from the file name.
* The song title is cleaned.
* The Spotify API is used to retrieve valence, genre, and energy values.

If all metadata is retrieved successfully, it's written into the CSV file. The metadata is also printed for each song.

In [None]:
# Number of songs for which valence, genre, and energy were all retrieved.
count = 0
# CSV output file
output_csv = "spotify_metadata.csv"


# For songs with valid Spotify metadata, save a row to the CSV so that each
# spectrogram can be labelled.
with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write the header row
    writer.writerow(["Title", "Valence", "Genre", "Energy"])

    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order of the CSV (and the printed log) reproducible between runs.
    for file_name in sorted(os.listdir(audio_spect_dir)):
        if not file_name.lower().endswith('.png'):  # Only image files are spectrograms
            continue

        # File names are expected to look like "Artist - Title.png".
        base_name = os.path.splitext(file_name)[0]
        artist_name, separator, song_title = base_name.partition(" - ")
        if not separator:
            # Without the " - " separator, partition() puts the whole name in
            # the artist slot and leaves the title empty. Search by the whole
            # name as a title instead of issuing an empty-title query.
            artist_name, song_title = "", base_name

        artist_name = artist_name.strip()
        # clean_song_title() also trims surrounding whitespace.
        song_title = clean_song_title(song_title)

        # Fetch valence, genre, and energy; any failure skips this file so
        # that one bad lookup does not abort the whole run.
        try:
            valence = get_spotify_valence(song_title, artist_name)
            genre = get_spotify_genre(song_title, artist_name)
            energy = get_spotify_energy(song_title, artist_name)
        except Exception as e:
            print(f"Error fetching data for '{song_title}': {e}")
            continue  # Skip to the next file if there's an error

        # A row is written only when all three values are present. `genre`
        # is a list, so an empty list counts as missing.
        has_all_metadata = valence is not None and energy is not None and bool(genre)
        if has_all_metadata:
            # Only the first genre in the list is stored as the label.
            writer.writerow([base_name, valence, genre[0], energy])
            count += 1

        # Print whatever metadata was retrieved, even if the row was skipped.
        print(f"Song: '{song_title}' by '{artist_name}'")
        if valence is not None:
            print(f"Valence: {valence:.2f}")
        if energy is not None:
            print(f"Energy: {energy:.2f}")
        if genre:
            print(f"Genre: {genre[0]}")

        print(f"Metadata retrieval count: {count}")
        print("-" * 30)