# Extractor

In [1]:
import ast
import html
import os
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import spotipy
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from spotipy.oauth2 import SpotifyClientCredentials

## 1. Getting the tracks from Spotify

In [2]:
def get_playlist_data(sp, playlist_id):
    """Collect per-track metadata (including artist genres) for a playlist.

    The playlist is read page by page through ``sp.playlist_items`` until an
    empty page is returned. Genres are looked up per artist via ``sp.artist``;
    each artist is fetched at most once per call and reused for later tracks.

    Args:
        sp: Client object exposing ``playlist_items(playlist_id, offset=, limit=)``
            and ``artist(artist_id)`` (a ``spotipy.Spotify`` instance in this
            notebook).
        playlist_id: Identifier of the playlist to read.

    Returns:
        list[dict]: One dict per track with the keys ``name``, ``artist``
        (artist names joined with ``", "``), ``album``, ``release_date``,
        ``duration_ms``, ``popularity`` and ``genres`` (duplicates removed,
        first-seen order).

    Note:
        Any exception raised while paging is printed and stops the crawl; the
        tracks gathered so far are still returned (best-effort behaviour).
    """
    all_tracks = []
    genres_by_artist = {}  # artist id -> genres, so repeated artists cost one API call
    offset = 0
    limit = 100

    while True:
        try:
            response = sp.playlist_items(playlist_id, offset=offset, limit=limit)
            items = response['items']

            if not items:  # An empty page means the playlist is exhausted
                break

            for item in items:
                track = item.get('track')
                if not track:  # Entries without track data are skipped
                    continue

                genres = []
                for artist in track['artists']:
                    artist_id = artist.get('id')
                    if artist_id is None:
                        # Nothing to look up without an id; skip instead of
                        # letting the lookup fail and abort the whole crawl.
                        continue
                    if artist_id not in genres_by_artist:
                        artist_info = sp.artist(artist_id)
                        genres_by_artist[artist_id] = artist_info.get('genres', [])
                    genres.extend(genres_by_artist[artist_id])

                all_tracks.append({
                    'name': track['name'],
                    'artist': ', '.join(artist['name'] for artist in track['artists']),
                    'album': track['album']['name'],
                    'release_date': track['album']['release_date'],
                    'duration_ms': track['duration_ms'],
                    'popularity': track['popularity'],
                    # dict.fromkeys removes duplicates while keeping a stable order
                    'genres': list(dict.fromkeys(genres)),
                })

            offset += limit
        except Exception as e:
            print(f"Error: {e}")
            break

    return all_tracks

def save_genre_histogram(df, filename='genre_histogram_horizontal.png', dpi=600):
    """Save a horizontal bar chart of genre frequencies to an image file.

    Args:
        df: DataFrame with a ``genres`` column. Each cell may be a list of
            genre names, or the string form of such a list (which is what the
            column contains after the frame has been written to CSV and read
            back). Cells that are neither (for example NaN) are ignored.
        filename: Path of the image file to write.
        dpi: Resolution passed to ``savefig``.

    Returns:
        None. The chart is written to ``filename`` and a confirmation line is
        printed.
    """
    # Flatten the per-track genre lists, decoding CSV-serialised lists first.
    # Iterating a raw string would count single characters instead of genres.
    all_genres = []
    for cell in df['genres']:
        if isinstance(cell, str):
            try:
                cell = ast.literal_eval(cell)
            except (ValueError, SyntaxError):
                continue  # not a list literal; nothing usable in this cell
        if isinstance(cell, (list, tuple, set)):
            all_genres.extend(cell)

    # Count occurrences of each genre and order them by frequency
    genre_counts = Counter(all_genres)
    genre_df = pd.DataFrame(list(genre_counts.items()), columns=['Genre', 'Count'])
    genre_df = genre_df.sort_values(by='Count', ascending=False)

    # Tall figure so that every genre label stays readable
    fig, ax = plt.subplots(figsize=(12, 50))
    ax.barh(genre_df['Genre'], genre_df['Count'], color='skyblue')
    ax.set_xlabel('Frequency')
    ax.set_ylabel('Genres')
    ax.set_title('Genre Distribution in Playlist')
    ax.invert_yaxis()  # Most frequent genre at the top
    fig.tight_layout(pad=2.0)  # Run after the axis flip so the final layout is padded

    # Save the plot to a file
    fig.savefig(filename, dpi=dpi)
    print(f"Histogram saved to {filename}")


In [None]:
# Spotify API credentials are read from the environment so that no secret is
# stored in the notebook. Export SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# before starting the kernel.
# NOTE(review): credentials were previously hardcoded in this cell; treat them
# as leaked and rotate them in the Spotify developer dashboard.
CLIENT_ID = os.environ["SPOTIPY_CLIENT_ID"]
CLIENT_SECRET = os.environ["SPOTIPY_CLIENT_SECRET"]

# Authenticate with Spotify API
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET))

# Example usage
playlist_id = "6cR4y6y6ExPNk93BodOG56"  # Use the playlist ID (or URI)
playlist_name = "techno_flow_state"
path = f'data/{playlist_name}-{playlist_id}.csv'
df = pd.DataFrame(get_playlist_data(sp, playlist_id))

# Empty placeholder columns, filled in later by the YouTube search step
df['YouTube_Title'] = None
df['YouTube_URL'] = None

# Make sure the output folder exists; to_csv does not create it
os.makedirs(os.path.dirname(path), exist_ok=True)
df.to_csv(path, index=False)

In [6]:
# Show the playlist DataFrame produced by the extraction cell (notebook rich display).
df

Unnamed: 0,name,artist,album,release_date,duration_ms,popularity,genres,YouTube Title,YouTube URL
0,Glue,BICEP,Bicep,2017-09-01,269149,70,"[electronica, ambient house]",,
1,Because You Move Me,"Tinlicker, Helsloot",Because You Move Me,2017-07-31,196375,75,"[melodic house, house]",,
2,BLUEBERRY YUM YUM,"BHZ, Monk, Dead Dawg, Big Pat",HALB:VIER DELUXE,2021-07-16,232844,0,"[german alternative rap, german trap, german h...",,
3,Crimewave,"Crystal Castles, HEALTH",Crystal Castles,2008-03-16,258453,62,"[nintendocore, bitpop, indietronica, neo-synth...",,
4,Overthinker,INZO,Overthinker,2018-07-23,268000,4,[],,
...,...,...,...,...,...,...,...,...,...
326,Color Blind,"Diplo, Lil Xan",Color Blind,2018-03-22,177480,54,"[edm, electro house, dance pop, cloud rap, pop...",,
327,Jenny (I Wanna Ruin Our Friendship),Studio Killers,Jenny (I Wanna Ruin Our Friendship),2020-12-04,215280,66,[],,
328,Mi Fábrica de Baile,"Hnos Munoz, Joe Crepúsculo",Mi Fábrica de Baile,2023-12-14,148064,41,"[r&b en espanol, spanish indie pop, spanish no...",,
329,heavy,MAYV,heavy,2024-12-16,173731,16,[],,


In [3]:
# Write the genre histogram to disk. `df` must already exist in the kernel:
# running this cell before `df` is defined raises NameError.
save_genre_histogram(df, filename='genre_histogram_highres.png', dpi=600)

NameError: name 'df' is not defined

## 2. Searching on YouTube

In [13]:

def search_youtube(song_name, api_key, max_results=1):
    """Search YouTube for a song and return the first matching video.

    Args:
        song_name: Free-text query sent to the search endpoint.
        api_key: Developer key used to build the YouTube Data API v3 client.
        max_results: Number of results requested; only the first result is used.

    Returns:
        tuple: ``(video_title, video_url)`` for the first hit, or
        ``(None, None)`` when the search returns no items or the API responds
        with an ``HttpError``.
    """
    try:
        # Initialize YouTube API client
        youtube = build('youtube', 'v3', developerKey=api_key)

        # Perform the search
        search_response = youtube.search().list(
            q=song_name,
            part='snippet',
            maxResults=max_results,
            type='video'
        ).execute()

        items = search_response.get('items') or []
        if not items:
            return None, None

        # Only the top-ranked result is used
        best_match = items[0]
        video_id = best_match['id']['videoId']
        video_url = f"https://www.youtube.com/watch?v={video_id}"
        # Titles arrive with HTML entities (e.g. "&amp;"); decode them so the
        # stored title is plain text.
        video_title = html.unescape(best_match['snippet']['title'])
        return video_title, video_url
    except HttpError as e:
        if e.resp.status == 403:
            print("Quota exceeded. Skipping...")
        else:
            print(f"An error occurred: {e}")
        return None, None


def create_youtube_playlist_from_spotify(spotify_df, api_key):
    """Fill in YouTube title/URL columns for every track in a DataFrame.

    Rows whose ``YouTube_Title`` is already a string are kept as they are, so
    the function can be re-run on a partially completed file without repeating
    searches. All other rows are looked up with ``search_youtube`` using
    ``"<name> <artist>"`` as the query.

    Args:
        spotify_df: DataFrame with ``name`` and ``artist`` columns. The
            ``YouTube_Title`` / ``YouTube_URL`` columns are optional; when they
            are absent every row is searched.
        api_key: Developer key forwarded to ``search_youtube``.

    Returns:
        The same DataFrame object, with ``YouTube_Title`` and ``YouTube_URL``
        columns set (the input frame is modified in place).
    """
    youtube_titles = []
    youtube_urls = []

    for _, row in spotify_df.iterrows():
        # .get() tolerates frames that do not have the YouTube columns yet
        known_title = row.get('YouTube_Title')
        if isinstance(known_title, str):
            youtube_title, youtube_url = known_title, row.get('YouTube_URL')
        else:
            song_name = f"{row['name']} {row['artist']}"
            youtube_title, youtube_url = search_youtube(song_name, api_key)
            print(f"Found: {song_name} -> {youtube_title} -> {youtube_url}")

        # Append the data to the lists
        youtube_titles.append(youtube_title)
        youtube_urls.append(youtube_url)

    # Add the YouTube columns to the DataFrame
    spotify_df['YouTube_Title'] = youtube_titles
    spotify_df['YouTube_URL'] = youtube_urls

    return spotify_df

In [14]:
# Quota usage for the YouTube Data API can be checked at:
# https://console.cloud.google.com/apis/api/youtube.googleapis.com/quotas
#
# The API key is read from the environment so that it is not stored in the
# notebook. Export YOUTUBE_API_KEY before starting the kernel.
# NOTE(review): a key was previously hardcoded in this cell; treat it as
# leaked and revoke it in the Google Cloud console.
api_key = os.environ["YOUTUBE_API_KEY"]

playlist_id = "6cR4y6y6ExPNk93BodOG56"  # Use the playlist ID (or URI)
playlist_name = "techno_flow_state"
path = f'data/{playlist_name}-{playlist_id}.csv'

# sep=None lets pandas detect the delimiter. This cell writes the file back
# comma-separated, so a fixed sep=';' could not re-read its own output.
tracks_from_csv = pd.read_csv(path, sep=None, engine='python')
df = create_youtube_playlist_from_spotify(tracks_from_csv, api_key)

# Save the updated DataFrame to a CSV
df.to_csv(path, index=False)
print(f"CSV saved as {path}")
df

Found: praise the lord tekkno - Remix Muppet DJ, SECA Records -> we are the people tekkno -> https://www.youtube.com/watch?v=BbOb2H0h0H4
Found: SUPERNOVA SAIKO -> SAIKO - SUPERNOVA (Official Video) | SAKURA -> https://www.youtube.com/watch?v=BbZi8xGMyuM
Found: Paso de Entrar Jarfaiter, Parkineos -> Jarfaiter x @parkineos  - PASO DE ENTRAR - 💣 -> https://www.youtube.com/watch?v=RyjZBrWlXPQ
Found: se que no debo Orslok -> orslok - se que no debo -> https://www.youtube.com/watch?v=zLqeGiTJLKg
Found: Tofu Delivery Orslok, Rojuu -> orslok, rojuu - tofu delivery -> https://www.youtube.com/watch?v=wxKBvF2pS_A
Found: Casa Kira Carolina Durante, Orslok -> Carolina Durante, Orslok - Casa Kira -> https://www.youtube.com/watch?v=49ISq1mbJ6k
Found: Xanadu Ummet Ozcan -> Ummet Ozcan - Xanadu (Mongolian Techno) -> https://www.youtube.com/watch?v=9uMtnH7cABg
Found: Marianela (Que Pasa) - Extended Mix HUGEL, Merk & Kremont, Lirico En La Casa -> HUGEL, Merk &amp; Kremont, Lirico En La Casa - Marianela (

Unnamed: 0,name,artist,album,release_date,duration_ms,popularity,genres,YouTube_Title,YouTube_URL,downloaded
0,Glue,BICEP,Bicep,01/09/2017,269149,70,"['electronica', 'ambient house']",BICEP | GLUE (Official Video),https://www.youtube.com/watch?v=A7ZxRs45tTg,True
1,Because You Move Me,"Tinlicker, Helsloot",Because You Move Me,31/07/2017,196375,75,"['melodic house', 'house']",Tinlicker & Helsloot - Because You Move Me,https://www.youtube.com/watch?v=q-e8-kaWDvI,True
2,BLUEBERRY YUM YUM,"BHZ, Monk, Dead Dawg, Big Pat",HALB:VIER DELUXE,16/07/2021,232844,0,"['german alternative rap', 'german trap', 'ger...",BLUEBERRY YUM YUM,https://www.youtube.com/watch?v=VNJmhXQoB0Q,True
3,Crimewave,"Crystal Castles, HEALTH",Crystal Castles,16/03/2008,258453,62,"['nintendocore', 'bitpop', 'indietronica', 'ne...",Crystal Castles - Crimewave (Crystal Castles v...,https://www.youtube.com/watch?v=ayc4Nv1fnZY,True
4,Overthinker,INZO,Overthinker,23/07/2018,268000,4,[],INZO - Overthinker,https://www.youtube.com/watch?v=luQSQuCHtcI,True
...,...,...,...,...,...,...,...,...,...,...
326,Color Blind,"Diplo, Lil Xan",Color Blind,22/03/2018,177480,54,"['edm', 'electro house', 'dance pop', 'cloud r...",,,False
327,Jenny (I Wanna Ruin Our Friendship),Studio Killers,Jenny (I Wanna Ruin Our Friendship),04/12/2020,215280,66,[],,,False
328,Mi FÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃ...,"Hnos Munoz, Joe CrepÃÂÃÂÃÂÃÂÃÂÃÂÃ...",Mi FÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃ...,14/12/2023,148064,41,"['r&b en espanol', 'spanish indie pop', 'spani...",,,False
329,heavy,MAYV,heavy,16/12/2024,173731,16,[],,,False


## 3. Downloading the videos