# Spotify Data Extraction

## Import Libraries

In [8]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import pandas as pd

## Credentials

In [9]:
client_id = '62d3c0f89081441c9b7e0e9ee909e29f'
client_secret = '7ff8efd44cbc4c9db03a4891318b6c26'

## Authenticate

In [10]:
auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)
redirect_uri = 'http://127.0.0.1:8889/callback'

In [11]:
# Scope for reading playlists
scope = "playlist-read-private playlist-read-collaborative"

In [12]:
# Authenticate with OAuth
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
    scope=scope
))

In [13]:
# Replace this with your playlist URI or ID
playlist_uri = 'https://open.spotify.com/playlist/4scXszKyPSHBuJJvgYk0HA?si=500e74758fb24633'

In [14]:
# Extract tracks
results = sp.playlist_tracks(playlist_uri)
tracks = results['items']

In [19]:
track_data = []

for item in tracks:
    track = item.get('track')
    if not track or not track.get('album'):
        continue  # skip if no track or no album

    release_date = track['album'].get('release_date')
    if not release_date:
        continue  # skip if no release date

    try:
        year = int(release_date.split('-')[0])
        if 2000 <= year <= 2009:
            track_data.append({
                "track": track['name'],
                "artist": track['artists'][0]['name'],
                "album": track['album']['name'],
                "release_year": year,
                "uri": track['uri']
            })
    except Exception as e:
        print(f"Skipping due to unexpected error: {e}")

In [20]:
# Convert to DataFrame and filter 2000s
df = pd.DataFrame(track_data)
df_2000s = df[(df['release_year'] >= 2000) & (df['release_year'] <= 2009)]

# Output
print(df_2000s)

                                               track                 artist  \
0                                   This Is The Life          Amy Macdonald   
1   Si Te Vas / Que Tengo Que Hacer - Original Remix                  Omega   
2                                            Changes      Butterfly Boucher   
3                                          Fi er min                Natasja   
4                                          Op og ned          Thomas Helmig   
5                                Relax, Take It Easy                   MIKA   
6                                       In This City          Iglu & Hartly   
7                     Slow Dancing in a Burning Room             John Mayer   
8                                               Kids                   MGMT   
9                                         Can't Stop  Red Hot Chili Peppers   
10                                     Like a Prayer                Madonna   
11                 Young Forever [Jay-Z + Mr Hudson]

In [None]:
# Optional: Save
#df_2000s.to_csv("origin_2000s_songs.csv", index=False)

In [21]:
track_data = []

for item in tracks:
    track = item.get('track')
    if not track or not track.get('album'):
        continue  # skip if no track or no album

    release_date = track['album'].get('release_date')
    if not release_date:
        continue  # skip if no release date

    try:
        year = int(release_date.split('-')[0])
        if 2010 <= year <= 2019:
            track_data.append({
                "track": track['name'],
                "artist": track['artists'][0]['name'],
                "album": track['album']['name'],
                "release_year": year,
                "uri": track['uri']
            })
    except Exception as e:
        print(f"Skipping due to unexpected error: {e}")

In [22]:
# Convert to DataFrame and filter 2000s
df = pd.DataFrame(track_data)
df_2010s = df[(df['release_year'] >= 2010) & (df['release_year'] <= 2019)]

# Output
print(df_2000s)

                                               track                 artist  \
0                                   This Is The Life          Amy Macdonald   
1   Si Te Vas / Que Tengo Que Hacer - Original Remix                  Omega   
2                                            Changes      Butterfly Boucher   
3                                          Fi er min                Natasja   
4                                          Op og ned          Thomas Helmig   
5                                Relax, Take It Easy                   MIKA   
6                                       In This City          Iglu & Hartly   
7                     Slow Dancing in a Burning Room             John Mayer   
8                                               Kids                   MGMT   
9                                         Can't Stop  Red Hot Chili Peppers   
10                                     Like a Prayer                Madonna   
11                 Young Forever [Jay-Z + Mr Hudson]

# All decades

In [23]:
# --- Fetch playlist tracks ---
results = sp.playlist_items(playlist_uri)
tracks = results['items']
while results['next']:
    results = sp.next(results)
    tracks.extend(results['items'])

In [24]:
# --- Initialise decade buckets ---
decades = {
    "1980s": [],
    "1990s": [],
    "2000s": [],
    "2010s": [],
    "2020s": [],
    "Unknown": []
}

In [25]:
for item in tracks:
    track = item.get('track')
    if not track or not track.get('album'):
        continue

    release_date = track['album'].get('release_date')
    if not release_date:
        decades["Unknown"].append(track)
        continue

    try:
        year = int(release_date.split('-')[0])
        entry = {
            "track": track['name'],
            "artist": track['artists'][0]['name'],
            "album": track['album']['name'],
            "release_year": year,
            "uri": track['uri']
        }

        if 1980 <= year <= 1989:
            decades["1980s"].append(entry)
        elif 1990 <= year <= 1999:
            decades["1990s"].append(entry)
        elif 2000 <= year <= 2009:
            decades["2000s"].append(entry)
        elif 2010 <= year <= 2019:
            decades["2010s"].append(entry)
        elif 2020 <= year <= 2029:
            decades["2020s"].append(entry)
        else:
            decades["Unknown"].append(entry)

    except Exception as e:
        print(f"Skipping due to unexpected error: {e}")
        decades["Unknown"].append(track)

In [26]:
# --- Convert to DataFrames ---
df_1980s = pd.DataFrame(decades["1980s"])
df_1990s = pd.DataFrame(decades["1990s"])
df_2000s = pd.DataFrame(decades["2000s"])
df_2010s = pd.DataFrame(decades["2010s"])
df_2020s = pd.DataFrame(decades["2020s"])
df_unknown = pd.DataFrame(decades["Unknown"])

In [27]:
# --- Print summary ---
print(f"🎶 Tracks by Decade:")
for label, data in decades.items():
    print(f"{label}: {len(data)} tracks")

🎶 Tracks by Decade:
1980s: 2 tracks
1990s: 34 tracks
2000s: 219 tracks
2010s: 469 tracks
2020s: 51 tracks
Unknown: 14 tracks


In [28]:
import openpyxl

In [29]:
# Export all decades to a single Excel file with separate sheets
with pd.ExcelWriter("tracks_by_decade.xlsx", engine='openpyxl') as writer:
    df_1980s.to_excel(writer, sheet_name='1980s', index=False)
    df_1990s.to_excel(writer, sheet_name='1990s', index=False)
    df_2000s.to_excel(writer, sheet_name='2000s', index=False)
    df_2010s.to_excel(writer, sheet_name='2010s', index=False)
    df_2020s.to_excel(writer, sheet_name='2020s', index=False)
    df_unknown.to_excel(writer, sheet_name='Unknown', index=False)