In [1]:
! pip install spotipy pandas



In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

In [3]:
# spotify credentials
client_id = "340ebe283d9f462489c995e108401871"
client_secret = "a84f4bc0ce67494c969e7a9b818ce574"
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

In [4]:
# function to get the audio features of a song
def get_tracks_from_playlist(playlist_id):
    tracks = []
    results = sp.playlist_tracks(playlist_id)
    
    while results:
        for item in results["items"]:
            track = item["track"]
            if track:
                tracks.append({
                    "track_id": track["id"],
                    "album_id": track["album"]["id"],
                    "artist_id": track["artists"][0]["id"],  # Primary artist
                    "release_date": track["album"]["release_date"],
                    "popularity": track["popularity"]
                })
        results = sp.next(results)
    
    return pd.DataFrame(tracks)

In [5]:
# get artists genres
def get_artist_genres(artist_ids):
    artist_data = []
    # Fetch artist info in batches of 50 (Spotify API limit)
    for i in range(0, len(artist_ids), 50):
        batch = artist_ids[i:i+50]
        artists = sp.artists(batch)["artists"]
        for artist in artists:
            genres = artist.get("genres", [])
            genre_list = genres if genres else ["Unknown"]
            for genre in genre_list:
                artist_data.append({
                    "artist_id": artist["id"],
                    "genres": genre
                })
    return pd.DataFrame(artist_data)

In [6]:
# get album details
def get_album_details(album_ids):
    albums = []
    # Fetch album info in batches of 20 (Spotify API limit)
    for i in range(0, len(album_ids), 20):
        batch = album_ids[i:i+20]
        album_results = sp.albums(batch)["albums"]
        for album in album_results:
            albums.append({
                "album_id": album["id"],
                "release_date": album["release_date"]
            })
    return pd.DataFrame(albums)

In [7]:
# Get tracks from a playlist (replace with a real playlist ID)
playlist_id = "7c2c13pKxvFDSV4WSyydyg"  # Example: Spotify's Top Hits 2025 playlist
tracks_df = get_tracks_from_playlist(playlist_id)

In [8]:
def calculate_genre_popularity(playlist_id):
    # 1. Get tracks from the playlist
    tracks_df = get_tracks_from_playlist(playlist_id)

In [9]:
 # 2. Get unique artist and album IDs
unique_artist_ids = tracks_df["artist_id"].unique()
unique_album_ids = tracks_df["album_id"].unique()

In [10]:
   # 3. Get artist genres and album release dates
artists_df = get_artist_genres(unique_artist_ids)
albums_df = get_album_details(unique_album_ids)

In [11]:
 # 4. Merge data to combine all information
merged_df = (
    tracks_df.merge(albums_df, on="album_id", how="left")  # Merge album release_date
    .merge(artists_df, on="artist_id", how="left")  # Merge artist genres
)

In [12]:
def calculate_avg_popularity(merged_df):
    # 5. Group by genres and release_date_y to calculate avg_popularity
    result_df = (
        merged_df.groupby(["genres", "release_date_y"])
        .agg(avg_popularity=("popularity", "mean"))
        .reset_index()
        .sort_values(by="release_date_y")
    )
    
    return result_df

In [13]:
# Get genre popularity over time
genre_popularity_df = calculate_genre_popularity(playlist_id)

# Display result
print(merged_df)

                    track_id                album_id               artist_id  \
0     33FPsMEl3UwpytDuyf9VYq  2KaqhTuykw9yrstJJMgiBe  73sSFVlM6pkweLXE8qw1OS   
1     33FPsMEl3UwpytDuyf9VYq  2KaqhTuykw9yrstJJMgiBe  73sSFVlM6pkweLXE8qw1OS   
2     5zyrEv4F3FaLECI8TOKpFM  490sLjsTmSo7MnA8noKMOH  4llklDtTTyMYMY2LfFOkTI   
3     5zyrEv4F3FaLECI8TOKpFM  490sLjsTmSo7MnA8noKMOH  4llklDtTTyMYMY2LfFOkTI   
4     5zyrEv4F3FaLECI8TOKpFM  490sLjsTmSo7MnA8noKMOH  4llklDtTTyMYMY2LfFOkTI   
...                      ...                     ...                     ...   
1597  0Z7nGFVCLfixWctgePsRk9  6cBlaud5JVmPjkjxnwIMLx  6vWDO969PvNqNYHIOW5v0m   
1598  3w0w2T288dec0mgeZZqoNN  30zwjSQEodaUXCn11nmiVF  4xPQFgDA5M2xa0ZGo5iIsv   
1599  51ZQ1vr10ffzbwIjDCwqm4  5EYKrEDnKhhcNxGedaRQeK  66CXWjxzNUsdJxJ2JdwvnR   
1600  6usohdchdzW9oML7VC4Uhk  5QMiub2LonMqxB7dhtbPlX  33qOK5uJ8AR2xuQQAhHump   
1601  2tudvzsrR56uom6smgOcSf  4iqbFIdGOTzXeDtt9owjQn  1RyvyyTE3xzB2ZywiAwp0i   

     release_date_x  popularity release

In [14]:
# Save the resulting DataFrame to a CSV file
merged_df.to_csv("genre_popularity_over_time.csv", index=False)

print("CSV file saved as 'genre_popularity_over_time.csv'!")

CSV file saved as 'genre_popularity_over_time.csv'!


In [15]:
# Count the number of occurrences of each genre
genre_count_df = merged_df.groupby("genres").size().reset_index(name="count")

# Display the genre count DataFrame
print(genre_count_df)

                  genres  count
0                Unknown    487
1              acid rock     10
2       acoustic country      1
3        adult standards     32
4              afro soul      1
..                   ...    ...
120  traditional country      2
121                 trap      1
122    variété française      3
123           vocal jazz      7
124   west coast hip hop      5

[125 rows x 2 columns]


In [16]:
# Save the genre count to a CSV file
genre_count_df.to_csv("genre_count.csv", index=False)
print("CSV file saved as 'genre_count.csv'!")

CSV file saved as 'genre_count.csv'!


Motown is the most popular genre from 1958 to 2024