In [1]:
! pip install spotipy pandas



In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import base64
import requests
import time

In [3]:
# spotify credentials
client_id = "340ebe283d9f462489c995e108401871"
client_secret = "a84f4bc0ce67494c969e7a9b818ce574"
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

In [4]:
# Search for Dolly Parton and get the artist ID
artist_name = "Dolly Parton"
results = sp.search(q=artist_name, type="artist", limit=1)
dolly_id = results['artists']['items'][0]['id']
print(f"Dolly Parton's Artist ID: {dolly_id}")

Dolly Parton's Artist ID: 32vWCbZh0xZ4o9gkz4PsEU


In [5]:
# Get all albums by Dolly Parton
albums = sp.artist_albums(dolly_id, album_type='album', limit=50)
album_list = []
for album in albums['items']:
    album_list.append({'album_name': album['name'], 
                       'album_id': album['id'], 
                       'release_date': album['release_date']})

# Convert to DataFrame
albums_df = pd.DataFrame(album_list)
print(albums_df.head())

                                          album_name                album_id  \
0       Smoky Mountain DNA: Family, Faith and Fables  28bVqBO08O6jcgirj9Yj0l   
1                                  Rockstar (Deluxe)  337iOvlhWakFamgb6x2vKN   
2                                           Rockstar  2thjKwqPvHo5SWzSzudVB2   
3                                     Run, Rose, Run  50A9Yv2SPJByoZIDO5WVzG   
4  A Holly Dolly Christmas (Ultimate Deluxe Edition)  0UKCFUDo5hCdAB4b6tPqQe   

  release_date  
0   2024-11-15  
1   2024-01-19  
2   2023-11-17  
3   2022-03-04  
4   2020-10-02  


In [6]:
# Get track details and popularity from each album
track_list = []

for album in album_list:
    album_id = album['album_id']
    tracks = sp.album_tracks(album_id)
    
    for track in tracks['items']:
        track_info = sp.track(track['id'])
        track_list.append({
            'track_name': track['name'],
            'album_name': album['album_name'],
            'release_date': album['release_date'],
            'popularity': track_info['popularity']
        })

# Convert to DataFrame
tracks_df = pd.DataFrame(track_list)
# Convert release_date to datetime format, handling inconsistent formats
tracks_df['release_date'] = pd.to_datetime(tracks_df['release_date'], errors='coerce')

# Fill missing dates with a default value (e.g., "1900-01-01") or handle them as needed
tracks_df['release_date'] = tracks_df['release_date'].fillna(pd.Timestamp("1900-01-01"))
print(tracks_df.head())



                                          track_name  \
0             Introduction - Tennessee Mountain Home   
1                         My Tennessee Mountain Home   
2   Grooms Tune / Bonaparte's Retreat (Instrumental)   
3                                 I'll Live in Glory   
4  Singing His Praise / Daddy Was an Old Time Pre...   

                                     album_name release_date  popularity  
0  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          24  
1  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          30  
2  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          22  
3  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          21  
4  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          22  


In [7]:
# Extract the year from the release date
tracks_df['release_year'] = tracks_df['release_date'].dt.year


#extract album name
tracks_df['album_name'] = tracks_df['album_name'].str.split('(', expand=True)[0]
print(tracks_df.head())

# Group by year and calculate average popularity
yearly_popularity = tracks_df.groupby('release_year')['popularity'].mean().reset_index()
print(yearly_popularity.sort_values(by='release_year', ascending=True))

                                          track_name  \
0             Introduction - Tennessee Mountain Home   
1                         My Tennessee Mountain Home   
2   Grooms Tune / Bonaparte's Retreat (Instrumental)   
3                                 I'll Live in Glory   
4  Singing His Praise / Daddy Was an Old Time Pre...   

                                     album_name release_date  popularity  \
0  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          24   
1  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          30   
2  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          22   
3  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          21   
4  Smoky Mountain DNA: Family, Faith and Fables   2024-11-15          22   

   release_year  
0          2024  
1          2024  
2          2024  
3          2024  
4          2024  
    release_year  popularity
0           1900   18.250000
1           1975   10.700000
2          

In [8]:
#save dataframes to csv
yearly_popularity.to_csv('yearly_popularity.csv', index=False)

In [9]:
# Create a decade column
tracks_df['decade'] = (tracks_df['release_year'] // 10) * 10

# Group by decade and calculate average popularity
decade_popularity = tracks_df.groupby('decade')['popularity'].mean().reset_index()
print(decade_popularity.sort_values(by='decade', ascending=True))

   decade  popularity
0    1900   18.250000
1    1970   20.260000
2    1980   18.096774
3    1990   17.398374
4    2000   12.304598
5    2010   17.558442
6    2020   21.311594


In [10]:
#save dataframes to csv
decade_popularity.to_csv('decade_popularity.csv', index=False)

In [11]:
# Find the year with the highest average popularity
peak_year = yearly_popularity.loc[yearly_popularity['popularity'].idxmax()]
print(f"Peak Year: {peak_year['release_year']} with Popularity Score: {peak_year['popularity']:.2f}")

Peak Year: 1977.0 with Popularity Score: 27.95


In [12]:
# Example: Get top 5 artists globally based on popularity
top_artists = sp.search(q="*", type="artist", limit=5)
for artist in top_artists['artists']['items']:
    print(f"Artist: {artist['name']}, Popularity: {artist['popularity']}")

Artist: *NSYNC, Popularity: 72
Artist: *67, Popularity: 45
Artist: Eric Carmen, Popularity: 65
Artist: Clifford Brown, Popularity: 47
Artist: Sidney Bechet, Popularity: 46


In [13]:
#Find Dolly Parton's popularity
dolly_popularity = sp.artist(dolly_id)['popularity']
print(f"Dolly Parton's Popularity: {dolly_popularity}")

Dolly Parton's Popularity: 74


Dolly Parton's popularity is relative to other artists on Spotify, and the popularity value provided by the Spotify API is based on how well a track or artist is performing in relation to other artists and tracks at any given time.

How is Popularity Measured on Spotify?
Spotify calculates the popularity score for each track and artist using several factors, including:

Streaming data (how often tracks are played).
User interaction (how often songs are added to playlists, liked, shared, etc.).
Recency of plays (newer plays are weighted more heavily).
Overall listening trends within Spotify’s user base.
The popularity score ranges from 0 to 100, where:

100 indicates the track is extremely popular on the platform.
0 means it's not well known or very obscure.
However, the score is not an absolute measure of a track's popularity across the entire world; it’s relative to the general Spotify audience at any point in time.

### Fetch Popularity Data for Dolly Parton, Taylor Swift, and Drake

In [15]:
# Function to get the artist popularity
def get_artist_popularity(artist_name):
    results = sp.search(q=artist_name, type='artist', limit=1)
    artist_id = results['artists']['items'][0]['id']
    artist_data = sp.artist(artist_id)
    return artist_name, artist_data['popularity']

# Compare Dolly Parton, Taylor Swift, and Drake
artists = ['Dolly Parton', 'Taylor Swift', 'Drake', 'Elvis Presley']
popularity_comparison = {}

for artist in artists:
    name, popularity = get_artist_popularity(artist)
    popularity_comparison[name] = popularity

# Display the popularity comparison
for artist, popularity in popularity_comparison.items():
    print(f"{artist}'s Popularity: {popularity}")

Dolly Parton's Popularity: 74
Taylor Swift's Popularity: 97
Drake's Popularity: 97
Elvis Presley's Popularity: 78
