# **Spotify Personal Project**

# Imports:

In [1]:
import os
import os.path
import time
from datetime import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

import tensorflow as tf

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
os.getcwd()

'/content'

# Read In Data:

In [3]:
os.chdir('/content/drive/My Drive/Personal Project Data/Spotify Extended Streaming History')

os.listdir()

['Streaming_History_Audio_2015-2021_0.json',
 'Streaming_History_Audio_2021-2023_1.json',
 'Streaming_History_Audio_2023-2024_3.json',
 'Streaming_History_Audio_2023_2.json',
 'Streaming_History_Audio_2024-2025_6.json',
 'ReadMeFirst_ExtendedStreamingHistory.pdf',
 'Streaming_History_Video_2018-2025.json',
 'Streaming_History_Audio_2024_4.json',
 'Streaming_History_Audio_2024_5.json',
 'Streaming_History_Audio_2025_7.json']

In [4]:
stream_2015_2021 = pd.read_json('Streaming_History_Audio_2015-2021_0.json')
stream_2021_2023 = pd.read_json('Streaming_History_Audio_2021-2023_1.json')
stream_2023 = pd.read_json('Streaming_History_Audio_2023_2.json')
stream_2023_2024 = pd.read_json('Streaming_History_Audio_2023-2024_3.json')
stream_2024_4 = pd.read_json('Streaming_History_Audio_2024_4.json')
stream_2024_5 = pd.read_json('Streaming_History_Audio_2024_5.json')
stream_2024_2025 = pd.read_json('Streaming_History_Audio_2024-2025_6.json')
stream_2025 = pd.read_json('Streaming_History_Audio_2025_7.json')

# Prepare Data

In [5]:
spotify = pd.concat([stream_2015_2021,stream_2021_2023,stream_2023,stream_2023_2024,stream_2024_4,stream_2024_5,stream_2024_2025,stream_2025], axis=0)

In [6]:
spotify.columns

Index(['ts', 'platform', 'ms_played', 'conn_country', 'ip_addr',
       'master_metadata_track_name', 'master_metadata_album_artist_name',
       'master_metadata_album_album_name', 'spotify_track_uri', 'episode_name',
       'episode_show_name', 'spotify_episode_uri', 'audiobook_title',
       'audiobook_uri', 'audiobook_chapter_uri', 'audiobook_chapter_title',
       'reason_start', 'reason_end', 'shuffle', 'skipped', 'offline',
       'offline_timestamp', 'incognito_mode'],
      dtype='object')

In [7]:
spotify.rename(columns={
    "ts": "timestamp",
    "conn_country": "country",
    "ip_addr": "ip_address",
    "master_metadata_track_name": "track_name",
    "master_metadata_album_artist_name": "artist_name",
    "master_metadata_album_album_name": "album_name",
    "spotify_track_uri": "track_uri",
}, inplace=True)

In [9]:
columns_to_drop = ['episode_name','episode_show_name','spotify_episode_uri','audiobook_title','audiobook_uri','audiobook_chapter_uri','audiobook_chapter_title','incognito_mode','offline_timestamp','offline','platform','ip_address']
spotify.drop(columns=columns_to_drop, axis=1, inplace=True)

In [10]:
spotify

Unnamed: 0,timestamp,ms_played,country,track_name,artist_name,album_name,track_uri,reason_start,reason_end,shuffle,skipped
0,2015-11-30T16:39:32Z,205879,US,Jumpman,Drake,What A Time To Be Alive,spotify:track:27GmP9AWRs744SzKcpJsTZ,clickrow,trackdone,False,False
1,2015-11-30T16:39:40Z,7551,US,Too Much,Drake,Nothing Was The Same,spotify:track:5kdcXPj84Pb0HG6K8c1W1O,trackdone,endplay,False,True
2,2015-11-30T16:44:59Z,180666,US,6 God,Drake,If You're Reading This It's Too Late,spotify:track:3a8tAZFJxlmBwOtrf5L1oC,clickrow,trackdone,False,False
3,2015-11-30T17:04:42Z,147473,US,Jungle,Drake,If You're Reading This It's Too Late,spotify:track:7JXZq0JgG2zTrSOAgY8VMC,trackdone,endplay,False,True
4,2015-11-30T17:08:46Z,217706,US,Big Rings,Drake,What A Time To Be Alive,spotify:track:7jslhIiELQkgW9IHeYNOWE,clickrow,trackdone,False,False
...,...,...,...,...,...,...,...,...,...,...,...
982,2025-01-30T23:46:53Z,880,US,Love Is Only a Feeling,Joey Bada$$,Love Is Only a Feeling,spotify:track:7umZiitjVsEjMQ6HNddpUI,fwdbtn,fwdbtn,True,True
983,2025-01-30T23:51:00Z,247933,US,Thinking About You (feat. Ayah Marar),Calvin Harris,18 Months,spotify:track:1KtD0xaLAikgIt5tPbteZQ,fwdbtn,trackdone,True,False
984,2025-01-30T23:55:14Z,253346,US,waves - Tame Impala Remix,Miguel,Rogue Waves,spotify:track:3lB0GMiI5KxDbTOG8V3bOx,trackdone,trackdone,True,False
985,2025-01-30T23:56:44Z,89535,US,Coffin,Lil Yachty,Lil Boat 3.5,spotify:track:5vDNippoMr52KpXBO9b9KQ,trackdone,trackdone,True,False


In [11]:
spotify['timestamp'] = pd.to_datetime(spotify['timestamp'])

In [12]:
spotify['timestamp'] = spotify['timestamp'].dt.tz_convert('US/Pacific')
spotify['year'] = spotify['timestamp'].dt.year
spotify['month'] = spotify['timestamp'].dt.month
spotify['hour'] = spotify['timestamp'].dt.hour
spotify['second_played'] = spotify['ms_played'] / 1000
spotify['minute_played'] = spotify['second_played'] / 60
spotify['season'] = spotify['month'].apply(lambda x: 'Winter' if x in [12, 1, 2] else ('Spring' if x in [3, 4, 5] else ('Summer' if x in [6, 7, 8] else 'Fall')))
spotify['month/year'] = spotify['timestamp'].dt.to_period('M')
spotify['year_month'] = spotify['timestamp'].dt.year * 100 + spotify['timestamp'].dt.month

  spotify['month/year'] = spotify['timestamp'].dt.to_period('M')


In [15]:
spotify.info()

<class 'pandas.core.frame.DataFrame'>
Index: 112937 entries, 0 to 986
Data columns (total 19 columns):
 #   Column         Non-Null Count   Dtype                     
---  ------         --------------   -----                     
 0   timestamp      112937 non-null  datetime64[ns, US/Pacific]
 1   ms_played      112937 non-null  int64                     
 2   country        112937 non-null  object                    
 3   track_name     112568 non-null  object                    
 4   artist_name    112568 non-null  object                    
 5   album_name     112568 non-null  object                    
 6   track_uri      112568 non-null  object                    
 7   reason_start   112937 non-null  object                    
 8   reason_end     112937 non-null  object                    
 9   shuffle        112937 non-null  bool                      
 10  skipped        112937 non-null  bool                      
 11  year           112937 non-null  int32                     
 

In [17]:
spotify

Unnamed: 0,timestamp,ms_played,country,track_name,artist_name,album_name,track_uri,reason_start,reason_end,shuffle,skipped,year,month,hour,second_played,minute_played,season,month/year,year_month
0,2015-11-30 08:39:32-08:00,205879,US,Jumpman,Drake,What A Time To Be Alive,spotify:track:27GmP9AWRs744SzKcpJsTZ,clickrow,trackdone,False,False,2015,11,8,205.879,3.431317,Fall,2015-11,201511
1,2015-11-30 08:39:40-08:00,7551,US,Too Much,Drake,Nothing Was The Same,spotify:track:5kdcXPj84Pb0HG6K8c1W1O,trackdone,endplay,False,True,2015,11,8,7.551,0.125850,Fall,2015-11,201511
2,2015-11-30 08:44:59-08:00,180666,US,6 God,Drake,If You're Reading This It's Too Late,spotify:track:3a8tAZFJxlmBwOtrf5L1oC,clickrow,trackdone,False,False,2015,11,8,180.666,3.011100,Fall,2015-11,201511
3,2015-11-30 09:04:42-08:00,147473,US,Jungle,Drake,If You're Reading This It's Too Late,spotify:track:7JXZq0JgG2zTrSOAgY8VMC,trackdone,endplay,False,True,2015,11,9,147.473,2.457883,Fall,2015-11,201511
4,2015-11-30 09:08:46-08:00,217706,US,Big Rings,Drake,What A Time To Be Alive,spotify:track:7jslhIiELQkgW9IHeYNOWE,clickrow,trackdone,False,False,2015,11,9,217.706,3.628433,Fall,2015-11,201511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
982,2025-01-30 15:46:53-08:00,880,US,Love Is Only a Feeling,Joey Bada$$,Love Is Only a Feeling,spotify:track:7umZiitjVsEjMQ6HNddpUI,fwdbtn,fwdbtn,True,True,2025,1,15,0.880,0.014667,Winter,2025-01,202501
983,2025-01-30 15:51:00-08:00,247933,US,Thinking About You (feat. Ayah Marar),Calvin Harris,18 Months,spotify:track:1KtD0xaLAikgIt5tPbteZQ,fwdbtn,trackdone,True,False,2025,1,15,247.933,4.132217,Winter,2025-01,202501
984,2025-01-30 15:55:14-08:00,253346,US,waves - Tame Impala Remix,Miguel,Rogue Waves,spotify:track:3lB0GMiI5KxDbTOG8V3bOx,trackdone,trackdone,True,False,2025,1,15,253.346,4.222433,Winter,2025-01,202501
985,2025-01-30 15:56:44-08:00,89535,US,Coffin,Lil Yachty,Lil Boat 3.5,spotify:track:5vDNippoMr52KpXBO9b9KQ,trackdone,trackdone,True,False,2025,1,15,89.535,1.492250,Winter,2025-01,202501


In [16]:
#spotify.to_csv('/content/drive/My Drive/final_spotify_streaming_history.csv', index=False)

# Analysis

In [18]:
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings('ignore')

spotify_filtered = spotify[spotify['year'] < 2025]
artist_monthly_playtime = spotify_filtered.groupby(['year_month', 'artist_name'])['minute_played'].sum().reset_index()
song_monthly_playtime = spotify_filtered.groupby(['year_month', 'track_name'])['minute_played'].sum().reset_index()


artist_monthly_playtime['year_month'] = pd.to_datetime(artist_monthly_playtime['year_month'], format='%Y%m')
song_monthly_playtime['year_month'] = pd.to_datetime(song_monthly_playtime['year_month'], format='%Y%m')

artist_monthly_playtime = artist_monthly_playtime.sort_values(['artist_name', 'year_month'])
song_monthly_playtime = song_monthly_playtime.sort_values(['track_name', 'year_month'])

artist_monthly_playtime['time_index'] = (artist_monthly_playtime['year_month'] - artist_monthly_playtime['year_month'].min()).dt.days
song_monthly_playtime['time_index'] = (song_monthly_playtime['year_month'] - song_monthly_playtime['year_month'].min()).dt.days


In [19]:
# Forecasting top 5 artists for 2025
artist_forecasts = []
for artist in artist_monthly_playtime['artist_name'].unique():
    artist_data = artist_monthly_playtime[artist_monthly_playtime['artist_name'] == artist]

    X = artist_data[['time_index']]
    y = artist_data['minute_played']

    model = LinearRegression()
    model.fit(X, y)

    last_time_index = artist_data['time_index'].max()
    future_time_index = last_time_index + 365
    prediction = model.predict([[future_time_index]])[0]

    artist_forecasts.append((artist, prediction))

forecast_df = pd.DataFrame(artist_forecasts, columns=['artist_name', 'predicted_minutes_2025'])
forecast_df = forecast_df.sort_values('predicted_minutes_2025', ascending=False)

top_5_artists = forecast_df.head(7)
print("Top 5 Artists for 2025 (Predicted Minutes Played):")
print(top_5_artists)

Top 5 Artists for 2025 (Predicted Minutes Played):
          artist_name  predicted_minutes_2025
1351     Teedra Moses              848.713081
1437     Travis Scott              638.942911
753    Kendrick Lamar              449.776271
1341      Tame Impala              378.436034
945   Michael Jackson              277.773452
401       Don Toliver              265.453673
407             Drake              264.025978


In [20]:
song_forecasts = []
for song in song_monthly_playtime['track_name'].unique():
    song_data = song_monthly_playtime[song_monthly_playtime['track_name'] == song]

    X = song_data[['time_index']]
    y = song_data['minute_played']

    model = LinearRegression()
    model.fit(X, y)

    last_time_index = song_data['time_index'].max()
    future_time_index = last_time_index + 365  # 365 days ahead
    prediction = model.predict([[future_time_index]])[0]

    song_forecasts.append((song, prediction))

song_forecast_df = pd.DataFrame(song_forecasts, columns=['track_name', 'predicted_minutes_2025'])
song_forecast_df = song_forecast_df.sort_values('predicted_minutes_2025', ascending=False)

top_5_songs = song_forecast_df.head(7)
print("\nTop 5 Songs for 2025 (Predicted Minutes Played):")
print(top_5_songs)


Top 5 Songs for 2025 (Predicted Minutes Played):
                                             track_name  \
6512                                       reincarnated   
1390               Disillusioned (with serpentwithfeet)   
607                   Be Your Girl (Kaytranada Edition)   
6381  dodger blue (feat. wallie the sensei, siete7x,...   
5418                                      That's a Rack   
6270                   You Ain't Gotta Lie (Momma Said)   
1933                                         Fucc It Up   

      predicted_minutes_2025  
6512             1501.458111  
1390             1080.484189  
607               640.262364  
6381              628.610122  
5418              412.917048  
6270              395.396820  
1933              378.614710  
