In [1]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import re
import time
import os
from dotenv import load_dotenv
import json
import pandas as pd
from tqdm import tqdm
from datetime import datetime
from tqdm import tqdm
import calendar
import random
import numpy as np
import base64
import requests

In [2]:
# Pull in environment variables through python-dotenv before reading them.
load_dotenv()

# Folder holding the Spotify data export, taken from the EXPORT_PATH variable
# (None when the variable is not set).
data_directory = os.environ.get('EXPORT_PATH')

### Identifying streaming history audio files.

In [3]:
def find_streaming_history_files(directory):
    """Return the names of the streaming-history audio files in ``directory``.

    A file qualifies when its name contains ``'Streaming_History_Audio'``.
    The names are returned sorted so the load order is the same on every run
    (``os.listdir`` returns entries in arbitrary order).

    Args:
        directory: Path of the folder holding the Spotify data export.

    Returns:
        A list of bare file names (not full paths). An empty list is
        returned, after printing a message, when no directory is given or
        the directory cannot be listed.
    """
    # os.listdir(None) silently lists the current working directory, so an
    # unset EXPORT_PATH would otherwise pick up whatever happens to be there.
    if not directory:
        print("Error: No export directory was provided.")
        return []

    try:
        return sorted(
            name for name in os.listdir(directory)
            if 'Streaming_History_Audio' in name
        )

    except FileNotFoundError:
        print(f"Error: The directory '{directory}' was not found.")
        return []

    except PermissionError:
        print(f"Error: You do not have permission to access the directory '{directory}'.")
        return []

    except Exception as e:
        # Best-effort: any other listing failure is reported, not raised.
        print(f"An unexpected error occurred: {e}")
        return []

In [4]:
# Names (not paths) of the streaming-history files found in the export folder.
file_names = find_streaming_history_files(directory=data_directory)

### Loading and joining streaming data.

In [5]:
def load_and_join_streaming_data(directory, file_names):
    """Load every listed JSON file and merge the contents into one list.

    Args:
        directory: Folder that contains the files.
        file_names: Bare file names to load from ``directory``.

    Returns:
        A single list. A file whose top-level JSON value is a list
        contributes its elements; any other JSON value is appended as one
        element. Files that are missing, unreadable, or not valid JSON are
        reported with a printed message and skipped.
    """
    streaming_data = []

    for file_name in file_names:
        file_path = os.path.join(directory, file_name)
        try:
            # JSON text is UTF-8; without an explicit encoding, open() uses the
            # platform default (e.g. cp1252 on Windows), which garbles or
            # rejects non-ASCII artist and track names.
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except FileNotFoundError:
            print(f"Error: The file '{file_name}' was not found in the directory '{directory}'.")
        except json.JSONDecodeError:
            print(f"Error: Failed to decode JSON in the file '{file_name}'. The file might be corrupted.")
        except Exception as e:
            print(f"An unexpected error occurred while loading '{file_name}': {e}")
        else:
            if isinstance(data, list):
                streaming_data.extend(data)
            else:
                streaming_data.append(data)
            print(f"Appended data from {file_name}")

    return streaming_data

In [6]:
# Read every discovered file and merge the entries into one flat list.
streaming_data = load_and_join_streaming_data(
    directory=data_directory,
    file_names=file_names,
)

Appended data from Streaming_History_Audio_2022_4.json
Appended data from Streaming_History_Audio_2023-2024_7.json
Appended data from Streaming_History_Audio_2021-2022_3.json
Appended data from Streaming_History_Audio_2024_8.json
Appended data from Streaming_History_Audio_2023_6.json
Appended data from Streaming_History_Audio_2020_0.json
Appended data from Streaming_History_Audio_2020-2021_1.json
Appended data from Streaming_History_Audio_2021_2.json
Appended data from Streaming_History_Audio_2022-2023_5.json


### Converting data from list to a pandas dataframe.

In [7]:
def create_streaming_data_df(streaming_data):
    """Convert raw streaming-history entries into a pandas DataFrame.

    Args:
        streaming_data: Iterable of dicts. Each must have the keys ``ts``,
            ``master_metadata_album_artist_name``,
            ``master_metadata_track_name`` and ``ms_played``;
            ``spotify_track_uri`` is optional.

    Returns:
        A DataFrame with the columns ``endTime`` (datetime parsed from
        ``ts``), ``artistName``, ``trackName``, ``msPlayed`` and ``id``.
        ``id`` is the last colon-separated segment of ``spotify_track_uri``,
        or ``None`` when the URI is missing or empty. An empty input gives
        an empty frame that still has these columns.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    columns = ['endTime', 'artistName', 'trackName', 'msPlayed', 'id']
    rows = []

    for entry in streaming_data:
        spotify_track_uri = entry.get('spotify_track_uri', '')
        track_id = spotify_track_uri.split(':')[-1] if spotify_track_uri else None

        rows.append({
            'endTime': entry['ts'],
            'artistName': entry['master_metadata_album_artist_name'],
            'trackName': entry['master_metadata_track_name'],
            'msPlayed': entry['ms_played'],
            'id': track_id
        })

    # Naming the columns keeps the schema intact when there are no rows;
    # otherwise the 'endTime' lookup below raises KeyError on empty input.
    streaming_data_df = pd.DataFrame(rows, columns=columns)

    streaming_data_df['endTime'] = pd.to_datetime(streaming_data_df['endTime'], format='%Y-%m-%dT%H:%M:%SZ')

    return streaming_data_df

In [8]:
# Flatten the raw entries into a table with one row per streaming entry.
streaming_data_df = create_streaming_data_df(streaming_data=streaming_data)

### Removing rows with missing IDs and creating new columns for easier grouping.

In [9]:
# Keep only the rows that carry a track id.
streaming_data_df = streaming_data_df.dropna(axis='index', subset=['id'])

In [10]:
# Calendar month and year of each stream, used later as grouping keys.
streaming_data_df = streaming_data_df.assign(
    month=streaming_data_df['endTime'].dt.month,
    year=streaming_data_df['endTime'].dt.year,
)

### Creating Spotify Web API instance.

In [11]:
def get_spotify_instance():
    """Build a Spotify Web API client from credentials in the environment.

    Reads SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET and SPOTIPY_REDIRECT_URI,
    asks the OAuth helper for a cached token and, when there is none,
    requests a new one.

    Returns:
        A ``spotipy.Spotify`` client using the obtained access token, or
        ``None`` (after printing the error) if obtaining the token or
        building the client raised.
    """
    load_dotenv()

    oauth_settings = {
        'client_id': os.getenv('SPOTIPY_CLIENT_ID'),
        'client_secret': os.getenv('SPOTIPY_CLIENT_SECRET'),
        'redirect_uri': os.getenv('SPOTIPY_REDIRECT_URI'),
        'scope': 'playlist-modify-public playlist-modify-private playlist-read-private',
        'cache_path': '.spotipyoauthcache',
    }
    auth_manager = SpotifyOAuth(**oauth_settings)

    try:
        token = auth_manager.get_cached_token()

        # Fall back to a fresh authorization when nothing usable is cached.
        if not token:
            print("No cached token found. Attempting to retrieve a new one...")
            token = auth_manager.get_access_token(as_dict=True)

        access_token = token['access_token']
        return spotipy.Spotify(auth=access_token, requests_timeout=20)

    except Exception as e:
        print(f"Error retrieving token: {e}")
        return None

### Removing specific playlists from analysis.
- **Every song found in these playlists is removed from the whole listening history, not only the plays that came from the playlist. If a playlist shares songs with the rest of your listening, the analysis might be inaccurate.**
    - On the other hand, this can be used to our advantage. If you want to exclude specific songs from the analysis, you can manually create a playlist containing them and enter its name when prompted.

In [12]:
def _fetch_user_playlists(sp, limit=50):
    """Return a name -> id mapping of the current user's playlists, reading every page."""
    playlists = {}
    offset = 0

    while True:
        items = sp.current_user_playlists(limit=limit, offset=offset)['items']
        for item in items:
            # Playlists sharing a name overwrite each other; the last one wins.
            playlists[item['name']] = item['id']
        if len(items) < limit:
            break
        offset += limit

    return playlists


def _fetch_playlist_track_ids(sp, playlist_id):
    """Return the set of track ids in a playlist, following every result page."""
    track_ids = set()

    # The playlist response holds only the first page of tracks; the
    # remaining pages are reached through the paging object's 'next' link.
    page = sp.playlist(playlist_id=playlist_id)['tracks']
    while page:
        for item in page['items']:
            track = item.get('track')
            # Skip entries with no track object or no id; they cannot match
            # any row of the streaming data.
            if track and track.get('id'):
                track_ids.add(track['id'])
        page = sp.next(page) if page.get('next') else None

    return track_ids


def remove_playlists(df):
    """Interactively drop every stream of the songs found in chosen playlists.

    Prompts for a comma-separated list of playlist names. Entering
    ``exit!`` returns ``df`` untouched. Otherwise every row whose ``id``
    appears in any of the named playlists is dropped from ``df`` in place,
    and the result is pickled to ``streaming_data_df.pkl`` in the current
    working directory.

    Args:
        df: Streaming DataFrame with an ``id`` column of track ids.

    Returns:
        The same DataFrame object that was passed in (modified in place
        unless the user entered ``exit!`` or no Spotify client was
        available).
    """
    playlists_input = input('Enter the names of the playlists you would like to remove, separated by commas (enter "exit!" to continue without removing any playlist): ')

    if playlists_input.strip() == "exit!":
        print("No playlists entered. Exiting the function.")
        return df

    # Blank names (stray commas, empty input) are ignored.
    playlists_to_remove = [name.strip() for name in playlists_input.split(',') if name.strip()]

    # Authenticate only when there is something to look up.
    sp = get_spotify_instance()
    if sp is None:
        print("Could not connect to Spotify. No playlists were removed.")
        return df

    playlists = _fetch_user_playlists(sp)
    songs = set()

    for playlist in playlists_to_remove:
        if playlist not in playlists:
            print(f'The playlist name "{playlist}" does not exist in your library. Skipping.')
            continue
        print(f'Removing playlist: {playlist}')
        songs.update(_fetch_playlist_track_ids(sp, playlists[playlist]))

    # Vectorised membership test instead of a Python-level row loop.
    idx_to_drop = df.index[df['id'].isin(songs)]

    # The drop stays in place so the caller's frame is updated as before.
    df.drop(idx_to_drop, inplace=True)
    df.to_pickle(os.path.join(os.getcwd(),'streaming_data_df.pkl'))

    return df

In [13]:
# Prompt for playlists to exclude and keep the resulting frame.
filtered_streaming_data_df = remove_playlists(df=streaming_data_df)

Enter the names of the playlists you would like to remove, separated by commas (enter "exit!" to continue without removing any playlist): exit!
No playlists entered. Exiting the function.


### Creating playlists

In [14]:
def create_monthly_playlists(df, top_n=20, pause_seconds=2):
    """Create one public playlist per (year, month) with the most-played tracks.

    Play time is summed per track within each month; the ``top_n`` tracks
    with the highest total are added to a playlist named
    ``"<Month name> <year>"``.

    Args:
        df: Streaming DataFrame with the columns ``year``, ``month``,
            ``id`` and ``msPlayed``.
        top_n: Number of tracks per playlist (default 20).
        pause_seconds: Delay after each successfully created playlist
            (default 2).

    Returns:
        None. Progress and Spotify errors are printed.
    """
    # NOTE(review): nothing here checks for existing playlists, so running
    # this twice presumably creates a second set with the same names.
    sp = get_spotify_instance()
    if sp is None:
        print("Could not connect to Spotify. No playlists were created.")
        return

    try:
        # The user does not change between months, so look it up once
        # rather than once per playlist.
        user_id = sp.current_user()['id']
    except spotipy.SpotifyException as e:
        print(f"Error retrieving the current user: {e}")
        return

    playlist_description = "https://github.com/kyriakos-papadopoulos/Projects/tree/main/API_Projects/Spotify/Monthly_Playlists"

    grouped_df = df.groupby(['year', 'month', 'id'], as_index=False).agg({'msPlayed': 'sum'})

    for (year, month), group in grouped_df.groupby(['year', 'month']):
        top_tracks = group.sort_values(by='msPlayed', ascending=False).head(top_n)
        track_ids = top_tracks['id'].tolist()

        playlist_name = f"{calendar.month_name[month]} {year}"

        try:
            playlist = sp.user_playlist_create(user=user_id, name=playlist_name, public=True, description=playlist_description)

            if track_ids:
                sp.user_playlist_add_tracks(user=user_id, playlist_id=playlist['id'], tracks=track_ids)
                print(f"Playlist '{playlist_name}' created successfully!")
            else:
                print(f"No tracks were added to the playlist '{playlist_name}'.")

            # Space out the requests between consecutive playlists.
            time.sleep(pause_seconds)

        except spotipy.SpotifyException as e:
            # One failing month is reported and does not stop the others.
            print(f"Error creating playlist '{playlist_name}': {e}")

In [15]:
# Build the monthly playlists from the filtered listening history.
create_monthly_playlists(df=filtered_streaming_data_df)

Playlist 'April 2020' created successfully!
Playlist 'May 2020' created successfully!
Playlist 'June 2020' created successfully!
Playlist 'July 2020' created successfully!
Playlist 'August 2020' created successfully!
Playlist 'September 2020' created successfully!
Playlist 'October 2020' created successfully!
Playlist 'November 2020' created successfully!
Playlist 'December 2020' created successfully!
Playlist 'January 2021' created successfully!
Playlist 'February 2021' created successfully!
Playlist 'March 2021' created successfully!
Playlist 'April 2021' created successfully!
Playlist 'May 2021' created successfully!
Playlist 'June 2021' created successfully!
Playlist 'July 2021' created successfully!
Playlist 'August 2021' created successfully!
Playlist 'September 2021' created successfully!
Playlist 'October 2021' created successfully!
Playlist 'November 2021' created successfully!
Playlist 'December 2021' created successfully!
Playlist 'January 2022' created successfully!
Playlis