#### This notebook holds a machine learning model that predicts which artist or genre a user will listen to based on the time of day
**Models**: random forest or logistic regression <br>
**Features**: hour, day_of_week, is_weekend, season <br>
**Target**: genre/artist (preferably artists) <br>
**Desired Output**: "It's 9pm on a Friday, you will probably listen to X"

In [1]:
from spotipy import Spotify
from spotipy.oauth2 import SpotifyOAuth
from collections import Counter
import requests
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import numpy as np
import os
import csv
from dotenv import load_dotenv
load_dotenv()

from spotifyAnalysis import recently_played_tracks

### Authentication

In [2]:
def get_spotify_client():
    """Create a Spotify client and fetch an access token for it.

    The OAuth client id, client secret and redirect URI are read from the
    SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET and SPOTIPY_REDIRECT_URI
    environment variables.

    Returns:
        tuple: ``(client, token)`` where ``client`` is the ``Spotify``
        instance and ``token`` is whatever
        ``get_access_token(as_dict=False)`` returns for its auth manager.
    """
    requested_scopes = "user-read-private user-top-read user-read-recently-played"
    oauth_settings = {
        "client_id": os.getenv("SPOTIPY_CLIENT_ID"),
        "client_secret": os.getenv("SPOTIPY_CLIENT_SECRET"),
        "redirect_uri": os.getenv("SPOTIPY_REDIRECT_URI"),
        "scope": requested_scopes,
    }
    client = Spotify(auth_manager=SpotifyOAuth(**oauth_settings))
    # Ask for the bare token rather than the full token-info dictionary.
    access_token = client.auth_manager.get_access_token(as_dict=False)
    return client, access_token

# Build the client once at notebook level; `sp` and `token` become
# module-level names that the cells below can use.
sp, token = get_spotify_client()
# Only reached when get_spotify_client() returned without raising.
print("Connected to Spotify api")

Connected to Spotify api


## Data Collection
- track name, artist, genre
- played_at
    - hour, day_of_week, is_weekend

In [3]:
def generate_df(sp, limit=50, csv_path="listening_habbits.csv"):
    """Fetch recent plays, merge them into the CSV history and return the result.

    Every play returned by ``recently_played_tracks`` is expanded with time
    features (``hour``, ``day_of_week``, ``is_weekend``, ``is_workday``,
    ``time_of_day``), appended to the history stored at ``csv_path``,
    de-duplicated, back-filled with a genre where the genre is missing, and
    written back to ``csv_path``.

    Args:
        sp: Spotify client, passed through to ``recently_played_tracks`` and
            ``get_artist_genre``.
        limit: Number of recent plays to request.
        csv_path: CSV file holding the accumulated history. It is read when it
            exists and always (re)written before returning.

    Returns:
        pandas.DataFrame: The combined history, one row per distinct
        ``(name, artist, played_at)`` play. The frame is empty (but has all
        columns) when there is neither stored history nor any new play.

    Raises:
        KeyError: If a track lacks one of the ``name``, ``artists``,
            ``played_at`` or ``genre`` keys.
    """
    # A "play" is a track heard at a specific moment. The same key is used
    # for every de-duplication so repeat listens at different times survive.
    play_key = ['name', 'artist', 'played_at']
    columns = ['name', 'artist', 'genre', 'played_at', 'hour',
               'day_of_week', 'is_weekend', 'is_workday', 'time_of_day']

    records = []
    for track in recently_played_tracks(sp, limit=limit):
        timestamp = pd.to_datetime(track['played_at'])
        day_of_week = timestamp.dayofweek
        is_weekend = 1 if day_of_week >= 5 else 0
        records.append({
            'name': track['name'],
            'artist': track['artists'],
            'genre': track['genre'],
            'played_at': track['played_at'],
            'hour': timestamp.hour,
            'day_of_week': day_of_week,
            'is_weekend': is_weekend,
            'is_workday': 1 - is_weekend,
        })

    # Explicit columns keep the frame well-formed even when nothing was played,
    # so the steps below do not fail on a column-less DataFrame.
    new_plays = pd.DataFrame(records, columns=columns)
    if not new_plays.empty:
        # Bins are right-inclusive: hours 0-6 Night, 7-12 Morning,
        # 13-18 Afternoon, 19-23 Evening.
        new_plays['time_of_day'] = pd.cut(
            new_plays['hour'],
            bins=[-1, 6, 12, 18, 24],
            labels=['Night', 'Morning', 'Afternoon', 'Evening'],
        )
    new_plays['played_at'] = pd.to_datetime(new_plays['played_at'])

    # Merge with the stored history (if any), skipping empty frames so the
    # concatenation never has to guess dtypes from an empty input.
    frames = []
    if os.path.exists(csv_path):
        frames.append(pd.read_csv(csv_path, parse_dates=['played_at']))
    frames.append(new_plays)
    populated = [frame for frame in frames if not frame.empty]
    combined_df = pd.concat(populated, ignore_index=True) if populated else new_plays
    combined_df = combined_df.drop_duplicates(subset=play_key)

    # Fill in missing genres: both the 'Unknown' placeholder and blank cells
    # (read back from the CSV as NaN) count as missing. Each artist is looked
    # up once and the result applied to all of that artist's missing rows.
    combined_df['genre'] = combined_df['genre'].astype(object)
    genre_missing = combined_df['genre'].isna() | combined_df['genre'].eq('Unknown')
    needs_genre = genre_missing & combined_df['artist'].notna()
    for artist in combined_df.loc[needs_genre, 'artist'].unique():
        genre = get_artist_genre(sp, artist)
        artist_rows = needs_genre & combined_df['artist'].eq(artist)
        combined_df.loc[artist_rows, 'genre'] = genre

    combined_df.to_csv(csv_path, index=False)
    return combined_df

def get_artist_genre(sp, artist_name):
    """Look up the first genre Spotify lists for an artist.

    Args:
        sp: Client exposing ``search(q=..., type=..., limit=...)``.
        artist_name: Name of the artist to search for.

    Returns:
        str: The first entry of the top search hit's ``genres`` list, or
        ``'Unknown'`` when the name is blank or not a string, when the search
        yields no artist, or when the hit carries no genres.
    """
    # Names read back from a CSV can be NaN; searching for the text "nan"
    # would attach some unrelated artist's genre, so bail out early.
    if not isinstance(artist_name, str) or not artist_name.strip():
        return 'Unknown'

    # Spotify's documented field-filter form is `artist:<name>` with no space
    # after the colon.
    query = f'artist:{artist_name.strip()}'
    results = sp.search(q=query, type='artist', limit=1)

    # Tolerate missing or null sections of the response instead of raising.
    items = ((results or {}).get('artists') or {}).get('items') or []
    if not items:
        return 'Unknown'
    genres = (items[0] or {}).get('genres') or []
    return genres[0] if genres else 'Unknown'

# Refresh the stored listening history, requesting up to 50 recent plays.
# The returned DataFrame is the cell's displayed output.
generate_df(sp, limit=50)


Unnamed: 0,name,artist,genre,played_at,hour,day_of_week,is_weekend,is_workday,time_of_day
0,I Don't Mind,M60,Unknown,2025-11-17 18:11:12.435000+00:00,18,0,0,1,Afternoon
1,Motorcycle Boy,Fontaines D.C.,post-punk,2025-11-17 18:07:33.253000+00:00,18,0,0,1,Afternoon
2,Bug,Fontaines D.C.,post-punk,2025-11-17 18:03:51.074000+00:00,18,0,0,1,Afternoon
3,Death Kink,Fontaines D.C.,post-punk,2025-11-17 18:01:02.525000+00:00,18,0,0,1,Afternoon
4,Starburster,Fontaines D.C.,post-punk,2025-11-17 17:57:45.931000+00:00,17,0,0,1,Afternoon
5,7,Catfish and the Bottlemen,indie,2025-11-17 17:54:04.844000+00:00,17,0,0,1,Afternoon
6,Emily,Catfish and the Bottlemen,indie,2025-11-17 17:49:48.338000+00:00,17,0,0,1,Afternoon
7,Soundcheck,Catfish and the Bottlemen,indie,2025-11-17 17:47:21.155000+00:00,17,0,0,1,Afternoon
8,Tyrants,Catfish and the Bottlemen,indie,2025-11-17 17:43:41.865000+00:00,17,0,0,1,Afternoon
9,Cocoon,Catfish and the Bottlemen,indie,2025-11-17 17:43:24.578000+00:00,17,0,0,1,Afternoon
