In [1]:
# Set up environment variables by loading them from the .env file.
# The .env file should define SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET.
# NOTE(review): keep this cell ahead of the imports cell, whose own comment says it
# also sets up the Spotify API -- presumably that setup reads these variables; confirm.
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Imports. Running this cell also sets up the Spotify API
# (presumably as a side effect of importing data_utils.clean -- TODO confirm).
import pickle
import pandas as pd

# NOTE(review): this star import is the only visible source for the helpers used in the
# cells below (cat_files, get_unique_songs, get_uri_mapping, get_features,
# total_listen_time, mappings_to_csv, stream_time_to_csv, combine_feature_stream,
# monthy_artist_metrics, artist_detail_metrics). Explicit imports would make that traceable.
from data_utils.clean import *
from pathlib import Path

In [3]:
# Retrieve songs from files.
# `songs` is reused further down for the listen-time totals and the streaming table.
songs = cat_files()
# Derive the unique songs from the full list; this is the input to the URI lookup cell.
unique_songs = get_unique_songs(songs)

In [4]:
# URI mapping for the unique songs, cached on disk as a pickle so the slow
# Spotify lookup only has to run once.
file = Path("data/saves/uri.p")
if file.is_file():
    # Cache hit: reuse the mapping pickled by an earlier run
    with file.open("rb") as f:
        uri_map = pickle.load(f)
else:
    # Cache miss: query the Spotify API for each unique song.
    # This is very slow, roughly 400 songs per minute.
    uri_map = get_uri_mapping(unique_songs)

    # Persist the result so later runs can skip the lookup
    with file.open("wb") as f:
        pickle.dump(uri_map, f, protocol=pickle.HIGHEST_PROTOCOL)


In [5]:
# Track features for every URI in uri_map, cached on disk as a pickle.
file = Path("data/saves/features.p")
if not file.is_file():
    # Get features based on URIs. This takes about as much time as retrieving URIs.
    # Fetch for ALL URIs: the pickle written below is loaded as-is by later cells
    # (and by later runs), so a partial slice must never be cached under this name.
    features = get_features(list(uri_map.values()))

    # Pickles features to file to avoid rerunning
    with open(file, "wb") as f:
        pickle.dump(features, f, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # Loads file if it already exists
    with open(file, "rb") as f:
        features = pickle.load(f)

In [6]:
# Track features for the URIs in uri_map, cached on disk as a pickle.
# NOTE(review): the preceding cell reads and writes the same features.p file, so on a
# top-to-bottom run this cell normally just loads whatever that cell cached.
file = Path("data/saves/features.p")
if file.is_file():
    # Cache hit: reuse the features pickled earlier
    with file.open("rb") as f:
        features = pickle.load(f)
else:
    # Cache miss: fetch features for every URI.
    # This takes about as long as retrieving the URIs themselves.
    features = get_features(list(uri_map.values()))

    # Persist the result so later runs can skip the fetch
    with file.open("wb") as f:
        pickle.dump(features, f, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
# Listen-time totals computed from the song list and the URI mapping,
# cached on disk as a pickle.
file = Path("data/saves/stream_time.p")
if file.is_file():
    # Cache hit: reuse the totals pickled by an earlier run
    with file.open("rb") as f:
        stream_times = pickle.load(f)
else:
    # Cache miss: compute the totals, then persist them for later runs
    stream_times = total_listen_time(songs, uri_map)

    with file.open("wb") as f:
        pickle.dump(stream_times, f, protocol=pickle.HIGHEST_PROTOCOL)

In [8]:
# Build the track-feature table from the URI mapping, the features and the listen-time totals.
# NOTE(review): presumably the helper also writes a CSV under data/dataframes/ -- confirm.
feature_df = mappings_to_csv(uri_map, features, stream_times)

# Build the streaming-time table including URIs, excluding tracks that do not have a URI mapping.
# NOTE(review): presumably also written out as a CSV -- confirm.
stream_df = stream_time_to_csv(songs, uri_map)

# Combine both streaming data and track feature data to be used for visualization
combined_df = combine_feature_stream(feature_df, stream_df)

# Create monthly data to be used for visualization
# (the helper name `monthy_artist_metrics` is spelled this way where it is defined)
monthly_df = monthy_artist_metrics(combined_df)

# Create artist detail to be used for tooltips
details = artist_detail_metrics(combined_df)

In [None]:
# Preview the first rows of the stream-history CSV (presumably written by the pipeline cell above -- confirm).
pd.read_csv("data/dataframes/stream_history.csv").head()

In [None]:
# Preview the first rows of the track-features CSV (presumably written by the pipeline cell above -- confirm).
pd.read_csv("data/dataframes/track_features.csv").head()

In [None]:
# Preview the first rows of the combined-data CSV (presumably written by the pipeline cell above -- confirm).
pd.read_csv("data/dataframes/combined_data.csv").head()