# Create playlists database

In [None]:
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm.notebook import tqdm
import pandas as pd
import random
import spotipy

In [None]:
# Build the Spotify API client using the client-credentials manager.
# NOTE(review): no credentials are passed here, so they are presumably picked
# up from the environment (SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET) --
# confirm against the spotipy documentation before running.
auth_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(auth_manager=auth_manager)

## Read Spotify users file

In [None]:
# Load the Spotify user IDs, one per line. Surrounding whitespace is trimmed
# and blank lines are skipped so that a stray empty line in the file (e.g. a
# trailing one) does not become an empty user ID that is later sent to the API.
with open('../../data/sp_users.txt') as f:
    users = [user_id for user_id in (line.strip() for line in f) if user_id]

## Gather the playlists of the users

In [None]:
# Accumulate the playlist objects of every user. The API answers in pages, so
# each page's items are stored and the 'next' link is followed until it is
# empty.
playlists = []
for user in tqdm(users):
    page = sp.user_playlists(user)
    while True:
        playlists.extend(page['items'])
        if not page['next']:
            break
        page = sp.next(page)

### Optional: get number of followers info

In [None]:
# for i, playlist in tqdm(enumerate(playlists.copy()), total=len(playlists.copy())):
#     playlists[i] = sp.playlist(playlists[i]['id'])

## Treat dataset

In [None]:
# Filter columns: keep only the playlist fields of interest (the commented-out
# names are the remaining fields, left here so they are easy to re-enable).
df = pd.DataFrame(playlists, columns=[
    'collaborative',
    'description',
#     'external_urls',
#     'followers',
#     'href',
    'id',
#     'images',
    'name',
    'owner',
    'primary_color',
    'public',
#     'snapshot_id',
    'tracks',
#     'type',
#     'uri'
])

# Expand owner dict into two scalar columns. The keys are looked up row by
# row so the cell also works when `playlists` is empty or an owner entry is
# not a dict (the value is then left as None).
owners = df['owner']
df2 = pd.DataFrame({
    'owner_display_name': [
        owner.get('display_name') if isinstance(owner, dict) else None
        for owner in owners
    ],
    'owner_id': [
        owner.get('id') if isinstance(owner, dict) else None
        for owner in owners
    ],
}, index=df.index)
df = df.join(df2).drop(columns='owner')

# Remove duplicates. drop_duplicates returns a new frame rather than modifying
# `df`, so the result must be assigned back; only the first row of each
# playlist ID is kept (e.g. when the same playlist was listed more than once).
df = df.drop_duplicates(subset='id')

# Playlist ID as index
df = df.set_index('id')

## Write CSV

In [None]:
# Persist the playlist table; the index (the playlist ID) is written as the
# first column of the CSV.
output_path = '../../data/sp_playlists.csv'
df.to_csv(output_path, index=True)