# Package installation

In [None]:
!pip install spotipy
!pip install pymongo
!pip install nbconvert

# Libraries

In [1]:
import pyspark
from pyspark.sql import SparkSession
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import datetime
import os
import pymongo
from pymongo import MongoClient

# Configuring MongoDB

In [2]:
# MongoDB configuration
mongo_uri = "mongodb://localhost:27017/?directConnection=true"

try:
    client = MongoClient(mongo_uri,
        connectTimeoutMS=30000,
        socketTimeoutMS=None)
    print("Connection successful")
except:
    print("Unsuccessful")


db = client["spotify_test"]
doc = {"test": "success"}
db["spotify_data"].insert_one(doc)

Connection successful


<pymongo.results.InsertOneResult at 0x28da3e641c0>

# Spotify API authentication
- Includes test to ensure tokens are active and operating

In [11]:
#Authentication code for Spotify users
client_id = 'b99285112ec748a395163cb8fa403fa9'
client_secret = '40fb6a4179d94c1bbc564f9bf26e3aa8'
redirect_uri = 'http://127.0.0.1:8080/callback'
scope = 'user-library-read playlist-modify-public'

#Prompt for user authentication
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id, client_secret=client_secret, 
                                               redirect_uri=redirect_uri, scope=scope))


#Test access
print('Printing users most recently saved songs...\n')
results = sp.current_user_saved_tracks()
for track in results['items']:
    print(track['track']['name'], '-', track['track']['artists'][0]['name'])

Printing users most recently saved songs...

The Question (feat. Lil Wayne) - Mac Miller
N95 - Kendrick Lamar
From The Ritz To The Rubble - Arctic Monkeys
Joker And The Thief - Wolfmother
Gloom - Djo
Freakin' Out On the Interstate - Briston Maroney
CAN YOU HEAR THE MOON - Grady
United In Grief - Kendrick Lamar
Thoughts of the Night - INNR CIRCLE
Spillways [Feat. Joe Elliott] - Ghost
インフェルノ - Mrs. GREEN APPLE
Little Girl - Death From Above 1979
Dimension - Wolfmother
Happen To Me - BENEE
C'est La Vie (with bbno$ & Rich Brian) - Yung Gravy
New Gold (feat. Tame Impala and Bootie Brown) - Gorillaz
Small Worlds - Mac Miller
Skin and Bones - Cage The Elephant
Typical Story - Hobo Johnson
Charmander - Aminé


# Category Collection creation

In [4]:
mongo_database_name = "spotify"

# Set up the pymongo client
mongo_client = pymongo.MongoClient(mongo_uri)
mongo_database = mongo_client[mongo_database_name]

# Import top categories from Spotify
categories_results = sp.categories(limit=20)
categories = [category['name'].lower() for category in categories_results['categories']['items']]

# Create a collection for each category
for category in categories:
    collection_name = f"{category}_playlist"
    collection = mongo_database[collection_name]
    print(f"Creating collection: {collection_name}")
    collection.insert_one({'test': 'success'})
    
    # Remove 'test' field after successful import
    if 'test' in collection.find_one():
        collection.update_many({}, {"$unset": {'test': 1}})
        print(f"Successfully removed 'test' field from {collection_name}")
    else:
        print(f"Collection {collection_name} does not contain 'test' field")

Creating collection: top lists_playlist
Collection top lists_playlist does not contain 'test' field
Creating collection: hip-hop_playlist
Collection hip-hop_playlist does not contain 'test' field
Creating collection: pop_playlist
Collection pop_playlist does not contain 'test' field
Creating collection: country_playlist
Collection country_playlist does not contain 'test' field
Creating collection: latin_playlist
Collection latin_playlist does not contain 'test' field
Creating collection: rock_playlist
Collection rock_playlist does not contain 'test' field
Creating collection: summer_playlist
Collection summer_playlist does not contain 'test' field
Creating collection: workout_playlist
Collection workout_playlist does not contain 'test' field
Creating collection: r&b_playlist
Collection r&b_playlist does not contain 'test' field
Creating collection: dance/electronic_playlist
Collection dance/electronic_playlist does not contain 'test' field
Creating collection: netflix_playlist
Collecti

# Import Song Data to Collections

In [12]:
mongo_database_name = "spotify"

# Set up the pymongo client
mongo_client = pymongo.MongoClient(mongo_uri)
mongo_database = mongo_client[mongo_database_name]

# Import top categories from Spotify
categories_results = sp.categories(limit=20)
categories = [category['name'].lower() for category in categories_results['categories']['items']]

# Timestamp
now = datetime.datetime.now()

# Check last category import date to avoid duplicate imports
for category in categories:
    collection_name = category + '_playlist'
    collection = mongo_database[collection_name]
    last_import = collection.find_one(sort=[("import_date", pymongo.DESCENDING)])
    if last_import is None or now - last_import.get('import_date', 
                                                    datetime.datetime.min) > datetime.timedelta(days=1):
        print(f'pulling {category} playlist data')
        # Search for playlists and extract relevant song information
        playlist_results = sp.search(q=f'category:"{category}"', type='playlist', limit=50)
        playlist_data = []
        for playlist in playlist_results['playlists']['items']:
            tracks = sp.playlist_tracks(playlist['id'])
            for track in tracks['items']:
                if track['track'] is not None and track['track']['name'] is not None:
                    song_info = {
                        'song_name': track['track']['name'],
                        'artist': track['track']['artists'][0]['name'],
                        'album': track['track']['album']['name'],
                        'playlist_name': playlist['name'],
                        'playlist_id': playlist['id'],
                        'track_uri': track['track']['uri'],
                        'import_date': now
                    }
                    playlist_data.append(song_info)
                else:
                    print("Invalid track data:", track)
        # Write categories to MongoDB
        if playlist_data:
            # check if the collection already exists in the database
            if collection_name not in mongo_database.list_collection_names():
                mongo_database.create_collection(collection_name)
                print(f'{collection_name} collection created')
            collection.insert_many(playlist_data)
            print(f'finished pulling {category} playlist data')
        else:
            print(f'no new {category} playlist data')
    else:
        print(f'{category} playlist data already up to date')
print('data upload to mongodb completed')

pulling top lists playlist data
no new top lists playlist data
pulling hip-hop playlist data
finished pulling hip-hop playlist data
pulling pop playlist data
finished pulling pop playlist data
pulling country playlist data
finished pulling country playlist data
pulling latin playlist data
finished pulling latin playlist data
pulling rock playlist data
finished pulling rock playlist data
pulling summer playlist data
finished pulling summer playlist data
pulling workout playlist data
finished pulling workout playlist data
pulling r&b playlist data
finished pulling r&b playlist data
pulling dance/electronic playlist data
finished pulling dance/electronic playlist data
pulling netflix playlist data
no new netflix playlist data
pulling indie playlist data
finished pulling indie playlist data
pulling mood playlist data
Invalid track data: {'added_at': '2020-04-05T02:00:33Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/1299737591'}, 'href': 'https://api.spotify.co

# Playlist Generation
- Prompts for User Input regarding both category selection and per category song counts

In [6]:
# Playlist Generation
now = datetime.datetime.now()
playlist_name = now.strftime('%Y-%m-%d %H:%M:%S')

# Category Names
category_names = mongo_database.list_collection_names()

# Prompt for user input - categories for playlist generation
print(f"Categories available in the database: {', '.join(category_names)}")
categories = input("Enter categories to include in the playlist (separated by commas): ").split(",")
categories = [c.strip().lower() for c in categories]

# Prompt for user input - number of songs
num_songs = int(input("Enter number of songs to include from each selected category: "))

# Create playlist
user_id = sp.current_user()['id']
playlist = sp.user_playlist_create(user=user_id, name=playlist_name)

# Select random songs and add to playlist
for category in categories:
    if category in categories:
        collection_name = category + '_playlist'
        if collection_name in category_names:
            collection = mongo_database[collection_name]
            pipeline = [{'$sample': {'size': num_songs}}]
            results = list(collection.aggregate(pipeline))
            if results:
                song_info = []
                for result in results:
                    if result.get('song_name'):
                        song_info.append(result)
                    else:
                        print(f"No song found for {category} category")
                if song_info:
                    track_uris = [song.get('track_uri') for song in song_info]
                    sp.user_playlist_add_tracks(user=user_id, playlist_id=playlist['id'], 
                                                tracks=track_uris)
                else:
                    print(f"No song found for {category} category")
            else:
                print(f"No data found for {category} category")
        else:
            print(f"{category} category doesn't exist in the database")

print(f"Playlist '{playlist_name}' created in your Spotify account")

Categories available in the database: dance/electronic_playlist, workout_playlist, rock_playlist, equal_playlist, top lists_playlist, pop_playlist, country_playlist, r&b_playlist, summer_playlist, gaming_playlist, christian & gospel_playlist, hip-hop_playlist, latin_playlist, netflix_playlist, mood_playlist, regional mexican_playlist, indie_playlist, chill_playlist, sleep_playlist, wellness_playlist
Enter categories to include in the playlist (separated by commas): workout,rock,hip-hop
Enter number of songs to include from each selected category: 30
Playlist '2023-03-31 19:17:02' created in your Spotify account
