In [1]:
import os
import time
from datetime import datetime, timedelta

import spotipy
from pymongo import MongoClient
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# MongoDB: connect to the local server and bind the collections of the
# "project" database that this notebook reads from and writes to.
client = MongoClient(host="localhost", port=27017)
db = client["project"]
Movies_col, Movie_Albums_col, spotify_col = (
    db[collection_name]
    for collection_name in ("Movies", "Movie_Albums", "Spotify")
)

In [3]:
# Spotify API credentials
# Read from the environment instead of hardcoding them: a secret stored in a
# notebook is exposed to everyone who can see the file or its history.
# Set SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET before starting the kernel;
# a missing variable raises KeyError here rather than failing later on the
# first API call.
# NOTE(review): the client secret that used to be hardcoded in this cell should
# be treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ["SPOTIPY_CLIENT_ID"]
client_secret = os.environ["SPOTIPY_CLIENT_SECRET"]

# Auth manager
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

In [4]:
def get_all_movie_titles_by_date(start_date_str, end_date_str):
    """Return the titles of movies released within an inclusive date range.

    Args:
        start_date_str: First release date to include, parseable as "%Y-%m-%d".
        end_date_str: Last release date to include, parseable as "%Y-%m-%d".

    Returns:
        list: The ``title`` of every matching document in the ``Movies``
        collection; documents whose title is missing or empty are skipped.

    Raises:
        ValueError: If either argument does not parse as "%Y-%m-%d".
    """
    date_format = "%Y-%m-%d"

    # The query compares against string bounds (presumably release_date is
    # stored as "YYYY-MM-DD" text -- TODO confirm against the Movies schema).
    # strptime also accepts unpadded input such as "2025-1-5", which would
    # compare incorrectly as a string, so each bound is parsed (validation)
    # and rendered back in zero-padded form before it is used.
    start_key = datetime.strptime(start_date_str, date_format).strftime(date_format)
    end_key = datetime.strptime(end_date_str, date_format).strftime(date_format)

    query = {"release_date": {"$gte": start_key, "$lte": end_key}}
    # Only the title is consumed below, so nothing else is requested.
    projection = {"_id": 0, "title": 1}

    return [
        doc["title"]
        for doc in db["Movies"].find(query, projection)
        if doc.get("title")
    ]

# Titles of the movies released from 2025-01-01 through 2025-01-02 (inclusive).
movie_lists = get_all_movie_titles_by_date(
    start_date_str="2025-01-01",
    end_date_str="2025-01-02",
)

In [8]:
def search_and_store_spotify_album(term):
    """Search Spotify for a movie's soundtrack album and store the top hit.

    Args:
        term: Movie title used as the search term.

    Returns:
        None. For each album returned by the search (at most one, because
        ``limit=1``), a document ``{"search_term": term, "raw_data": album}``
        is inserted into ``Movie_Albums_col``.
    """
    # Keep a space between the title and the suffix; plain concatenation
    # produced queries such as "InceptionMovie Sound Track".
    search_query = f"{term} Movie Sound Track"
    results = sp.search(q=search_query, type="album", limit=1)

    for album in results["albums"]["items"]:
        Movie_Albums_col.insert_one({"search_term": term, "raw_data": album})

def fetch_spotify_albums_for_all_movies(movie_titles=None):
    """Run the Spotify album search for a batch of movie titles.

    Args:
        movie_titles: Iterable of titles to search for. When omitted, the
            notebook-level ``movie_lists`` is used, which keeps the original
            zero-argument call working.

    Returns:
        None. One progress or error line is printed per title.
    """
    if movie_titles is None:
        movie_titles = movie_lists

    for title in movie_titles:
        try:
            search_and_store_spotify_album(title)
            print(f"Fetched album(s) for: {title}")
        except Exception as e:
            # Best effort: a failure on one title is reported and the batch
            # continues with the next title.
            print(f"Error fetching for '{title}': {e}")

# Search Spotify once per title in movie_lists; each album hit is inserted
# into the Movie_Albums collection, and one status line is printed per title.
fetch_spotify_albums_for_all_movies()

Fetched album(s) for: The Wizard of the Emerald City, Part 1
Fetched album(s) for: The Love Scam
Fetched album(s) for: Don't Die: The Man Who Wants to Live Forever
Fetched album(s) for: Eephus
Fetched album(s) for: How to Make a Killing
Fetched album(s) for: Seeking Haven for Mr. Rambo
Fetched album(s) for: Reality+
Fetched album(s) for: Rambo
Fetched album(s) for: Dove osano le cicogne
Fetched album(s) for: Quiet Life
Fetched album(s) for: Knock on my Tver
Fetched album(s) for: A Message from Martha
Fetched album(s) for: Delirium
Fetched album(s) for: Pororo: Underwater Adventure
Fetched album(s) for: Mukkam Post Bombilwaadi
Fetched album(s) for: Khashaba
Fetched album(s) for: End of History
Fetched album(s) for: All My Friends
Fetched album(s) for: New Year's Concert 2025
Fetched album(s) for: Géminis
Fetched album(s) for: Checkmate
Fetched album(s) for: Parvin
Fetched album(s) for: WWE Best of Raw 2024 Special
Fetched album(s) for: El Dashash
Fetched album(s) for: Meteors
Fetched al

In [5]:
def get_album_ids_from_raw_data():
    """Collect the ``raw_data.id`` value of every ``Movie_Albums`` document.

    Returns:
        list: One id per stored document, in cursor order. Repeated ids are
        kept as they are.
    """
    # Ask only for the nested id field rather than the whole stored album.
    id_only_projection = {"raw_data.id": 1}
    return [
        entry["raw_data"]["id"]
        for entry in db["Movie_Albums"].find({}, id_only_projection)
    ]


In [6]:
# Load the id of every album document currently stored in Movie_Albums.
album_ids = get_album_ids_from_raw_data()
# Last expression of the cell: shows how many ids were loaded.
len(album_ids)

53933

In [None]:
from collections import OrderedDict
from pprint import pprint

def _summarize_search_hit(raw_info):
    """Pick the summary fields kept from the search result stored in Movie_Albums."""
    return {
        "album_id": raw_info.get("id"),
        "album_name": raw_info.get("name"),
        "release_date": raw_info.get("release_date"),
        "total_tracks": raw_info.get("total_tracks"),
        "spotify_url": raw_info.get("external_urls", {}).get("spotify"),
        "artists": [artist.get("name") for artist in raw_info.get("artists", [])],
    }


def _select_album_details(album_raw):
    """Pick the fields kept from the full album object returned by ``sp.album``."""
    return {
        "id": album_raw.get("id"),
        "name": album_raw.get("name"),
        "release_date": album_raw.get("release_date"),
        "total_tracks": album_raw.get("total_tracks"),
        "type": album_raw.get("album_type"),
        "uri": album_raw.get("uri"),
        "artists": album_raw.get("artists"),
        "tracks": album_raw.get("tracks"),
        "popularity": album_raw.get("popularity"),
        "label": album_raw.get("label"),
        "copyrights": album_raw.get("copyrights"),
    }


def fetch(album_ids, start=10000, stop=13000):
    """Enrich one batch of stored albums with full Spotify details.

    For every id in ``album_ids[start:stop]`` the matching ``Movie_Albums``
    document is looked up, the full album is requested from Spotify, and the
    combined document is upserted, keyed on ``album_info.album_id``.

    Args:
        album_ids: Sequence of Spotify album ids.
        start: Slice start of the batch to process. The default keeps the
            batch that was previously hardcoded in the function body.
        stop: Slice end (exclusive) of the batch to process. Pass
            ``start=None, stop=None`` to process the whole sequence.

    Returns:
        None. One status line is printed per album id (stored, skipped, or
        error).
    """
    for album_id in album_ids[start:stop]:
        try:
            # Lookup from Movie_Albums: dot-notation equality on the nested id.
            movie_doc = db["Movie_Albums"].find_one({"raw_data.id": album_id})

            if not movie_doc:
                print(f" Skipping. Album ID {album_id} not found in Movie_Albums.")
                continue

            movie_title = movie_doc.get("search_term", "UNKNOWN TITLE")
            album_info = _summarize_search_hit(movie_doc.get("raw_data", {}))

            # Fetch from Spotify API
            album_details = _select_album_details(sp.album(album_id))

            # Combine in desired order (a plain dict keeps insertion order).
            final_doc = {
                "movie_title": movie_title,
                "album_info": album_info,
                "album_details": album_details,
            }

            # Store in the "spotify" collection.
            # NOTE(review): the setup cell binds spotify_col = db["Spotify"]
            # (capital S), which is a different collection from "spotify" --
            # confirm which one is intended before changing the name here,
            # since earlier batches may already live in "spotify".
            db["spotify"].update_one(
                {"album_info.album_id": album_id},
                {"$set": final_doc},
                upsert=True,
            )

            print(f" Stored to Spotify: {movie_title} ({album_id})")

        except Exception as e:
            # Best effort: report the failure and continue with the next id.
            print(f" Error processing album ID {album_id}: {e}")

# Enrich and store one batch: called this way, fetch processes only the
# album_ids[10000:13000] slice of the list.
fetch(album_ids)



 Stored to Spotify: Inception (2qvA7HmSg1iM6XMiFF76dp)
