In [1]:
from simplejustwatchapi.justwatch import search
import json
import pandas as pd
import time

In [2]:
#Set the streaming country and language of your choice.
#Both values are passed to the JustWatch search for every title in the watchlist.
streaming_country = "TR"
streaming_language = "tr"

In [3]:
#Set the streaming services that you have a membership for.
#Some examples (not the full list):
#'Netflix', 'MUBI', 'Apple TV', 'Amazon Prime Video', 'Google Play Movies', 'Disney Plus', 'blutv', 'EXXEN', 'TV+', 'puhutv', 'WOW Presents Plus'
#Names are compared by exact string match against the names extracted from the search results.
my_streaming_services = ['Netflix', 'MUBI', 'Amazon Prime Video', 'blutv', 'EXXEN']

In [4]:
#Set which title types you want to include.
#Possible values: 'Movie', 'TV Mini Series', 'TV Series', 'Short'
#Rows whose 'Title Type' is not listed here are dropped from the watchlist.
title_type = ["Movie", "Short"]

In [5]:
#Retrieve IMDB Watchlist
#Can be downloaded via https://www.imdb.com/user/{user_id}/watchlist/

# Point file_path at your exported watchlist CSV
file_path = 'my watchlist.csv'
df = pd.read_csv(file_path)

# Columns of the export that are not needed for the streaming lookup
columns_to_remove = [
    'Position', 'Const', 'Created', 'Modified', 'Description',
    'URL', 'Num Votes', 'Release Date', 'Your Rating', 'Date Rated',
]

# Drop the unused columns, keep only the rows whose 'Title Type' is listed in
# title_type, and renumber the remaining rows from 0
simplified_df = (
    df.drop(columns = columns_to_remove)
      .loc[lambda frame: frame['Title Type'].isin(title_type)]
      .reset_index(drop=True)
)

In [11]:
#Function to fetch data from Justwatch API
def fetch_data(title, streaming_country, streaming_language, retries = 5, delay = 5):
    """Search JustWatch for ``title``, retrying when the request raises.

    Args:
        title: Title to search for.
        streaming_country: Country code forwarded to ``search``.
        streaming_language: Language code forwarded to ``search``.
        retries: Maximum number of attempts before giving up.
        delay: Seconds to wait between two attempts.

    Returns:
        Whatever ``search`` returns on the first attempt that does not raise,
        or an empty list when every attempt raised.
    """
    for attempt in range(retries):
        try:
            # NOTE(review): the positional 1 and True are presumably the result
            # count and the "best offer only" flag -- confirm against the
            # installed simplejustwatchapi version.
            return search(title, streaming_country, streaming_language, 1, True)
        except Exception as e:  # Replace with the actual exception you expect
            print(f"An error occurred: {e}")
            # Wait before the next attempt; no point sleeping after the last one
            if attempt < retries - 1:
                time.sleep(delay)
    print("Failed to fetch search results after multiple retries.")
    return []

In [13]:
#Function to retrieve streaming platform information from fetched data
def extract_streamers(json_result):
    """Return the streaming-service names offered for the first search hit.

    Only the first entry of ``json_result`` is inspected. Entries are read
    positionally: the last field of the entry is taken as its list of offers.

    Args:
        json_result: Sequence of search results, as returned by ``fetch_data``.

    Returns:
        A list with one service name per offer, in offer order. Empty when
        ``json_result`` is empty or the first hit has no offers.
    """
    # Nothing found: there is no first hit to look at
    if not json_result:
        return []

    offers = json_result[0][-1]

    # NOTE(review): field 8 of an offer is presumably its package and field 2
    # of the package its display name -- confirm against the installed
    # simplejustwatchapi version.
    return [offer[8][2] for offer in offers]

In [15]:
#Takes the watchlist dataframe and looks up the streaming platforms of every row:
#first by the original title and, if that search returns nothing, by the (English) title.
#service_lists ends up with one list of service names per row, in row order.
service_lists = []

for title, alternative_title in zip(simplified_df["Original Title"], simplified_df["Title"]):
    results = fetch_data(title, streaming_country, streaming_language, retries = 5, delay = 5)

    if not results:
        # Fall back to the alternative title; the search is issued once and its result reused
        results = fetch_data(alternative_title, streaming_country, streaming_language, retries = 5, delay = 5)

    # No hit under either title means no known streaming platform for this row
    service_list = extract_streamers(results) if results else []
    service_lists.append(service_list)

print(f"Streaming platforms for {len(service_lists)} movies fetched successfully.") 

Streaming platforms for 337 movies fetched successfully.


In [17]:
#Keep, for every title, only the services that are in my_streaming_services
filtered_service_lists = [
    [service for service in services if service in my_streaming_services]
    for services in service_lists
]

In [18]:
#Adds the filtered streaming platforms to the watchlist as a new 'Service List' column
#(filtered_service_lists holds one list per row of simplified_df, in row order)
simplified_df['Service List'] = filtered_service_lists

In [19]:
#Keep only the titles that are available on at least one of my streaming services
#(an empty 'Service List' converts to False, so those rows are dropped)
df_filtered = simplified_df[simplified_df['Service List'].astype(bool)]

In [20]:
# Display the watchlist titles that have at least one matching streaming service
df_filtered

Unnamed: 0,Title,Original Title,Title Type,IMDb Rating,Runtime (mins),Year,Genres,Directors,Service List
1,Jackie Brown,Jackie Brown,Movie,7.5,154.0,1997,"Crime, Drama, Thriller",Quentin Tarantino,[Netflix]
2,The Godfather,The Godfather,Movie,9.2,175.0,1972,"Crime, Drama",Francis Ford Coppola,[Netflix]
3,Schindler's List,Schindler's List,Movie,9.0,195.0,1993,"Biography, Drama, History",Steven Spielberg,[Netflix]
11,Cloud Atlas,Cloud Atlas,Movie,7.4,172.0,2012,"Drama, Mystery, Sci-Fi, Thriller","Tom Tykwer, Lana Wachowski, Lilly Wachowski",[MUBI]
13,Spotlight,Spotlight,Movie,8.1,129.0,2015,"Biography, Crime, Drama",Tom McCarthy,[Netflix]
...,...,...,...,...,...,...,...,...,...
327,Saint Omer,Saint Omer,Movie,6.8,122.0,2022,Drama,Alice Diop,[MUBI]
328,Evil Does Not Exist,Aku wa sonzai shinai,Movie,7.1,106.0,2023,Drama,Ryûsuke Hamaguchi,[MUBI]
332,The Northman,The Northman,Movie,7.0,137.0,2022,"Action, Adventure, Drama, Fantasy, History, Th...",Robert Eggers,[Amazon Prime Video]
333,Parallel Mothers,Madres paralelas,Movie,7.1,123.0,2021,Drama,Pedro Almodóvar,[MUBI]


In [27]:
# Save the filtered watchlist to a CSV file, without the dataframe index
df_filtered.to_csv('watchlist_streaming_service.csv', index = False)