In [2]:
from dotenv import load_dotenv
import os
import requests

load_dotenv()

TMBD_API_KEY=os.environ["TMDB_API_KEY"]
TMBD_ACCESS_TOKEN=os.environ["TMDB_ACCESS_TOKEN"]

In [3]:
def get_movie_reviews(movie_id):
    url = f'https://api.themoviedb.org/3/movie/{movie_id}/reviews?&page=1'
    headers = {
    'Authorization': 'Bearer '+TMBD_ACCESS_TOKEN,
    'accept': 'application/json',
    }
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        reviews_data = response.json()
        reviews_list = []
        

        for review in reviews_data.get("results", []):
            review_info = {
                "movie_id": movie_id,
                "rating": review["author_details"].get("rating"),
                "content": review["content"]
            }
            reviews_list.append(review_info)
        
        return reviews_list
    else:
        print("Failed to fetch reviews from TMDB API")
        return []  

In [4]:
def fetch_reviews_for_watched_movies(watched_movies):
    movie_reviews_dict = {}
    for movie_id in watched_movies:
        reviews = get_movie_reviews(movie_id)
        movie_reviews_dict[movie_id] = reviews
    return movie_reviews_dict

In [5]:
movies = [550,551]
reviews = fetch_reviews_for_watched_movies(movies)
print(reviews)

{550: [{'movie_id': 550, 'rating': None, 'content': 'Pretty awesome movie.  It shows what one crazy person can convince other crazy people to do.  Everyone needs something to believe in.  I recommend Jesus Christ, but they want Tyler Durden.'}, {'movie_id': 550, 'rating': 9.0, 'content': 'In my top 5 of all time favourite movies. Great story line and a movie you can watch over and over again.'}, {'movie_id': 550, 'rating': 8.0, 'content': 'If you enjoy reading my Spoiler-Free reviews, please follow my blog @\r\nhttps://www.msbreviews.com\r\n\r\nDavid Fincher’s new film, Mank, is coming soon on Netflix, released six years after his latest installment, Gone Girl. Therefore, this week I’m reviewing five of Fincher’s movies. Se7en was the first one, and now it’s time for one of the most culturally impactful films of the 90s, Fight Club. This is another rewatch of another filmmaking classic, one that I was never able to absolutely adore like most people. When this movie came out in 1999, cr

In [6]:
# rake to get keywords
def ensure_nltk_data():

    import nltk
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')

from rake_nltk import Rake

def extract_keywords_from_all_reviews(movie_reviews_dict):
    ensure_nltk_data()  
    movie_keywords_dict = {} 
    
    for movie_id, reviews in movie_reviews_dict.items():
        rake = Rake()
        all_phrases_with_scores = []
        for review in reviews:
            rake.extract_keywords_from_text(review['content'])
            phrases_with_scores = rake.get_ranked_phrases_with_scores()
            all_phrases_with_scores.extend(phrases_with_scores)

        # Sort phrases by score in descending order and keep unique
        sorted_phrases_with_scores = sorted(all_phrases_with_scores, key=lambda x: x[0], reverse=True)
        unique_keywords_with_scores = []
        seen_keywords = set()
        for score, phrase in sorted_phrases_with_scores:
            if phrase not in seen_keywords:
                unique_keywords_with_scores.append((score, phrase))
                seen_keywords.add(phrase)
            if len(unique_keywords_with_scores) == 100: 
                break

        movie_keywords_dict[movie_id] = unique_keywords_with_scores

    return movie_keywords_dict

In [7]:
positive_keywords = [
    "3-D", "absorbing", "acclaimed", "adult", "adventurous", "ambitious", "artistic",
    "astonishing", "avant-garde", "award-winning", "awe-inspiring", "based on", "beautiful",
    "beautifully filmed", "beautifully shot", "big-budget", "bold", "breathtaking", "brilliant",
    "captured", "cerebral", "character-driven", "charismatic", "cinematic", "coherent", "colorful",
    "comic", "compelling", "complex", "conceptual", "contemplative", "contemporary", "controversial",
    "conversational", "convincing", "creative", "critically acclaimed", "cult", "current", "daring",
    "deep", "important", "in-depth", "independent", "infused", "insightful", "inspirational", "inspired",
    "intellectual", "intellectually invigorating", "intelligent", "intense", "intensive", "interesting",
    "introspective", "intuitive", "inventive", "inventively edited", "ironic", "layered", "legendary",
    "light-hearted", "magical", "magnetic", "mature", "meaningful", "memorable", "mind-blowing", "modern",
    "moving", "must-see", "mysterious", "mystical", "narrative", "non-stop", "offbeat", "original",
    "passionate", "phenomenal", "playful", "plot-driven", "ponderous", "delightful", "dizzying", "dramatic",
    "edgy", "effective", "elevating", "eloquent", "emotional", "emotionally charged", "emotionally resonant",
    "enchanted", "engaging", "engrossing", "enigmatic", "entertaining", "epic", "evocative", "exceptional",
    "exciting", "exquisite", "extraordinary", "family-friendly", "fascinating", "fast-paced", "feel-good",
    "filmed", "filmed live", "fluid", "fresh", "fun", "funny", "futuristic", "graceful", "graphic", "gripping",
    "highly original", "historical", "honest", "humorous", "imaginative", "immensely talented",
    "potent", "powerful", "profound", "provoking", "pure", "quirky", "rated", "realistic", "recommended",
    "refined", "refreshing", "relevant", "remarkable", "resourceful", "revealing", "rich", "riveting",
    "romantic", "rousing", "sad", "sappy", "satirical", "sentimental", "sexy", "small-budget", "star-studded",
    "strong", "stunning", "superb", "suspenseful", "sweet", "theatrical", "thrilling", "touching",
    "underground", "unforgettable", "visionary", "visual", "well-paced", "worthwhile"
]


In [8]:
# more filteration using spacy 
import spacy
nlp = spacy.load("en_core_web_md")

def filter_keywords(text, positive_keywords, nlp):
    doc = nlp(text)
    matched_keywords = []


    keywords_tokens = [nlp(keyword)[0] for keyword in positive_keywords]


    for token in doc:

        if token.pos_ == 'ADJ':
            # Check each keyword token for similarity
            for keyword_token in keywords_tokens:
                # Check if the token matches or is similar to the keywords in your list
                if token.text.lower() == keyword_token.text.lower() or token.similarity(keyword_token) > 1.0:
                    matched_keywords.append(token.text.lower())
    

    return list(set(matched_keywords))



In [9]:
# recommendation part 
def list_to_pipe_string(lst):

    return "|".join(map(str, lst))

def discover_movies_by_genre(genre_ids):
    all_filtered_results = [] 

    for page in range(1, 10):  
        url = f'https://api.themoviedb.org/3/discover/movie?with_genres={"|".join(genre_ids)}&page={page}'
        headers = {
            'Authorization': 'Bearer ' + TMBD_ACCESS_TOKEN,
            'accept': 'application/json',
        }
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            results = response.json()['results']
            filtered_results = [
                {
                    'id': movie['id'],
                    'original_title': movie['original_title'],
                    #'popularity': movie['popularity'],
                    #'vote_count': movie['vote_count']
                }
                for movie in results
            ]
            all_filtered_results.extend(filtered_results)  
        else:
            print(f"Failed to fetch movies from TMDB API for page {page}")
            break  
    return all_filtered_results


In [10]:
favourite_genres = [53]
favourite_genres = [str(genre) for genre in favourite_genres]
movies =  discover_movies_by_genre(favourite_genres)
print(len(movies))
print(movies)

180
[{'id': 1096197, 'original_title': 'No Way Up'}, {'id': 870404, 'original_title': 'Meteor'}, {'id': 969492, 'original_title': 'Land of Bad'}, {'id': 984249, 'original_title': 'Ruthless'}, {'id': 1227816, 'original_title': 'Red Right Hand'}, {'id': 866398, 'original_title': 'The Beekeeper'}, {'id': 714567, 'original_title': '#хочувигру'}, {'id': 1211483, 'original_title': 'Skal - Fight for Survival'}, {'id': 1028703, 'original_title': 'The OctoGames'}, {'id': 872542, 'original_title': 'Agent Game'}, {'id': 915935, 'original_title': "Anatomie d'une chute"}, {'id': 931642, 'original_title': 'Lights Out'}, {'id': 838209, 'original_title': '파묘'}, {'id': 1026436, 'original_title': "Miller's Girl"}, {'id': 823491, 'original_title': 'Out of Darkness'}, {'id': 799155, 'original_title': 'Attack'}, {'id': 385687, 'original_title': 'Fast X'}, {'id': 949697, 'original_title': 'The One Hundred'}, {'id': 1183905, 'original_title': 'Trunk - Locked In'}, {'id': 982940, 'original_title': 'Due Justic

In [11]:
# i have another funtion
def fetch_movie_reviews(movie_id):
    reviews_url = f'https://api.themoviedb.org/3/movie/{movie_id}/reviews?language=en-US&page=1'
    headers = {
    'Authorization': 'Bearer '+TMBD_ACCESS_TOKEN,
    'accept': 'application/json',
    }
    response = requests.get(reviews_url,headers=headers)
    if response.status_code == 200:
        reviews_data = response.json()
        reviews_list = []
        

        for review in reviews_data.get("results", []):
            review_info = {
                "movie_id": movie_id,
                "rating": review["author_details"].get("rating"),
                "content": review["content"]
            }
            reviews_list.append(review_info)
        
        return reviews_list
    return []

In [29]:
genre_ids = ["878"] 
movies = discover_movies_by_genre(genre_ids)
print(movies)

[{'id': 792307, 'original_title': 'Poor Things'}, {'id': 1239251, 'original_title': 'Megamind vs. the Doom Syndicate'}, {'id': 932420, 'original_title': 'Code 8 Part II'}, {'id': 693134, 'original_title': 'Dune: Part Two'}, {'id': 870404, 'original_title': 'Meteor'}, {'id': 1094556, 'original_title': 'The Thundermans Return'}, {'id': 438631, 'original_title': 'Dune'}, {'id': 609681, 'original_title': 'The Marvels'}, {'id': 636706, 'original_title': 'Spaceman'}, {'id': 940721, 'original_title': 'ゴジラ-1.0'}, {'id': 823464, 'original_title': 'Godzilla x Kong: The New Empire'}, {'id': 624091, 'original_title': 'Sri Asih'}, {'id': 949697, 'original_title': 'The One Hundred'}, {'id': 933131, 'original_title': '황야'}, {'id': 1207413, 'original_title': 'Dark Parasite'}, {'id': 799155, 'original_title': 'Attack'}, {'id': 569094, 'original_title': 'Spider-Man: Across the Spider-Verse'}, {'id': 667538, 'original_title': 'Transformers: Rise of the Beasts'}, {'id': 1078012, 'original_title': '熊出没·伴我“

In [13]:
def fetch_reviews_for_genre_movies(genre_ids):
    movies = discover_movies_by_genre(genre_ids)
     
    movie_reviews_dict = {} 
    # Iterate over each movie to fetch its reviews
    for movie in movies:
        movie_id = movie['id']
        reviews = fetch_movie_reviews(movie_id)  
        movie_reviews_dict[movie_id] = reviews    
    return movie_reviews_dict



In [14]:
# Fetch reviews for movies in the specified genres
genre_ids = ["878"] 
reviews_dict = fetch_reviews_for_genre_movies(genre_ids)

# Example to print the number of reviews for each movie
for movie_id, reviews in reviews_dict.items():
    print(f"Movie ID {movie_id} has {len(reviews)} review(s).")


Movie ID 932420 has 2 review(s).
Movie ID 1239251 has 0 review(s).
Movie ID 792307 has 6 review(s).
Movie ID 693134 has 4 review(s).
Movie ID 870404 has 0 review(s).
Movie ID 438631 has 14 review(s).
Movie ID 1094556 has 0 review(s).
Movie ID 636706 has 1 review(s).
Movie ID 609681 has 9 review(s).
Movie ID 940721 has 6 review(s).
Movie ID 933131 has 3 review(s).
Movie ID 823464 has 0 review(s).
Movie ID 799155 has 0 review(s).
Movie ID 1207413 has 0 review(s).
Movie ID 949697 has 0 review(s).
Movie ID 569094 has 9 review(s).
Movie ID 1078012 has 0 review(s).
Movie ID 695721 has 5 review(s).
Movie ID 667538 has 7 review(s).
Movie ID 624091 has 0 review(s).
Movie ID 76600 has 19 review(s).
Movie ID 615656 has 4 review(s).
Movie ID 848326 has 6 review(s).
Movie ID 634649 has 15 review(s).
Movie ID 841 has 3 review(s).
Movie ID 299536 has 20 review(s).
Movie ID 461130 has 5 review(s).
Movie ID 670292 has 10 review(s).
Movie ID 298618 has 11 review(s).
Movie ID 447365 has 5 review(s).
Movi

In [15]:
#  duplicate functoin
def extract_keywords_from_reviews(reviews):
    ensure_nltk_data()  # Ensure NLTK data is available
    rake = Rake()
    all_phrases_with_scores = []
    for review in reviews:
        rake.extract_keywords_from_text(review['content'])
        phrases_with_scores = rake.get_ranked_phrases_with_scores()
        all_phrases_with_scores.extend(phrases_with_scores)

    # Sort keywords by score in descending order
    sorted_phrases_with_scores = sorted(all_phrases_with_scores, key=lambda x: x[0], reverse=True)

    # Filter to keep only unique keywords, maintaining the order and their scores
    unique_keywords_with_scores = []
    seen_keywords = set()
    for score, phrase in sorted_phrases_with_scores:
        if phrase not in seen_keywords:
            unique_keywords_with_scores.append((score, phrase))
            seen_keywords.add(phrase)
        if len(unique_keywords_with_scores) == 100:
            break

    return unique_keywords_with_scores

In [16]:

def extract_keywords_for_all_movies(reviews_dict):
    movie_keywords_dict = {}
    for movie_id, reviews in reviews_dict.items():
        # Extract keywords from the list of reviews for the current movie
        keywords_with_scores = extract_keywords_from_reviews(reviews)
        # Store the extracted keywords in the dictionary with the movie ID as the key
        movie_keywords_dict[movie_id] = keywords_with_scores
    return movie_keywords_dict


In [17]:
def get_recommended_movies(movie_keywords_dict, positive_keywords, nlp):
    recommended_movies = []

    for movie_id, keywords_with_scores in movie_keywords_dict.items():
        keywords_text = '. '.join([phrase for score, phrase in keywords_with_scores])
        

        matched_keywords = filter_keywords(keywords_text, positive_keywords, nlp)

        if matched_keywords:
            recommended_movies.append(movie_id)
    
    return recommended_movies

In [18]:
# input keywords these should change if input more watched movies are reduce

# user liked movies Inception, Iron-Man, Hangover, Dark Knight, Avengers, Oppenhiemer
watched_movies_1 = [1726, 1724, 27205, 272, 24428, 585244]
movie_reviews_dict = fetch_reviews_for_watched_movies(watched_movies_1)
for movie_id, reviews in movie_reviews_dict.items():
    print(f"Movie ID {movie_id} has {len(reviews)} review(s).")
keywords = extract_keywords_from_all_reviews(movie_reviews_dict)

all_phrases = []
for movie_id, keywords_with_scores in keywords.items():
    for score, phrase in keywords_with_scores:
        all_phrases.append(phrase)  # Collecting phrases only


# Join all keywords into a single text string for further processing
text = '. '.join(all_phrases)

final_output = filter_keywords(text, positive_keywords, nlp)
print(final_output)

Movie ID 1726 has 9 review(s).
Movie ID 1724 has 4 review(s).
Movie ID 27205 has 6 review(s).
Movie ID 272 has 8 review(s).
Movie ID 24428 has 20 review(s).
Movie ID 585244 has 2 review(s).


  if token.text.lower() == keyword_token.text.lower() or token.similarity(keyword_token) > 1.0:


['comic', 'visual', 'strong', 'current', 'touching', 'important', 'brilliant', 'small', 'rich', 'modern', 'pure', 'fun', 'romantic', 'dramatic', 'interesting', 'cinematic', 'mature', 'emotional', 'sentimental']


In [19]:
watched_movies_2 = [98,27205,49530,17654,49494,68724,393624]
movie_reviews_dict_2 = fetch_reviews_for_watched_movies(watched_movies_2)
keywords_2 = extract_keywords_from_all_reviews(movie_reviews_dict_2)
for movie_id, reviews in movie_reviews_dict_2.items():
    print(f"Movie ID {movie_id} has {len(reviews)} review(s).")
keywords = extract_keywords_from_all_reviews(movie_reviews_dict_2)
all_phrases = []
for movie_id, keywords_with_scores in keywords_2.items():
    for score, phrase in keywords_with_scores:
        all_phrases.append(phrase)  # Collecting phrases only


# Join all keywords into a single text string for further processing
text_2 = '. '.join(all_phrases)

final_output_2 = filter_keywords(text_2, positive_keywords, nlp)
print(final_output_2)

Movie ID 98 has 5 review(s).
Movie ID 27205 has 6 review(s).
Movie ID 49530 has 5 review(s).
Movie ID 17654 has 4 review(s).
Movie ID 49494 has 0 review(s).
Movie ID 68724 has 4 review(s).
Movie ID 393624 has 2 review(s).
['visual', 'compelling', 'graphic', 'strong', 'sad', 'touching', 'important', 'brilliant', 'exceptional', 'historical', 'extraordinary', 'small', 'rich', 'worthwhile', 'convincing', 'charismatic', 'deep', 'pure', 'complex', 'gripping', 'powerful', 'futuristic', 'entertaining', 'interesting', 'exciting', 'cinematic', 'original', 'stunning', 'fresh', 'big', 'superb', 'emotional']


In [20]:
# input keywords these should change if input more watched movies are reduce
final_output = filter_keywords(text, positive_keywords, nlp)
print(final_output)

final_output_2 = filter_keywords(text_2, positive_keywords, nlp)
print(final_output_2)

unique_in_list_1 = set(final_output).difference(set(final_output_2))
unique_in_list_2 = set(final_output_2).difference(set(final_output))


print(unique_in_list_1)
print(unique_in_list_2)


  if token.text.lower() == keyword_token.text.lower() or token.similarity(keyword_token) > 1.0:


['comic', 'visual', 'strong', 'current', 'touching', 'important', 'brilliant', 'small', 'rich', 'modern', 'pure', 'fun', 'romantic', 'dramatic', 'interesting', 'cinematic', 'mature', 'emotional', 'sentimental']
['visual', 'compelling', 'graphic', 'strong', 'sad', 'touching', 'important', 'brilliant', 'exceptional', 'historical', 'extraordinary', 'small', 'rich', 'worthwhile', 'convincing', 'charismatic', 'deep', 'pure', 'complex', 'gripping', 'powerful', 'futuristic', 'entertaining', 'interesting', 'exciting', 'cinematic', 'original', 'stunning', 'fresh', 'big', 'superb', 'emotional']
{'mature', 'romantic', 'comic', 'modern', 'dramatic', 'current', 'fun', 'sentimental'}
{'compelling', 'graphic', 'sad', 'exceptional', 'historical', 'extraordinary', 'worthwhile', 'convincing', 'charismatic', 'deep', 'complex', 'gripping', 'powerful', 'futuristic', 'entertaining', 'exciting', 'original', 'stunning', 'fresh', 'big', 'superb'}


In [21]:
# Fetch reviews for movies in the specified genres
genre_ids = ["28"] 
reviews_dict = fetch_reviews_for_genre_movies(genre_ids)
print(len(reviews_dict))
movie_keywords_dict = extract_keywords_from_all_reviews(reviews_dict)
print(len(movie_keywords_dict))
print(movie_keywords_dict)

180
180


In [22]:
movie_keywords_dict = extract_keywords_from_all_reviews(reviews_dict)

recommended_movies = get_recommended_movies(movie_keywords_dict,final_output_2 , nlp)
print(recommended_movies)
print(len(recommended_movies))

  if token.text.lower() == keyword_token.text.lower() or token.similarity(keyword_token) > 1.0:


[932420, 848538, 940551, 969492, 866398, 763215, 609681, 572802, 940721, 933131, 1029575, 944401, 385687, 140300, 634492, 569094, 695721, 667538, 76600, 615656, 848326, 634649, 299054, 603692, 897087, 575264, 955916, 841, 299536, 315162, 461130, 670292, 926393, 298618, 361743, 447365, 399566, 980489, 24428, 293660, 848187, 635302, 565770, 1865, 507086, 637649, 373571, 122917, 119450, 675353, 1155089, 120, 505642, 522627, 1214314, 299534, 284054, 335977, 640146, 436270, 124905, 19995, 122, 99861, 198663, 22, 614930, 156022, 177572, 76341, 285, 545611, 58, 453395, 616037, 588228, 324857, 49530, 842675, 406759, 677179, 293167, 27205, 594767, 10138, 141052, 297802, 766507, 294254, 436969, 315635, 399579, 324552, 10196, 882569, 580489, 102382, 1726, 791373, 76757, 564, 557, 335983, 353491, 700391, 121, 429617]
107


In [23]:
movie_keywords_dict = extract_keywords_from_all_reviews(reviews_dict)
print(movie_keywords_dict)
recommended_movies_2 = get_recommended_movies(movie_keywords_dict, unique_in_list_2, nlp)
print(recommended_movies)
print(len(recommended_movies))



  if token.text.lower() == keyword_token.text.lower() or token.similarity(keyword_token) > 1.0:


[932420, 848538, 940551, 969492, 866398, 763215, 609681, 572802, 940721, 933131, 1029575, 944401, 385687, 140300, 634492, 569094, 695721, 667538, 76600, 615656, 848326, 634649, 299054, 603692, 897087, 575264, 955916, 841, 299536, 315162, 461130, 670292, 926393, 298618, 361743, 447365, 399566, 980489, 24428, 293660, 848187, 635302, 565770, 1865, 507086, 637649, 373571, 122917, 119450, 675353, 1155089, 120, 505642, 522627, 1214314, 299534, 284054, 335977, 640146, 436270, 124905, 19995, 122, 99861, 198663, 22, 614930, 156022, 177572, 76341, 285, 545611, 58, 453395, 616037, 588228, 324857, 49530, 842675, 406759, 677179, 293167, 27205, 594767, 10138, 141052, 297802, 766507, 294254, 436969, 315635, 399579, 324552, 10196, 882569, 580489, 102382, 1726, 791373, 76757, 564, 557, 335983, 353491, 700391, 121, 429617]
107


In [24]:
def fetch_movie_title(movie_id):
    movie_url = f'https://api.themoviedb.org/3/movie/{movie_id}'
    headers = {
    'Authorization': 'Bearer '+TMBD_ACCESS_TOKEN,
    'accept': 'application/json',
    }
    response = requests.get(movie_url, headers=headers)
    if response.status_code == 200:
        movie_data = response.json()
        return movie_data['title']
    else:
        print(f"Failed to fetch title for movie ID {movie_id}")
        return None

In [25]:
print(len(recommended_movies))
for movie_id in recommended_movies[:40]:
    title = fetch_movie_title(movie_id)
    if title:
        print(title)

107
Code 8 Part II
Argylle
Migration
Land of Bad
The Beekeeper
Damsel
The Marvels
Aquaman and the Lost Kingdom
Godzilla Minus One
Badland Hunters
The Family Plan
Dogman
Fast X
Kung Fu Panda 3
Madame Web
Spider-Man: Across the Spider-Verse
The Hunger Games: The Ballad of Songbirds & Snakes
Transformers: Rise of the Beasts
Avatar: The Way of Water
Meg 2: The Trench
Rebel Moon - Part One: A Child of Fire
Spider-Man: No Way Home
Expend4bles
John Wick: Chapter 4
Freelance
Mission: Impossible - Dead Reckoning Part One
Lift
Dune
Avengers: Infinity War
Puss in Boots: The Last Wish
Code 8
The Creator
The Equalizer 3
The Flash
Top Gun: Maverick
Guardians of the Galaxy Vol. 3
Godzilla vs. Kong
Gran Turismo
The Avengers
Deadpool


In [26]:
import os
temp = -1
new_pid = os.fork()
if new_pid == 0:
    temp = -3
    print('The value of temp is', temp)
print('Temp and pid here are:', temp, new_pid)

Temp and pid here are: -1 82060
The value of temp is -3


In [None]:
for movie_id in recommended_movies_2[:10]:
    title = fetch_movie_title(movie_id)
    if title:
        print(title)

Temp and pid here are:-3
 0