In [22]:
import pandas as pd
import numpy as np
from tmdbv3api import TMDb
from tmdbv3api import Movie
from dotenv import load_dotenv
import os
import requests
from datetime import datetime

In [23]:
load_dotenv()

True

In [24]:
# Configure the tmdbv3api client used for the title searches below.
tmdb = TMDb()
# The key is read from the environment (populated by load_dotenv() above);
# os.getenv returns None when API_KEY is not set.
tmdb.api_key = os.getenv('API_KEY')
tmdb.language = 'en'
# NOTE(review): presumably enables verbose client diagnostics — confirm, and
# consider switching it off once the notebook is stable.
tmdb.debug = True

In [25]:
movie = Movie()

In [26]:
# Parse the HTML tables of the Wikipedia list page. Later cells index `data`
# by table position and read each table's 'Title' column, so they depend on
# the page keeping its current table order.
data = pd.read_html('https://en.wikipedia.org/wiki/List_of_American_films_of_2023')

In [27]:
names = []

In [28]:
# Collect every film title from the parsed tables at positions 3-6
# (presumably one release table per quarter — confirm against the page).
#
# The list is rebuilt in a single expression instead of appending to the list
# created in the previous cell, so re-running this cell cannot duplicate
# titles. Distinct names for the table index and the title avoid the original
# loop-variable shadowing.
names = [
    title
    for table_index in range(3, 7)
    for title in data[table_index]['Title']
]

In [29]:
names[ : 4]

['M3GAN', 'The Old Way', 'The Devil Conspiracy', 'Plane']

In [30]:
movie_id = []
movie_not_found = []

In [31]:
# Resolve each Wikipedia title to a TMDB id by taking the first search hit
# whose release date falls inside calendar year 2023.
date_format = "%Y-%m-%d"
start_date = datetime.strptime("2023-01-01", date_format)
end_date = datetime.strptime("2023-12-31", date_format)

# Start from empty result lists so that re-running this cell does not append
# a second copy of every match.
movie_id = []
movie_not_found = []

for name in names:
    search = movie.search(name)
    matched_id = None
    for res in search:
        # A bare string instead of a result record means there is nothing
        # usable for this title; stop scanning. The title is still recorded
        # as not found below (previously it was silently dropped from both
        # lists because the break skipped the for/else branch).
        if isinstance(res, str):
            print("Movie data does not exist")
            break
        release_date = res['release_date']
        if not release_date:
            # Hit without a release date: cannot be checked, try the next one.
            print("Does not exist")
            continue
        released = datetime.strptime(release_date, date_format)  # parse once
        if start_date <= released <= end_date:
            matched_id = res['id']
            break

    if matched_id is None:
        movie_not_found.append(name)
    else:
        movie_id.append((name, matched_id))

Does not exist
Does not exist
Does not exist
Does not exist
Does not exist
Does not exist
Does not exist


In [35]:
movie_id[ : 4]

[('The Old Way', 872954),
 ('The Devil Conspiracy', 296271),
 ('Plane', 646389),
 ('House Party', 632065)]

In [36]:
details = []

In [37]:
def get_genres(obj):
    """Extract the genre names from a sequence of genre records.

    Args:
        obj: Iterable of mappings, each providing a ``'name'`` key.

    Returns:
        list: The ``'name'`` value of every record, in input order.
    """
    return [entry['name'] for entry in obj]

In [38]:
# Fetch title, overview and genres for every matched movie from the TMDB
# movie-details endpoint.
api_key = os.getenv("API_KEY")  # read once instead of on every iteration

# Rebuild the list on each run so re-executing the cell does not duplicate rows.
details = []

for _wiki_title, tmdb_id in movie_id:
    response = requests.get(
        f'https://api.themoviedb.org/3/movie/{tmdb_id}',
        params={'api_key': api_key},
        timeout=10,  # do not let one stalled request hang the whole loop
    )
    # An error payload has no 'title'/'overview'/'genres' keys; skip it
    # rather than crash with a KeyError part-way through the list.
    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        continue

    data_json = response.json()
    details.append({
        'name': data_json['title'],
        'id': tmdb_id,
        # Later cells call .split() on the overview, so never store None.
        'overview': data_json.get('overview') or '',
        'genres': get_genres(data_json['genres']),
    })

In [39]:
details[ : 4 ]

[{'name': 'The Old Way',
  'id': 872954,
  'overview': 'An old gunslinger and his daughter must face the consequences of his past, when the son of a man he killed years ago arrives to take his revenge.',
  'genres': ['Western']},
 {'name': 'The Devil Conspiracy',
  'id': 296271,
  'overview': 'A powerful biotech company has breakthrough technology allowing them to clone history’s most influential people with just a few fragments of DNA. Behind this company is a cabal of Satanists that steals the shroud of Christ, putting them in possession of Jesus’ DNA. The clone will serve as the ultimate offering to the devil. The Archangel Michael comes to earth and will stop at nothing to end the devil’s conspiracy.',
  'genres': ['Horror', 'Fantasy', 'Science Fiction', 'Thriller']},
 {'name': 'Plane',
  'id': 646389,
  'overview': 'After a heroic job of successfully landing his storm-damaged aircraft in a war zone, a fearless pilot finds himself between the agendas of multiple militias planning t

In [40]:
df = pd.DataFrame(details)

In [41]:
df.head()

Unnamed: 0,name,id,overview,genres
0,The Old Way,872954,An old gunslinger and his daughter must face t...,[Western]
1,The Devil Conspiracy,296271,A powerful biotech company has breakthrough te...,"[Horror, Fantasy, Science Fiction, Thriller]"
2,Plane,646389,After a heroic job of successfully landing his...,"[Action, Adventure, Thriller]"
3,House Party,632065,Aspiring club promoters and best buds Damon an...,[Comedy]
4,Sick,1195746,We all know what it feels like when you're sic...,[]


In [42]:
def get_actors(id, limit=5):
    """Return the names of the first cast members listed for a TMDB movie.

    Args:
        id: TMDB movie id (the name shadows the builtin but is kept so
            existing keyword callers continue to work).
        limit: Maximum number of cast names to return. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        list: Up to ``limit`` cast names in the order the API lists them, or
        an empty list when the request fails or the payload has no cast.
    """
    url = f'https://api.themoviedb.org/3/movie/{id}/credits'
    response = requests.get(
        url,
        params={'api_key': os.getenv("API_KEY")},
        timeout=10,  # avoid hanging a whole DataFrame.apply on one request
    )

    # Mirror get_directors: report a failed request and return an empty
    # result instead of raising KeyError on an error payload.
    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return []

    cast = response.json().get('cast', [])
    # Slicing already copes with casts shorter than `limit`.
    return [member['name'] for member in cast[:limit]]

In [43]:

def get_directors(movie_id):
    """Return the names of all crew members credited as 'Director'.

    Args:
        movie_id: TMDB movie id.

    Returns:
        list: Director names in the order the API lists them. An empty list
        is returned (after printing a message) when the HTTP status is not
        200 or when the payload cannot be processed.
    """
    endpoint = f'https://api.themoviedb.org/3/movie/{movie_id}/credits?api_key={os.getenv("API_KEY")}'
    reply = requests.get(endpoint)

    # Guard clause: anything other than 200 is reported and yields no names.
    if reply.status_code != 200:
        print(f"Failed to fetch data. Status code: {reply.status_code}")
        return []

    try:
        crew = reply.json()['crew']
        return [member['name'] for member in crew if member['job'] == 'Director']
    except Exception as e:
        # Best effort: report the problem and fall back to an empty result.
        print(f"Error: {e}")
        return []

In [45]:
df['actors'] = df['id'].apply(get_actors)

In [48]:
df['director'] = df['id'].apply(get_directors)

In [49]:
df.head()

Unnamed: 0,name,id,overview,genres,actors,director
0,The Old Way,872954,An old gunslinger and his daughter must face t...,[Western],"[Nicolas Cage, Ryan Kiera Armstrong, Noah Le G...",[Brett Donowho]
1,The Devil Conspiracy,296271,A powerful biotech company has breakthrough te...,"[Horror, Fantasy, Science Fiction, Thriller]","[Alice Orr-Ewing, Joe Doyle, Eveline Hall, Pet...",[Nathan Frankowski]
2,Plane,646389,After a heroic job of successfully landing his...,"[Action, Adventure, Thriller]","[Gerard Butler, Mike Colter, Tony Goldwyn, Yos...",[Jean-François Richet]
3,House Party,632065,Aspiring club promoters and best buds Damon an...,[Comedy],"[Jacob Latimore, Tosin Cole, Karen Obilom, D.C...",[Calmatic]
4,Sick,1195746,We all know what it feels like when you're sic...,[],"[Aidan Mason, Joshua Uy]",[Aidan Mason]


In [50]:
def remove_space(obj):
    """Strip every space character from each string in ``obj``.

    Args:
        obj: Iterable of strings.

    Returns:
        list: A new list with the same items, each with all ``" "``
        characters removed (e.g. ``"Nicolas Cage"`` -> ``"NicolasCage"``).
    """
    return [item.replace(" ", "") for item in obj]

In [51]:
# Build one space-separated "tags" string per movie from its overview words,
# genres, actors and directors.

# Collapse multi-word person names into single tokens ("Nicolas Cage" ->
# "NicolasCage") so each person contributes exactly one tag.
df['actors'] = df['actors'].apply(remove_space)
df['director'] = df['director'].apply(remove_space)

# Split the overview into words. The isinstance guard keeps the cell
# re-runnable: on a second execution the column already holds lists, and
# calling .split() on a list would raise AttributeError.
df['overview'] = df['overview'].apply(lambda x: x.split() if isinstance(x, str) else x)

# NOTE(review): genres keep their internal spaces, so "Science Fiction" ends
# up as two separate tokens in the tags — confirm this is intended.
# Adding the list-valued columns concatenates the lists row by row.
df['tags'] = (df['overview'] + df['genres'] + df['actors'] + df['director']).apply(" ".join)

In [52]:
df.head()

Unnamed: 0,name,id,overview,genres,actors,director,tags
0,The Old Way,872954,"[An, old, gunslinger, and, his, daughter, must...",[Western],"[NicolasCage, RyanKieraArmstrong, NoahLeGros, ...",[BrettDonowho],An old gunslinger and his daughter must face t...
1,The Devil Conspiracy,296271,"[A, powerful, biotech, company, has, breakthro...","[Horror, Fantasy, Science Fiction, Thriller]","[AliceOrr-Ewing, JoeDoyle, EvelineHall, PeterM...",[NathanFrankowski],A powerful biotech company has breakthrough te...
2,Plane,646389,"[After, a, heroic, job, of, successfully, land...","[Action, Adventure, Thriller]","[GerardButler, MikeColter, TonyGoldwyn, YosonA...",[Jean-FrançoisRichet],After a heroic job of successfully landing his...
3,House Party,632065,"[Aspiring, club, promoters, and, best, buds, D...",[Comedy],"[JacobLatimore, TosinCole, KarenObilom, D.C.Yo...",[Calmatic],Aspiring club promoters and best buds Damon an...
4,Sick,1195746,"[We, all, know, what, it, feels, like, when, y...",[],"[AidanMason, JoshuaUy]",[AidanMason],We all know what it feels like when you're sic...


In [54]:
# Keep only the columns needed downstream. The explicit .copy() makes new_df
# independent of df, so later column assignments on new_df do not raise
# SettingWithCopyWarning.
new_df = df[['id', 'name', 'tags']].copy()

In [55]:
new_df.head()

Unnamed: 0,id,name,tags
0,872954,The Old Way,An old gunslinger and his daughter must face t...
1,296271,The Devil Conspiracy,A powerful biotech company has breakthrough te...
2,646389,Plane,After a heroic job of successfully landing his...
3,632065,House Party,Aspiring club promoters and best buds Damon an...
4,1195746,Sick,We all know what it feels like when you're sic...
