In [12]:
# import dependencies
import os

import pandas as pd
import requests

from config import api_key

In [13]:
# Run configuration for the movie search:
#   name         - label used to build the output file name
#   keywords     - path to the CSV of keywords a movie overview must mention
#   antikeywords - path to the CSV of terms that exclude a movie
name = 'BC'
keywords, antikeywords = (
    'resources/BC_keywords.csv',
    'resources/BC_antikeywords.csv',
)

In [14]:
# Movie extraction: download TMDB discover results year by year, then keep
# the movies whose overview mentions a keyword and no anti-keyword.

# Columns returned by the API that the rest of the pipeline never reads.
_UNUSED_COLUMNS = ['adult', 'backdrop_path', 'original_language',
                   'original_title', 'poster_path', 'video']


def _fetch_raw_movies(first_year=1945, last_year=2021, max_page=499, timeout=30):
    """Download every discover-API result page for each release year.

    Parameters
    ----------
    first_year, last_year : int
        Inclusive range of release years to query, one year at a time.
    max_page : int
        Highest page number requested per year. Paging for a year stops
        earlier, as soon as a page comes back with no results.
    timeout : float
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        All result rows in download order with a fresh 0..n-1 index. When
        nothing was returned at all, an empty frame that still carries the
        'overview' and 'title' columns the caller reads.

    Raises
    ------
    RuntimeError
        If the API answers with a non-success HTTP status. The message
        deliberately omits the request URL because it contains the API key.
    """
    page_frames = []
    total_results = 0

    for year in range(first_year, last_year + 1):
        pages_read = 0

        for page in range(1, max_page + 1):
            # without_genres lists TMDB genre ids to exclude (presumably
            # documentary, sci-fi, animation, fantasy, horror - verify
            # against TMDB's genre list).
            response = requests.get(
                f'https://api.themoviedb.org/3/discover/movie?api_key={api_key}&primary_release_date.gte={year}-01-01&without_genres=99,878,16,14,27&primary_release_date.lte={year}-12-31&vote_count.gte=0&vote_average.gte=3.9&with_runtime.gte=55&sort_by=release_date.asc&page={page}',
                timeout=timeout,
            )
            if not response.ok:
                raise RuntimeError(
                    f'TMDB request failed for year {year}, page {page}: '
                    f'HTTP {response.status_code}'
                )

            results = response.json()['results']
            pages_read += 1

            # An empty page means this year has no further results.
            if not results:
                break

            page_frames.append(pd.DataFrame(results))
            total_results += len(results)

        # Progress report every fifth year, and for unusually large years.
        if year % 5 == 0 or pages_read > 70:
            print(f'Currently passing year:{year}, with: {pages_read} pages and {total_results} results')

    print(f'Raw Results: {total_results}')

    if not page_frames:
        return pd.DataFrame(columns=['overview', 'title'])

    # Concatenate once at the end; growing a frame page by page is quadratic
    # and DataFrame.append no longer exists in pandas 2.x.
    return pd.concat(page_frames, ignore_index=True)


def get_movies(name, keywords, antikeywords):
    """Build and save a list of movies whose overview matches the keyword list.

    Parameters
    ----------
    name : str
        Label for this run. Used in the output file name and to look up the
        anti-keyword column ('<name>_anti_keywords').
    keywords : str
        Path to a CSV with a 'Topic/Keyword' column and a 'Start Date'
        column holding the estimated date associated with each keyword.
    antikeywords : str
        Path to a CSV of exclusion terms. The column '<name>_anti_keywords'
        is used when present, otherwise 'BC_anti_keywords'.

    Returns
    -------
    pandas.DataFrame
        One row per retained movie, with two extra columns: 'keyword' (list
        of keywords found in the overview) and 'est_date' (the matching
        'Start Date' values, in the same order).

    Side effects
    ------------
    Performs network requests, prints progress, and writes the result to
    'resources/<name>_movie_list.csv' (creating the directory if needed).

    Notes
    -----
    Matching is a case-sensitive substring test. Movies are de-duplicated by
    title alone, so different films sharing a title collapse to the first one
    downloaded.
    """
    keyword_df = pd.read_csv(keywords).rename(columns={'Topic/Keyword': 'keyword'})
    antikeyword_df = pd.read_csv(antikeywords)

    movie_df = (
        _fetch_raw_movies()
        .drop(columns=_UNUSED_COLUMNS, errors='ignore')
        .drop_duplicates(subset='title')
    )

    # Plain Python lists: to_list() yields native ints/strings, so the list
    # cells written to the CSV stay readable (no numpy scalar reprs).
    key_list = keyword_df['keyword'].to_list()
    date_list = keyword_df['Start Date'].to_list()

    matched_rows = []
    keyword_column = []
    dates_column = []

    # Search each overview for keywords; a movie is kept once if any match.
    for position, overview in enumerate(movie_df['overview']):
        # Missing overviews (None/NaN) cannot contain a keyword.
        if not isinstance(overview, str):
            continue

        hits = [
            (keyword, start_date)
            for keyword, start_date in zip(key_list, date_list)
            # Blank CSV rows load as NaN; skip anything that is not text.
            if isinstance(keyword, str) and keyword in overview
        ]
        if hits:
            matched_rows.append(position)
            keyword_column.append([keyword for keyword, _ in hits])
            dates_column.append([start_date for _, start_date in hits])

    revised_movie_df = movie_df.iloc[matched_rows].reset_index(drop=True)
    revised_movie_df['keyword'] = keyword_column
    revised_movie_df['est_date'] = dates_column

    # Exclude movies whose overview contains an anti-keyword (modern terms or
    # words that fall outside the desired timeline).
    anti_column = f'{name}_anti_keywords'
    if anti_column not in antikeyword_df.columns:
        anti_column = 'BC_anti_keywords'
    antikey_list = [
        term for term in antikeyword_df[anti_column].to_list()
        if isinstance(term, str)
    ]

    drop_list = [
        row
        for row, overview in enumerate(revised_movie_df['overview'])
        if any(term in overview for term in antikey_list)
    ]
    revised_movie_df = revised_movie_df.drop(index=drop_list).reset_index(drop=True)

    output_path = f'resources/{name}_movie_list.csv'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    revised_movie_df.to_csv(output_path)

    return revised_movie_df

In [15]:
# Run the search with the configured parameters; the returned DataFrame is
# the cell's last expression, so it is displayed as the cell output.
get_movies(name=name, keywords=keywords, antikeywords=antikeywords)

Currently passing year:1945, with: 22 pages and 402 results
Currently passing year:1950, with: 33 pages and 3010 results
Currently passing year:1955, with: 36 pages and 6212 results
Currently passing year:1960, with: 40 pages and 10090 results
Currently passing year:1965, with: 42 pages and 14093 results
Currently passing year:1970, with: 54 pages and 18829 results
Currently passing year:1975, with: 48 pages and 23903 results
Currently passing year:1980, with: 53 pages and 28963 results
Currently passing year:1985, with: 58 pages and 34290 results
Currently passing year:1990, with: 62 pages and 40107 results
Currently passing year:1995, with: 67 pages and 46347 results
Currently passing year:1997, with: 72 pages and 49096 results
Currently passing year:1998, with: 78 pages and 50633 results
Currently passing year:1999, with: 80 pages and 52209 results
Currently passing year:2000, with: 86 pages and 53896 results
Currently passing year:2001, with: 92 pages and 55709 results
Currently pa

Unnamed: 0,genre_ids,id,overview,popularity,release_date,title,vote_average,vote_count,keyword,est_date
0,"[18, 36, 35]",31561,The aging Caesar finds himself intrigued by th...,6.773,1945-12-11,Caesar and Cleopatra,6.3,37,[Egypt],[-3150]
1,"[18, 10749, 12]",29993,When strongman Samson rejects the love of the ...,19.252,1949-12-21,Samson and Delilah,6.5,127,[Samson],[-1118]
2,"[18, 10749, 36]",42567,King David enters into an adulterous affair wi...,7.166,1951-08-10,David and Bathsheba,6.4,35,[Bathsheba],[-975]
3,"[36, 12]",114771,Cleopatra (Rhonda Fleming) toys with Mark Anto...,1.253,1953-05-08,Serpent of the Nile,7.7,3,"[Egypt, Rome, Mark Antony, Cleopatra]","[-3150, -753, -83, -51]"
4,"[36, 12]",350806,The Jews are taken from Jerusalem and made sla...,1.049,1953-10-21,Slaves of Babylon,6.0,1,"[Jerusalem, Cyrus, Persia, Persian]","[-2800, -550, -550, -550]"
...,...,...,...,...,...,...,...,...,...,...
402,[18],640917,A screen adaptation of the novel of the same n...,3.482,2021-04-27,The Time of Indifference,6.3,16,[Rome],[-753]
403,"[12, 35]",577242,"Following the end of the acclaimed tv series, ...",23.702,2021-07-21,Kaamelott - The First Chapter,7.1,353,[Rome],[-753]
404,"[28, 18, 37, 12]",729577,"Sam Worthington stars as Isaac LeMay, a murder...",48.356,2021-12-10,The Last Son,6.3,11,[Solomon],[-970]
405,"[18, 53]",874581,"Mute young woman Patience, from an isolated co...",4.412,2021-11-26,Lapwing,7.0,1,[Egypt],[-3150]
