In [319]:
# required imports
import requests
from urllib import parse as urlParser
import json
import rx
from rx import operators as ops
from datetime import timedelta, date, datetime
import csv

In [276]:
# Load the TMDB API key from a local text file (first line of the file).
# Remember to add this file to .gitignore so the key is never committed.
apiKeyFilePath = "api_key.txt"
userApiKey = None
with open(apiKeyFilePath, 'r') as f:
    # readline() keeps the trailing newline; strip it so it is not
    # URL-encoded into the api_key query parameter of every request.
    userApiKey = f.readline().strip()

In [362]:
class MovieData:
    """One movie record assembled from several TMDB JSON responses.

    The record is filled in three steps (movie details, credits, keywords)
    and can then be appended as a single row to a CSV file whose columns
    are given by ``fields``.
    """

    # CSV column names; also the attribute names written by add_data_to_csv,
    # in this exact order.
    fields = ['_id_','belongs_to_collection','budget','genres','homepage','imdb_id','original_language',
              'original_title','overview','popularity','poster_path','production_company','production_countries',
              'release_date','runtime','spoken_languages','status','tagline','title','keywords','cast','crew','revenue']

    # Attributes copied verbatim from the JSON key of the same name.
    _passthrough_keys = ('belongs_to_collection', 'budget', 'genres', 'homepage', 'imdb_id',
                         'original_language', 'original_title', 'overview', 'popularity',
                         'poster_path', 'production_countries', 'runtime', 'spoken_languages',
                         'status', 'tagline', 'title', 'revenue')

    _id_ = None
    belongs_to_collection = None
    budget = None
    genres = None
    homepage = None
    imdb_id = None
    original_language = None
    original_title = None
    overview = None
    popularity = None
    poster_path = None
    production_company = None
    production_countries = None
    release_date = None
    runtime = None
    spoken_languages = None
    status = None
    tagline = None
    title = None
    keywords = None
    cast = None
    crew = None
    revenue = None

    def load_data_from_json(self, jsonMovieData):
        """Populate the basic movie attributes from a movie-details JSON dict.

        Missing keys leave the corresponding attribute as ``None``.

        Args:
            jsonMovieData: dict-like object exposing ``get``.

        Returns:
            ``self``, so the call can be chained like the other loaders.
        """
        self._id_ = jsonMovieData.get('id')
        for key in self._passthrough_keys:
            setattr(self, key, jsonMovieData.get(key))

        # TMDB movie details expose the plural key 'production_companies';
        # fall back to the singular key previously read so older inputs still work.
        self.production_company = jsonMovieData.get(
            'production_companies', jsonMovieData.get('production_company'))

        # The release date can be absent or an empty string; strptime would
        # raise on either, so only parse a non-empty value.
        rawReleaseDate = jsonMovieData.get('release_date')
        self.release_date = (
            datetime.strptime(rawReleaseDate, '%Y-%m-%d') if rawReleaseDate else None
        )
        return self

    def load_keywords(self, jsonMovieData):
        """Store the 'keywords' entry of a keywords JSON dict and return ``self``."""
        self.keywords = jsonMovieData.get('keywords')
        return self

    def load_credits(self, jsonMovieData):
        """Store the 'crew' and 'cast' entries of a credits JSON dict and return ``self``."""
        self.crew = jsonMovieData.get('crew')
        self.cast = jsonMovieData.get('cast')
        return self

    def add_data_to_csv(self, filename):
        """Append this record as one CSV row to ``filename``.

        Values are written in the order of ``fields`` so rows always line up
        with the header written from the same list.
        """
        # newline='' is required by the csv module; without it extra blank
        # lines appear on platforms that translate '\n' on write.
        with open(filename, 'a', encoding='utf8', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([getattr(self, name) for name in self.fields])

def create_csv_header(filename, columns):
    """Create (or overwrite) ``filename`` and write ``columns`` as its header row.

    Args:
        filename: path of the CSV file to create; existing content is discarded.
        columns: iterable of column names written as a single row.
    """
    # newline='' is required by the csv module; without it extra blank lines
    # appear on platforms that translate '\n' on write.
    with open(filename, 'w', encoding='utf8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(columns)


In [365]:
def daterange(startDate, endDate):
    """Return a lazy iterable of every day from startDate through endDate, inclusive.

    The iterable is empty when endDate is earlier than startDate.
    """
    # +1 makes the end date part of the range.
    dayCount = int((endDate - startDate).days) + 1
    return (startDate + timedelta(days=offset) for offset in range(dayCount))

def tmdb_movie_relased_in_day_query_creator(releasedDate, apiKey, page=1):
    """Build the TMDB discover URL listing movies released on a single day.

    Args:
        releasedDate: ``datetime.date`` (or subclass) used as both the lower
            and upper bound of the primary release date filter.
        apiKey: API key placed in the ``api_key`` query parameter.
        page: result page number to request (default 1).

    Returns:
        The full request URL as a string.

    Raises:
        TypeError: if ``releasedDate`` is not a ``datetime.date``.
    """
    if not isinstance(releasedDate, date):
        # TypeError is still an Exception, so existing handlers keep working.
        raise TypeError(
            "Invalid Argument: releasedDate must be a datetime.date, got {0}".format(
                type(releasedDate).__name__))
    dateString = releasedDate.strftime('%Y-%m-%d')
    baseAPIurl = "https://api.themoviedb.org/3/discover/movie"
    params = {
        'api_key':apiKey,
        'page':page,
        # Same date for both bounds restricts the result to exactly one day.
        'primary_release_date.gte':dateString,
        'primary_release_date.lte':dateString,
        'sort_by':'primary_release_date.asc'
    }
    # Index 4 of the parsed URL parts is the query string.
    urlParts = list(urlParser.urlparse(baseAPIurl))
    urlParts[4] = urlParser.urlencode(params)

    # Returning the ready query, API key included.
    return urlParser.urlunparse(urlParts)

def tmdb_movie_by_id_query_creator(movieDiscoveryJsonData, apiKey):
    """Build the TMDB movie URL for the 'id' found in one discover result.

    Returns the full request URL (API key included) as a string.
    """
    movieId = str(movieDiscoveryJsonData.get('id'))
    endpoint = urlParser.urlparse("https://api.themoviedb.org/3/movie/" + movieId)
    queryString = urlParser.urlencode({'api_key': apiKey})

    # Swap in the query component and reassemble the URL.
    return endpoint._replace(query=queryString).geturl()

def tmdb_movie_credits_query_creator(movieData, apiKey):
    """Build the TMDB credits URL for ``movieData._id_``.

    Returns a ``(movieData, url)`` tuple so the record travels along with
    the request that will enrich it.
    """
    endpoint = urlParser.urlparse(
        "https://api.themoviedb.org/3/movie/{0}/credits".format(movieData._id_))
    queryString = urlParser.urlencode({'api_key': apiKey})

    # Swap in the query component and reassemble the URL.
    creditsUrl = endpoint._replace(query=queryString).geturl()
    return (movieData, creditsUrl)

def tmdb_movie_keywords_query_creator(movieData, apiKey):
    """Build the TMDB keywords URL for ``movieData._id_``.

    Returns a ``(movieData, url)`` tuple so the record travels along with
    the request that will enrich it.
    """
    endpoint = urlParser.urlparse(
        "https://api.themoviedb.org/3/movie/{0}/keywords".format(movieData._id_))
    queryString = urlParser.urlencode({'api_key': apiKey})

    # Swap in the query component and reassemble the URL.
    keywordsUrl = endpoint._replace(query=queryString).geturl()
    return (movieData, keywordsUrl)

def map_to_page_date_range(response, date):
    """Return an observable of ``(pageNumber, date)`` pairs for one response.

    Page numbers run from 1 through the response's 'total_pages' value, inclusive.
    """
    lastPage = response.get('total_pages') + 1
    pageDatePairs = ((pageNumber, date) for pageNumber in range(1, lastPage))
    return rx.from_(pageDatePairs)

def get_json_request(url, timeout=30):
    """GET ``url`` and return the response body decoded as JSON.

    Args:
        url: full request URL.
        timeout: seconds to wait for the server before giving up (default 30).
            Without a timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The parsed JSON payload.
    """
    return requests.get(url, timeout=timeout).json()

def movie_data_page_extractor(jsonMovieData):
    """Return an observable emitting each entry of the page's "results" value."""
    pageResults = jsonMovieData.get("results")
    return rx.from_iterable(pageResults)

def get_json_request_data(movieData, url, timeout=30):
    """GET ``url`` and return ``(movieData, parsedJson)``.

    The record is passed through unchanged so the next pipeline stage can
    attach the downloaded data to it.

    Args:
        movieData: object returned as-is in the first tuple slot.
        url: full request URL.
        timeout: seconds to wait for the server before giving up (default 30).
            Without a timeout, requests waits indefinitely on a stalled connection.
    """
    return (movieData, requests.get(url, timeout=timeout).json())

def basic_movie_data_mapper(jsonMovieData):
    """Create a new MovieData and fill it from a movie-details JSON dict."""
    record = MovieData()
    record.load_data_from_json(jsonMovieData)
    return record

def add_rov_verbous(movieData, outputFilename):
    """Print a progress message, then append ``movieData`` to the CSV file."""
    print("New record processed")
    appendRow = movieData.add_data_to_csv
    appendRow(outputFilename)

In [367]:
# Run configuration: output file and the inclusive range of release dates to download.
outputFilename = "output.csv"
startDate = date(2020,1,1)
endDate =  date(2020,1,1)
# Opens the output file in 'w' mode, so any previous content is replaced by the header row.
create_csv_header(outputFilename, MovieData.fields)

# Download pipeline: one item per day -> one per result page -> one per movie,
# each movie then enriched with credits and keywords before being written out.
rx.from_(daterange(startDate, endDate)).pipe(
    # Build the page-1 discover URL for each day; the day is carried along in the tuple.
    # (the lambda parameter `date` shadows the imported `date` class inside the lambda only)
    ops.map(lambda date: (tmdb_movie_relased_in_day_query_creator(date, userApiKey), date)),
    # Fetch page 1; this response is used only to read 'total_pages' in the next stage.
    ops.map(lambda query_date: (get_json_request(query_date[0]), query_date[1])),
    # Expand each day into (pageNumber, day) pairs, starting again from page 1.
    ops.flat_map(lambda response_date: map_to_page_date_range(response_date[0], response_date[1])),
    # Build and fetch the discover URL for every page of every day.
    ops.map(lambda pageNumber_date: tmdb_movie_relased_in_day_query_creator(pageNumber_date[1], userApiKey, page=pageNumber_date[0])),
    ops.map(lambda query: get_json_request(query)),
    # Flatten each page into its individual "results" entries.
    ops.flat_map(lambda response: movie_data_page_extractor(response)),
    # Fetch the movie JSON for each entry's id and map it to a MovieData.
    ops.map(lambda moviesJsonData: tmdb_movie_by_id_query_creator(moviesJsonData, userApiKey)),
    ops.map(lambda query: get_json_request(query)),
    ops.map(lambda movie: basic_movie_data_mapper(movie)),
    # Credits: (movieData, url) -> (movieData, json) -> movieData with cast/crew set.
    ops.map(lambda movieData: tmdb_movie_credits_query_creator(movieData, userApiKey)),
    ops.map(lambda movieData_url: get_json_request_data(movieData_url[0], movieData_url[1])),
    ops.map(lambda movieData_jsonData: movieData_jsonData[0].load_credits(movieData_jsonData[1])),
    # Keywords: same three steps, ending with movieData with keywords set.
    ops.map(lambda movieData: tmdb_movie_keywords_query_creator(movieData, userApiKey)),
    ops.map(lambda movieData_url: get_json_request_data(movieData_url[0], movieData_url[1])),
    ops.map(lambda movieData_jsonData: movieData_jsonData[0].load_keywords(movieData_jsonData[1]))
# Each finished record is appended to the CSV; errors are printed rather than re-raised.
).subscribe(lambda movieData: add_rov_verbous(movieData,outputFilename), on_error=lambda e: print(e), on_completed=lambda: print("Loading finished"))


New record processed


KeyboardInterrupt: 