In [8]:
import requests
import json
import pandas as pd
import time
import datetime
import warnings
import numpy as np
warnings.filterwarnings("ignore")

from ipynb.fs.defs.keys import API_KEY

## API Extract

In [2]:
def get_genres(api_key):
    url = 'http://api.themoviedb.org/3/genre/movie/list'

    headers = {
    "accept": "application/json",
    "Authorization": {api_key}
    }   
    
    response = requests.get(url, headers=headers)
    
    all_results = []
    
    if response.status_code == 200:
        data = response.json()
        all_results.extend(data.get("genres",[]))
    else:
        print(f"Failed: {response.status_code}")
        
    return pd.DataFrame(all_results)

def get_movies(page, api_key):
    url = f"https://api.themoviedb.org/3/trending/movie/day?language=en-US&page={page}"
    headers = {
            "accept": "application/json",
            "Authorization": {api_key}
            }
    response = requests.get(url, headers=headers)
    
    return response

def get_all_pages(api_key):
    all_results = [] 
    page = 1
    max_pages = 500
    
    while page <= max_pages:
        response = get_movies(page, api_key)
        if response.status_code == 200:
            data = response.json()
            all_results.extend(data.get('results', []))
        else:
            print(f"Failed to fetch page {page}: {response.status_code}")
            break
        
        page += 1
    
    return pd.DataFrame(all_results)

In [4]:
genres = get_genres(API_KEY)
df = get_all_pages(API_KEY)

## Transform

In [9]:
df_movies = df.copy()

In [10]:
# Removendo colunas
for column in df_movies.columns:
    count = df_movies[column].isnull().sum()
    if count > 200:
        df_movies.drop(column,axis=1,inplace=True)

In [11]:
# Removendo nulos de poster_path
for i, value in enumerate(df_movies['poster_path'].values):
    if value is None:
        df_movies.iloc[i,4] = "not_informed"

In [12]:
# Atribuindo a url aos posters        
df_movies['poster_path'] = df_movies['poster_path'].apply(lambda x: 'https://image.tmdb.org/t/p/original/' + x if x != "not_informed" else x)

In [13]:
# Mantendo apenas o primeiro genero de cada filme
for i, value in enumerate(df_movies['genre_ids'].values):
    try:
        df_movies['genre_ids'][i] = df_movies['genre_ids'][i][0]
    except:
        df_movies['genre_ids'][i] = "not_informed"

In [14]:
# Mantendo apenas o ano de lançamento do filme
df_movies['release_date'] = pd.to_datetime(df_movies['release_date'],format='%Y/%m/%d')

df_movies['release_date'] = df_movies['release_date'].fillna('not_informed')

df_movies['release_year'] = df_movies['release_date'].apply(lambda x: x.strftime('%Y') if x != "not_informed" else x)

In [23]:
def replace_genre(x):
    for ids,name in list(zip(generos['id'],generos['name'])):
        if x == ids:
            name_genre = name
            return name_genre

# Criando coluna dos generos substituindo os ids
df_movies['genre_name'] = df_movies['genre_ids'].map(replace_genre)

In [None]:
# Substituindo siglas

df_movies['original_language'] = df_movies['original_language'].apply(lambda x: x.replace('en', 'english') if isinstance(x, str) else x)

## Export

In [59]:
def to_csv(file):
    try:
        file.to_csv(r'C:\Users\znaya\Desktop\tmdb\base_movies.csv',index=True)
        print("Salvo com sucesso")
    except:
        print("Erro ao salvar")

In [60]:
to_csv(df_movies)

Salvo com sucesso


## View

In [25]:
df_movies['genre_name'].value_counts()

Drama              2051
Action             1472
Comedy             1461
Horror              821
Animation           678
Thriller            505
Adventure           496
Crime               388
Romance             353
Documentary         351
Science Fiction     333
Family              222
Fantasy             205
Music               136
Mystery             127
Western              92
War                  85
History              62
TV Movie             47
Name: genre_name, dtype: int64

In [36]:
df_movies.query("popularity > 1000")

Unnamed: 0,id,title,original_title,overview,poster_path,media_type,adult,original_language,genre_ids,popularity,release_date,video,vote_average,vote_count,release_year,genre_name
6,653346,Kingdom of the Planet of the Apes,Kingdom of the Planet of the Apes,"Several generations following Caesar's reign, ...",https://image.tmdb.org/t/p/original//gKkl37BQu...,movie,False,en,878,1526.548,2024-05-08 00:00:00,False,7.132,1950,2024,Science Fiction
8,1022789,Inside Out 2,Inside Out 2,Teenager Riley's mind headquarters is undergoi...,https://image.tmdb.org/t/p/original//vpnVM9B6N...,movie,False,en,16,5100.537,2024-06-11 00:00:00,False,7.7,1934,2024,Animation
11,974262,Descendants: The Rise of Red,Descendants: The Rise of Red,After the Queen of Hearts incites a coup on Au...,https://image.tmdb.org/t/p/original//8fYluTtB3...,movie,False,en,14,2015.902,2024-07-11 00:00:00,False,7.3,145,2024,Fantasy
12,786892,Furiosa: A Mad Max Saga,Furiosa: A Mad Max Saga,"As the world fell, young Furiosa is snatched f...",https://image.tmdb.org/t/p/original//iADOJ8Zym...,movie,False,en,28,1792.329,2024-05-22 00:00:00,False,7.6,2216,2024,Action
13,748783,The Garfield Movie,The Garfield Movie,"Garfield, the world-famous, Monday-hating, las...",https://image.tmdb.org/t/p/original//p6AbOJvMQ...,movie,False,en,16,2053.649,2024-04-30 00:00:00,False,7.261,499,2024,Animation
14,519182,Despicable Me 4,Despicable Me 4,"Gru and Lucy and their girls — Margo, Edith an...",https://image.tmdb.org/t/p/original//3w84hCFJA...,movie,False,en,16,3949.4,2024-06-20 00:00:00,False,7.373,334,2024,Animation
25,280180,Beverly Hills Cop: Axel F,Beverly Hills Cop: Axel F,Forty years after his unforgettable first case...,https://image.tmdb.org/t/p/original//zszRKfzjM...,movie,False,en,28,1214.968,2024-06-20 00:00:00,False,6.905,629,2024,Action
47,573435,Bad Boys: Ride or Die,Bad Boys: Ride or Die,"After their late former Captain is framed, Low...",https://image.tmdb.org/t/p/original//nP6RliHjx...,movie,False,en,28,1219.92,2024-06-05 00:00:00,False,7.119,574,2024,Action
57,560016,Monkey Man,Monkey Man,Kid is an anonymous young man who ekes out a m...,https://image.tmdb.org/t/p/original//4lhR4L2vz...,movie,False,en,28,1085.504,2024-04-03 00:00:00,False,7.065,607,2024,Action
2885,943344,Underground Monster,地底怪物,,https://image.tmdb.org/t/p/original//lWVwWRLqp...,movie,False,zh,14,1014.448,2023-04-27 00:00:00,False,3.667,6,2023,Fantasy
