## API Call

In [8]:
import requests
import time

# Replace 'YOUR_API_KEY' with your actual TMDb API key
API_KEY = 'API_KEY'
BASE_URL = 'https://api.themoviedb.org/3'

def get_movie_details(movie_ids):
    details = []
    for movie_id in movie_ids:
        endpoint = f'/movie/{movie_id}'
        params = {
            'api_key': API_KEY
        }
        response = requests.get(BASE_URL + endpoint, params=params)
        if response.status_code == 200:
            details.append(response.json())
        else:
            print(f"Error fetching details for movie ID {movie_id}. Status code: {response.status_code}")
        time.sleep(0.1)  # Delay to avoid rate limits
    return details

def fetch_movie_ids(page_start, page_limit):
    all_movie_ids = []
    for page in range(page_start, page_start + page_limit):
        endpoint = '/movie/popular'
        params = {
            'api_key': API_KEY,
            'page': page
        }
        response = requests.get(BASE_URL + endpoint, params=params)
        if response.status_code == 200:
            movie_data = response.json()
            movie_ids = [movie['id'] for movie in movie_data['results']]
            all_movie_ids.extend(movie_ids)
        else:
            print(f"Error fetching movie IDs from page {page}. Status code: {response.status_code}")
        time.sleep(0.1)  # Delay to avoid rate limits
    return all_movie_ids

# Loop to fetch data in chunks
total_cycles = 6
pages_per_cycle = 100
all_movie_details = []

for cycle in range(total_cycles):
    page_start = 1 + cycle * pages_per_cycle
    print(f"Fetching cycle {cycle + 1}, starting at page {page_start}...")
    movie_ids = fetch_movie_ids(page_start, pages_per_cycle)
    movie_details = get_movie_details(movie_ids)
    all_movie_details.extend(movie_details)

# Example: Print details of the first movie
if all_movie_details:
    first_movie = all_movie_details[0]
    print("Title:", first_movie['title'])
    print("Overview:", first_movie['overview'])
    print("Release Date:", first_movie['release_date'])


Fetching cycle 1, starting at page 1...
Fetching cycle 2, starting at page 101...
Fetching cycle 3, starting at page 201...
Fetching cycle 4, starting at page 301...
Fetching cycle 5, starting at page 401...
Fetching cycle 6, starting at page 501...
Error fetching movie IDs from page 501. Status code: 400
Error fetching movie IDs from page 502. Status code: 400
Error fetching movie IDs from page 503. Status code: 400
Error fetching movie IDs from page 504. Status code: 400
Error fetching movie IDs from page 505. Status code: 400
Error fetching movie IDs from page 506. Status code: 400
Error fetching movie IDs from page 507. Status code: 400
Error fetching movie IDs from page 508. Status code: 400
Error fetching movie IDs from page 509. Status code: 400
Error fetching movie IDs from page 510. Status code: 400
Error fetching movie IDs from page 511. Status code: 400
Error fetching movie IDs from page 512. Status code: 400
Error fetching movie IDs from page 513. Status code: 400
Error fet

In [9]:
all_movie_details

[{'adult': False,
  'backdrop_path': '/qrGtVFxaD8c7et0jUtaYhyTzzPg.jpg',
  'belongs_to_collection': {'id': 1280074,
   'name': 'Kong Collection',
   'poster_path': '/lhyEUeOihbKf7ll8RCIE5CHTie3.jpg',
   'backdrop_path': None},
  'budget': 150000000,
  'genres': [{'id': 28, 'name': 'Action'},
   {'id': 878, 'name': 'Science Fiction'},
   {'id': 12, 'name': 'Adventure'}],
  'homepage': 'https://www.godzillaxkongmovie.com',
  'id': 823464,
  'imdb_id': 'tt14539740',
  'origin_country': ['US'],
  'original_language': 'en',
  'original_title': 'Godzilla x Kong: The New Empire',
  'overview': 'Following their explosive showdown, Godzilla and Kong must reunite against a colossal undiscovered threat hidden within our world, challenging their very existence – and our own.',
  'popularity': 1818.259,
  'poster_path': '/tMefBSflR6PGQLv7WvFPpKLZkyk.jpg',
  'production_companies': [{'id': 923,
    'logo_path': '/8M99Dkt23MjQMTTWukq4m5XsEuo.png',
    'name': 'Legendary Pictures',
    'origin_country

In [16]:
import pandas as pd
df = pd.DataFrame(all_movie_details)

# columns to select
columns_to_select = ['imdb_id', 'title', 'original_language', 'origin_country', 'adult',
                     'genres', 'budget', 'revenue', 'release_date', 
                     'runtime','popularity', 'vote_average', 'vote_count']

# Selecting the columns from dataset
selected_df = df[columns_to_select]

# Renaming the columns
selected_df = selected_df.rename(columns={
    'original_language': 'language', 
    'origin_country': 'country',
    'genres': 'genres', 
    'vote_average': 'avg_rating', 
    'vote_count': 'people_rated'
})



In [17]:
selected_df.to_csv('api_movie_data.csv', index = False)

In [19]:
import boto3

def upload_file_to_s3(file_name, bucket_name, object_name=None):

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        response = s3_client.upload_file(file_name, bucket_name, object_name)
    except Exception as e:
        print(f"Upload failed: {e}")
        return False
    return True

# Example usage:
file_name = 'api_movie_data.csv'
bucket_name = 'final-project-rawdata-group-5-sec-2'
upload_file_to_s3(file_name, bucket_name)


True