In [2]:
import os
import time

import pandas as pd
import tmdbsimple as tmdb

# ----------------------------------------------------------------------
# IMPORTANT: Supply your own TMDb API key.
# Set the TMDB_API_KEY environment variable to provide it without editing
# this file; the literal below is only used when that variable is unset
# or empty.
# NOTE(review): a literal key is embedded here as the fallback. If it is a
# real credential it should be rotated and removed from source control.
# ----------------------------------------------------------------------
tmdb.API_KEY = os.environ.get('TMDB_API_KEY') or 'c534376b50016a7dc7e4f8033944c823'
# CSV file that the script entry point writes the collected rows to.
OUTPUT_FILENAME = 'movies_tmdb_api_raw.csv'

def _movie_to_row(movie_summary, movie_details, genre_map):
    """Flatten one movie (list summary + detail record) into a CSV row dict."""
    # `or []` / `or ''` cover keys that are present but null: dict.get()'s
    # default only applies when the key is missing, so a null value would
    # otherwise reach the loop / slice below and raise TypeError.
    genre_ids = movie_summary.get('genre_ids') or []
    release_date = movie_details.get('release_date') or ''
    genre_names = [genre_map.get(gid, 'N/A') for gid in genre_ids]

    return {
        'Movie/Show Name': movie_details.get('title'),
        'Genre(s)': ', '.join(genre_names),
        'IMDb Rating': movie_details.get('vote_average'),
        'Number of Votes': movie_details.get('vote_count'),
        'Release Year': release_date[:4],
        'Runtime (optional)': movie_details.get('runtime', 'N/A'),
        'Popularity Index (from API if available)': movie_details.get('popularity')
    }


def get_top_rated_movies(*, max_pages=None, delay=0.5) -> list:
    """Collect top-rated movies from TMDb, one result page at a time.

    For every movie on a page, a second request fetches that movie's detail
    record; the summary and the details are then flattened into one row.

    Collection ends when any of the following happens:
      * a page comes back with no results,
      * the page just read is the last one according to the ``total_pages``
        value in the response (when the response carries one),
      * ``max_pages`` pages have been requested,
      * the user presses Ctrl + C,
      * any other exception is raised inside the paging loop.
    In every case the rows gathered so far are returned.

    Args:
        max_pages: Optional upper bound on the number of pages to request.
            ``None`` (the default) means no limit.
        delay: Seconds to sleep between pages.

    Returns:
        A list of dicts, one per movie, keyed by the CSV column names.

    Raises:
        Exceptions from the initial genre lookup propagate to the caller,
        because that request is made before the ``try`` block.
    """
    all_movie_data = []
    movies = tmdb.Movies()

    # Get genre list for ID -> Name mapping
    genres_list = tmdb.Genres().movie_list()['genres']
    genre_map = {g['id']: g['name'] for g in genres_list}

    print("üé¨ Starting data collection from TMDb Top Rated movies...")
    print("üëâ Press Ctrl + C anytime to stop collecting manually.\n")

    page = 1
    try:
        while max_pages is None or page <= max_pages:
            response = movies.top_rated(page=page)
            results = response.get('results')

            # Stop if no results found on current page
            if not results:
                print(f"‚ö†Ô∏è No results found on page {page}. Stopping data collection.")
                break

            for movie_summary in results:
                # The list endpoint's summary is not enough on its own here:
                # the detail record is fetched per movie (one extra request each).
                movie_details = tmdb.Movies(movie_summary['id']).info()
                all_movie_data.append(_movie_to_row(movie_summary, movie_details, genre_map))

            print(f"‚úÖ Page {page} collected ({len(all_movie_data)} movies total).")

            # End cleanly on the last page instead of requesting a page past
            # the end and relying on the API to return nothing or an error.
            total_pages = response.get('total_pages')
            if total_pages and page >= total_pages:
                break

            page += 1
            time.sleep(delay)  # Prevent hitting TMDb rate limits

    except KeyboardInterrupt:
        print("\n‚è∏ Manual stop requested. Saving collected data...")
    except Exception as e:
        # Boundary handler: report the failure but keep what was collected.
        print(f"\n‚ùå Error fetching data on page {page}: {e}")

    return all_movie_data


if __name__ == '__main__':
    # Script entry point: collect rows, then persist them as CSV.
    raw_data = get_top_rated_movies()

    if not raw_data:
        # Nothing came back at all.
        print("\n‚ùå No data was collected. Check your API key and network connection.")
    else:
        # Build the table and discard rows that have no title before saving.
        df = pd.DataFrame(raw_data).dropna(subset=['Movie/Show Name'])
        df.to_csv(OUTPUT_FILENAME, index=False, encoding='utf-8')
        print(f"\n‚ú® Data Collection Complete! Saved {len(df)} records to {OUTPUT_FILENAME}")


🎬 Starting data collection from TMDb Top Rated movies...
👉 Press Ctrl + C anytime to stop collecting manually.

✅ Page 1 collected (20 movies total).
✅ Page 2 collected (40 movies total).
✅ Page 3 collected (60 movies total).
✅ Page 4 collected (80 movies total).
✅ Page 5 collected (100 movies total).
✅ Page 6 collected (120 movies total).

⏸ Manual stop requested. Saving collected data...

✨ Data Collection Complete! Saved 120 records to movies_tmdb_api_raw.csv
