# Fetching Data through TMDB API 

### Importing necessary libraries

In [1]:
import pandas as pd
import requests as re

## Fetching data through API 

In [2]:
# TMDB API base URL
base_url = 'https://api.themoviedb.org/3/discover/movie'
api_key = '5b79eab8702a08b73d650a25fb17cfd2'

# Parameters that are constant for each request
params = {
    'include_adult': 'false',
    'include_video': 'false',
    'language': 'en-US',
    'sort_by': 'vote_average.desc',
    'without_genres': '99,10755',
    'vote_count.gte': '200',
    'api_key': api_key,
    'page': 1  # start from page 1
}

# First request to get total pages
response = re.get(base_url, params=params)

if response.status_code != 200:
    raise Exception(f"Failed to fetch data: {response.status_code}")

data = response.json()
total_pages = data['total_pages']
print(f"Total pages available: {total_pages}")

# Initialize list with first page results
all_movies = data['results']

# Loop through remaining pages
for page in range(2, total_pages + 1):
    print(f"Fetching page {page}...")
    params['page'] = page
    response = re.get(base_url, params=params)
    
    if response.status_code == 200:
        page_data = response.json()
        all_movies.extend(page_data['results'])
    else:
        print(f"Failed to fetch page {page}, status code: {response.status_code}")

# Convert to DataFrame
df = pd.DataFrame(all_movies)

Total pages available: 659
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Fetching page 11...
Fetching page 12...
Fetching page 13...
Fetching page 14...
Fetching page 15...
Fetching page 16...
Fetching page 17...
Fetching page 18...
Fetching page 19...
Fetching page 20...
Fetching page 21...
Fetching page 22...
Fetching page 23...
Fetching page 24...
Fetching page 25...
Fetching page 26...
Fetching page 27...
Fetching page 28...
Fetching page 29...
Fetching page 30...
Fetching page 31...
Fetching page 32...
Fetching page 33...
Fetching page 34...
Fetching page 35...
Fetching page 36...
Fetching page 37...
Fetching page 38...
Fetching page 39...
Fetching page 40...
Fetching page 41...
Fetching page 42...
Fetching page 43...
Fetching page 44...
Fetching page 45...
Fetching page 46...
Fetching page 47...
Fetching page 48...
Fetching page 49...
Fetching page 50...
F

In [3]:
df

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,/zfbjgQE1uSd9wiPTX4VzsLi0rGG.jpg,"[18, 80]",278,en,The Shawshank Redemption,Imprisoned in the 1940s for the double murder ...,47.8340,/9cqNxx0GxF0bflZmeSMuL5tnGzr.jpg,1994-09-23,The Shawshank Redemption,False,8.709,28207
1,False,/tmU7GeKVybMWFButWEGl2M4GeiP.jpg,"[18, 80]",238,en,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",35.3778,/3bhkrj58Vtu7enYsRolD1fZdja1.jpg,1972-03-14,The Godfather,False,8.687,21383
2,False,/xyWKrni8WrYzqn7ztvI5nIY0h62.jpg,"[35, 10749]",1181678,en,¿Quieres ser mi hijo?,"Lu, a conformist woman in her forties, learns ...",2.4201,/9GuvODahvuFqdhuZ16aBLR4UJoP.jpg,2023-09-21,¿Quieres ser mi hijo?,False,8.576,284
3,False,/kGzFbGhp99zva6oZODW5atUtnqi.jpg,"[18, 80]",240,en,The Godfather Part II,In the continuing saga of the Corleone crime f...,17.3126,/hek3koDUyRQk7FIhPXsa6mT2Zc3.jpg,1974-12-20,The Godfather Part II,False,8.571,12918
4,False,/zb6fM1CX41D9rF9hdgclu0peUmy.jpg,"[18, 36, 10752]",424,en,Schindler's List,The true story of how businessman Oskar Schind...,37.8139,/sF1U4EUQS8YHUYjNl3pMGNIQyr0.jpg,1993-12-15,Schindler's List,False,8.565,16394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,False,/hlPOs551MEegrr6M3yYATBWy58e.jpg,"[10751, 16, 12, 35]",13654,en,101 Dalmatians II: Patch's London Adventure,"Being one of 101 takes its toll on Patch, who ...",3.6105,/rFTt7pNSCJKLZ2RZldJuq6ON6Ti.jpg,2003-03-02,101 Dalmatians II: Patch's London Adventure,False,6.026,1188
9996,False,/mPId0j7owQmeOQHyOl2kEbzQpXd.jpg,"[35, 28, 80]",9416,en,Money Talks,"Sought by police and criminals, a small-time h...",5.5379,/bN57Rl003E9pYred5kw9Rp8h9Np.jpg,1997-08-22,Money Talks,False,6.026,392
9997,False,/6hQgTpZVNHrLafE6IeSAoThQwkH.jpg,"[14, 53, 18]",8884,en,Franklyn,Set between the parallel worlds of contemporar...,3.6975,/5yns10om8I19j4OHy1zoVRilAnw.jpg,2008-10-16,Franklyn,False,6.000,360
9998,False,/c2Av25HOoF3hKWGO2MRicHtiqJa.jpg,"[80, 18, 9648, 53]",8284,en,The Limits of Control,A mysterious stranger works outside the law an...,2.5574,/5e4hjFt3V8LTUXPxvZ2plPpgGLk.jpg,2009-05-01,The Limits of Control,False,6.026,286


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   adult              10000 non-null  bool   
 1   backdrop_path      9985 non-null   object 
 2   genre_ids          10000 non-null  object 
 3   id                 10000 non-null  int64  
 4   original_language  10000 non-null  object 
 5   original_title     10000 non-null  object 
 6   overview           10000 non-null  object 
 7   popularity         10000 non-null  float64
 8   poster_path        9993 non-null   object 
 9   release_date       10000 non-null  object 
 10  title              10000 non-null  object 
 11  video              10000 non-null  bool   
 12  vote_average       10000 non-null  float64
 13  vote_count         10000 non-null  int64  
dtypes: bool(2), float64(2), int64(2), object(8)
memory usage: 957.2+ KB


# Web Scrapping

## Fetching Genre name through Web Scrapping

In [12]:
# now creating new column on the basis of genre_ids

#### we have genre_ids relation on other page of website so scraped the data to create new column

In [1]:
from bs4 import BeautifulSoup

# Example HTML (this should be your website code loaded as string)
html_code = '''
<ul id="with_genres" class="multi_select text" name="with_genres[]">
                    <li data-value="28"><a class="no_click" href="/discover/movie?with_genres=28">Action</a></li>
                    <li data-value="12"><a class="no_click" href="/discover/movie?with_genres=12">Adventure</a></li>
                    <li data-value="16"><a class="no_click" href="/discover/movie?with_genres=16">Animation</a></li>
                    <li data-value="35"><a class="no_click" href="/discover/movie?with_genres=35">Comedy</a></li>
                    <li data-value="80"><a class="no_click" href="/discover/movie?with_genres=80">Crime</a></li>
                    <li data-value="99"><a class="no_click" href="/discover/movie?with_genres=99">Documentary</a></li>
                    <li data-value="18"><a class="no_click" href="/discover/movie?with_genres=18">Drama</a></li>
                    <li data-value="10751"><a class="no_click" href="/discover/movie?with_genres=10751">Family</a></li>
                    <li data-value="14"><a class="no_click" href="/discover/movie?with_genres=14">Fantasy</a></li>
                    <li data-value="36"><a class="no_click" href="/discover/movie?with_genres=36">History</a></li>
                    <li data-value="27"><a class="no_click" href="/discover/movie?with_genres=27">Horror</a></li>
                    <li data-value="10402"><a class="no_click" href="/discover/movie?with_genres=10402">Music</a></li>
                    <li data-value="9648"><a class="no_click" href="/discover/movie?with_genres=9648">Mystery</a></li>
                    <li data-value="10749"><a class="no_click" href="/discover/movie?with_genres=10749">Romance</a></li>
                    <li data-value="878"><a class="no_click" href="/discover/movie?with_genres=878">Science Fiction</a></li>
                    <li data-value="10770"><a class="no_click" href="/discover/movie?with_genres=10770">TV Movie</a></li>
                    <li data-value="53"><a class="no_click" href="/discover/movie?with_genres=53">Thriller</a></li>
                    <li data-value="10752"><a class="no_click" href="/discover/movie?with_genres=10752">War</a></li>
                    <li data-value="37"><a class="no_click" href="/discover/movie?with_genres=37">Western</a></li>
                </ul>
'''

# Step 1: Parse the HTML
soup = BeautifulSoup(html_code, 'html.parser')

# Step 2: Find all <li> elements
li_tags = soup.find_all('li')
print(li_tags)
# Step 3: Create dictionary from data
genre_mapping = {}
for li in li_tags:
    genre_id = int(li['data-value'])  # get data-value attribute
    print(genre_id)
    genre_name = li.get_text(strip=True)  # get visible text (the genre name)
    print(genre_name)
    genre_mapping[genre_id] = genre_name

# Step 4: Print the extracted dictionary
print(genre_mapping)

[<li data-value="28"><a class="no_click" href="/discover/movie?with_genres=28">Action</a></li>, <li data-value="12"><a class="no_click" href="/discover/movie?with_genres=12">Adventure</a></li>, <li data-value="16"><a class="no_click" href="/discover/movie?with_genres=16">Animation</a></li>, <li data-value="35"><a class="no_click" href="/discover/movie?with_genres=35">Comedy</a></li>, <li data-value="80"><a class="no_click" href="/discover/movie?with_genres=80">Crime</a></li>, <li data-value="99"><a class="no_click" href="/discover/movie?with_genres=99">Documentary</a></li>, <li data-value="18"><a class="no_click" href="/discover/movie?with_genres=18">Drama</a></li>, <li data-value="10751"><a class="no_click" href="/discover/movie?with_genres=10751">Family</a></li>, <li data-value="14"><a class="no_click" href="/discover/movie?with_genres=14">Fantasy</a></li>, <li data-value="36"><a class="no_click" href="/discover/movie?with_genres=36">History</a></li>, <li data-value="27"><a class="no

In [None]:
## Now adding genre_name column in data as relation of genre_ids

In [14]:
def map_genre_ids_to_names(genre_ids):
    return [genre_mapping.get(genre_id, "Unknown") for genre_id in genre_ids]

# Create a new column 'genre_names'
df['genre_names'] = df['genre_ids'].apply(map_genre_ids_to_names)

In [15]:
df

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,genre_names
0,False,/zfbjgQE1uSd9wiPTX4VzsLi0rGG.jpg,"[18, 80]",278,en,The Shawshank Redemption,Imprisoned in the 1940s for the double murder ...,47.8340,/9cqNxx0GxF0bflZmeSMuL5tnGzr.jpg,1994-09-23,The Shawshank Redemption,False,8.709,28207,"[Drama, Crime]"
1,False,/tmU7GeKVybMWFButWEGl2M4GeiP.jpg,"[18, 80]",238,en,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",35.3778,/3bhkrj58Vtu7enYsRolD1fZdja1.jpg,1972-03-14,The Godfather,False,8.687,21383,"[Drama, Crime]"
2,False,/xyWKrni8WrYzqn7ztvI5nIY0h62.jpg,"[35, 10749]",1181678,en,¿Quieres ser mi hijo?,"Lu, a conformist woman in her forties, learns ...",2.4201,/9GuvODahvuFqdhuZ16aBLR4UJoP.jpg,2023-09-21,¿Quieres ser mi hijo?,False,8.576,284,"[Comedy, Romance]"
3,False,/kGzFbGhp99zva6oZODW5atUtnqi.jpg,"[18, 80]",240,en,The Godfather Part II,In the continuing saga of the Corleone crime f...,17.3126,/hek3koDUyRQk7FIhPXsa6mT2Zc3.jpg,1974-12-20,The Godfather Part II,False,8.571,12918,"[Drama, Crime]"
4,False,/zb6fM1CX41D9rF9hdgclu0peUmy.jpg,"[18, 36, 10752]",424,en,Schindler's List,The true story of how businessman Oskar Schind...,37.8139,/sF1U4EUQS8YHUYjNl3pMGNIQyr0.jpg,1993-12-15,Schindler's List,False,8.565,16394,"[Drama, History, War]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,False,/hlPOs551MEegrr6M3yYATBWy58e.jpg,"[10751, 16, 12, 35]",13654,en,101 Dalmatians II: Patch's London Adventure,"Being one of 101 takes its toll on Patch, who ...",3.6105,/rFTt7pNSCJKLZ2RZldJuq6ON6Ti.jpg,2003-03-02,101 Dalmatians II: Patch's London Adventure,False,6.026,1188,"[Family, Animation, Adventure, Comedy]"
9996,False,/mPId0j7owQmeOQHyOl2kEbzQpXd.jpg,"[35, 28, 80]",9416,en,Money Talks,"Sought by police and criminals, a small-time h...",5.5379,/bN57Rl003E9pYred5kw9Rp8h9Np.jpg,1997-08-22,Money Talks,False,6.026,392,"[Comedy, Action, Crime]"
9997,False,/6hQgTpZVNHrLafE6IeSAoThQwkH.jpg,"[14, 53, 18]",8884,en,Franklyn,Set between the parallel worlds of contemporar...,3.6975,/5yns10om8I19j4OHy1zoVRilAnw.jpg,2008-10-16,Franklyn,False,6.000,360,"[Fantasy, Thriller, Drama]"
9998,False,/c2Av25HOoF3hKWGO2MRicHtiqJa.jpg,"[80, 18, 9648, 53]",8284,en,The Limits of Control,A mysterious stranger works outside the law an...,2.5574,/5e4hjFt3V8LTUXPxvZ2plPpgGLk.jpg,2009-05-01,The Limits of Control,False,6.026,286,"[Crime, Drama, Mystery, Thriller]"


# Cleaning the data as per requirment