# Scraping Nigerian Movies Data From IMDb

## Install Required Libraries

In [1]:
pip install requests beautifulsoup4 --user --no-warn-script-location

Note: you may need to restart the kernel to use updated packages.


## Import Required Libraries

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import numpy as np

## Declare the headers

In [3]:
headers = {"Accept-Language": "en-US,en;q=0.5"}

## Attributes to Extract

In [4]:
movie_title = []
director_names = []
duration = []
actor_1_name = []
genres = []
num_votes = []
actor_2_name = []
actor_3_name = []
content_rating = []
release_year = []

## Set URL and Make HTTP Request

In [26]:
pages = np.arange(1, 1000, 50)

for response in pages:  
    response = requests.get("https://www.imdb.com/search/title/?country_of_origin=NG&start="+str(response)+"&ref_=adv_nxt")

## Identify the Website Structure

In [27]:
    soup = BeautifulSoup(response.text, 'html.parser')

In [28]:
    movie_data = soup.findAll('div', attrs={'class': 'lister-item mode-advanced'})

### Extract Movie Attributes

In [29]:
    for store in movie_data:
        # 1. Extract Movie Titles
        name = store.h3.a.text
        movie_title.append(name)
    
        # 2. Extract Movie Year
        year_element = store.h3.find('span', class_ = 'lister-item-year text-muted unbold')
        if year_element:
            year = year_element.text.replace('(', '').replace(')', '')
        else:
            year = "N/A"
        release_year.append(year)
    
        # 3. Extract Movie Duration
        runtime_element = store.p.find('span', class_ = "runtime")
        if runtime_element:
            runtime = runtime_element.text.replace(' min', '')
        else:
            runtime = "N/A"
        duration.append(runtime)
    
        # 4. Extract Director's Name
        director_element = store.find('p', class_ = "")
        if director_element:
            director_link = director_element.find('a')
            if director_link:
                director_name = director_link.text
            else:
                director_name = "N/A"
        else:
            director_name = "N/A"
        director_names.append(director_name)
    
        # 5. Extract Movie Rating
        rate = store.find('div', class_ = 'inline-block ratings-imdb-rating')
        if rate is not None:
            ratings = rate.text.replace('\n', '')
        else:
            ratings = "N/A"
        content_rating.append(ratings)
    
        # 6. Extract Actor 1 Name
        stars_label = store.select('p:-soup-contains(" Stars: ")')
        if stars_label:
            first_actor_element = stars_label[0].find_next('a')
            first_actor_name = first_actor_element.text if first_actor_element else "N/A"
        else:
            first_actor_name = "N/A"
        actor_1_name.append(first_actor_name)
    
        # 7. Extract Genres
        genre_element = store.p.find('span', class_='genre')
        if genre_element is not None:
            genre_text = genre_element.get_text(strip=True)
            genre_list = [genre.strip() for genre in genre_text.split(',')]
            if genre_list:
                genres.append(genre_list[0])  # Only append the first genre
            else:
                genres.append("N/A")
        else:
            genres.append("N/A")
        
        # 8. Extract Votes
        value = store.find_all('span', attrs = {'name': 'nv'})
        if value:
            vote = value[0].text
            num_votes.append(vote)
        else:
            num_votes.append('N/A')

## Create a DataFrame for Data

In [30]:
nigerian_movies = pd.DataFrame({'movie_title': movie_title, 'release_year': release_year, 'duration': duration, 'genres': genres, 'director_names': director_names, 'actor_1_name': actor_1_name, 'num_votes': num_votes, 'content_rating': content_rating})

In [31]:
nigerian_movies.head()

Unnamed: 0,movie_title,release_year,duration,genres,director_names,actor_1_name,num_votes,content_rating
0,The Black Book,2023,124.0,Thriller,Editi Effiong,,905,5.1
1,SHE Must Be Obeyed,2023–,,Comedy,Funke Akindele,,89,6.1
2,After Party,2021,,Comedy,Tope Alake,,84,7.2
3,Ijogbon,2023,115.0,Drama,Kunle Afolayan,,74,5.6
4,Something Like Gold,2023,,Drama,Kayode Kasum,,19,9.4


In [33]:
nigerian_movies.to_csv('nigerian_movies.csv', index=False)