# Scraping Nigerian Movies Data From Wikipedia (1992 - 2023)

## Import Libraries for Extraction

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

## Extract Data for 2015

In [2]:
# Attributes to Extract
year = []
title = []
director = []
cast = []
genre = []

# Define the URL
df15_link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2015"

# Send an HTTP GET request to the URL
response = requests.get(df15_link)

# Parse the HTML content of the page using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Find the table(s) you want to extract
# You may need to inspect the HTML source to find the correct table
tables = soup.find_all("table", {"class": "wikitable"})

# Iterate through each table
for table in tables:
    # Iterate through the rows of the table
    for row in table.find_all('tr')[1:]:  # Skip the header row
        columns = row.find_all('td')
        if len(columns) >= 4:  # Make sure the row has enough columns
            # Extract data and append to respective lists
            title.append(columns[2].text.strip())
            director.append(columns[3].text.strip())
            cast.append(columns[4].text.strip())
            genre.append(columns[5].text.strip())

            # Create a DataFrame
df_2015 = pd.DataFrame({
    "Title": title,
    "Director": director,
    "Cast": cast,
    "Genre": genre
})

# Add the "Year" column with the year "2015" for all rows
df_2015.insert(0, "Year", 2015)

df_2015.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2015,Gone Too Far!,Destiny Ekaragha,OC UkejeAdelayo AdedayoShanika Warren-Markland...,Comedy
1,2015,Remi Vaughn Richards,Majid MichelOC UkejeDesmond ElliotJide KosokoO...,Romantic thriller,InkBlot Productions Closer Pictures FilmOne ...
2,2015,The Green Eyed,Blessing O. Oduefe,Nse Ikpe EtimKalu IkeagwuTamara Eteimo Blosso...,Drama
3,2015,Darasen Richards DJ Tee,Olu JacobsBimbo AkintolaGabriel Afolayan Rica...,Drama,Darasen Richards Films
4,2015,Bimbo ManuelFunlola Aofiyebi-RaimiSika Osei Ma...,Drama,Sparrow Productions,[3][4]
5,2015,Tunde Kelani,"Kemi ""Lala"" AkindojuSeun AkindeleKunle Afolaya...",Drama,Mainframe Films
6,2015,Uche JomboChet Anekwe,Drama,,[6]
7,2015,Obi Emelonye,Ramsey NouahMercy JohnsonMary Njoku,Drama,The Nollywood Factory
8,2015,Michael Uadiale Jr,Jackie AppiahBobby Obodo,Drama,GenMeMoir
9,2015,The Duplex,Ikechukwu Onyeka,Omoni OboliMike EzuruonyeUru Eke,Supernatural Thriller


## Extract Data for 1992

In [3]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_1992"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a DataFrame
df_1992 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "1992" for all rows
df_1992.insert(0, "Year", 1992)

df_1992 = df_1992.iloc[1:]

# Reset the index to start from 0
df_1992.reset_index(drop=True, inplace=True)

df_1992.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,1992,Living in Bondage 1,Chris Obi Rapu,Kanayo O. Kanayo Kenneth Okonkwo Okechukwu Ogu...,Drama / thriller
1,1992,Agba Man,Moses Olaiya,Moses Olaiya Shola Shoremekun Bankole Ayodeji ...,
2,1992,,,,
3,1992,,,,
4,1992,,,,
5,1992,,,,
6,1992,,,,
7,1992,,,,
8,1992,,,,
9,1992,,,,


## Extract Data for 1997

In [4]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_1997"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_1997 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "1997" for all rows
df_1997.insert(0, "Year", 1997)

df_1997 = df_1997.iloc[1:]

# Reset the index to start from 0
df_1997.reset_index(drop=True, inplace=True)

df_1997.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,1997,Back to Africa,Tony Abulu,Wole Amele Ella Asad Jimmy Johnson Lanre Hassan,
1,1997,Blood Money,Chico Ejiro,Zack Orji Kanayo O. Kanayo Francis Agu Sam Dede,
2,1997,Crossroad: The Beginning,Christyn Michaels,Gbenga Richards Ngozi Nwosu Emeka Ossai Kate H...,
3,1997,Dead End 2,Chico Ejiro,Zack Orji Liz Benson Sandra Achums Ameze Imari...,
4,1997,Died Wretched,Kenneth Nnebue,Tony Umez Eucharia Anunobi Rachel Oniga Tom Nj...,
5,1997,Flesh and Blood: The Jessie Chukwuma Story 2,Chico Ejiro,Ameze Imarhiagbe Richard Mofe-Damijo Bassey-In...,
6,1997,Forever,Amaka Igwe,Justus Esiri Hilda Dokubo John Nwaobi Ohi Aleg...,
7,1997,Hostages,Tade Ogidan,Tope Idowu Ofuafo Otomewo Richard Mofe Damijo ...,Action
8,1997,"Iya Ibeji Eleran Igbe (Mother of Twins, Seller...",Abbey Lanre,Aduke Adeyemo Gbolagade Akinpelu Dupe Johnson ...,
9,1997,Obe Gbona (Hot Soup),,Moses Olaiya Adejumo Iya Sala Adisa Baba Oyin ...,


## Extract Data for 1998

In [5]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_1998"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_1998 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "1998" for all rows
df_1998.insert(0, "Year", 1998)

df_1998 = df_1998.iloc[1:]

# Reset the index to start from 0
df_1998.reset_index(drop=True, inplace=True)

df_1998.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,1998,Diamond Ring,Tade Ogidan,Liz Benson Richard Mofe-Damijo Bimbo Akintola ...,Horror
1,1998,Sakobi,Zeb Ejiro,Saint Obi Susan Patrick Tony Umez Edith Ujay S...,
2,1998,Amadas,Andy Amenechi,Enebeli Elebuwa Saint Obi Bimbo Akintola Adaor...,
3,1998,Suicide Mission,Fred Amata,Richard Mofe-Damijo Regina Askia Ameze Imarhia...,
4,1998,Oracle,Andy Amenechi,Pete Edochie Prince James Uche Enebeli Elebuwa...,
5,1998,Scores to Settle,Chico Ejiro,Richard Mofe Damijo Liz Benson Omotola Jalade ...,
6,1998,Yogo Pam Pam,Kingsley Ogoro,Nkem Owoh Okey Bakasi,
7,1998,Most Wanted,Tunji Bamishigbin,Regina Askia Ibinabo Fiberisima Ayo Adesanya L...,Crime action
8,1998,,,,
9,1998,,,,


## Extract Data for 2001

In [6]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2001"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2001 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2001" for all rows
df_2001.insert(0, "Year", 2001)

df_2001 = df_2001.iloc[1:]

# Reset the index to start from 0
df_2001.reset_index(drop=True, inplace=True)

df_2001.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2001,Girls Hostel 1 and 2,Ndubuisi Okoh,Olu Jacobs Empress Njamah Alexandra Lopez Uche...,
1,2001,Holy Law: Sharia,Ejike Asiegbu,Alex Usifo Ejike Asiegbu Ibrahim Mandawari Rac...,
2,2001,Last Prophet,Lancelot Imasuen,Zulu Adigwe Ejike Asiegbu Franca Brown Larry K...,
3,2001,Oil Village 1 and 2,Kalu Anya,Sam Loco Efe Sam Obiekheme Sandra Achums Nnamd...,
4,2001,Okuzu Massacre: The Robbers Revenge,John Evah,Segun Arinze Hanks Anuku Amaechi Muonagor Gent...,
5,2001,Outkast,Chico Ejiro,Sandra Achums Lilian Bach,Thriller
6,2001,,,,
7,2001,,,,
8,2001,,,,
9,2001,,,,


## Extract Data for 2002

In [7]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2002"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2002 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2002" for all rows
df_2002.insert(0, "Year", 2002)

df_2002 = df_2002.iloc[1:]

# Reset the index to start from 0
df_2002.reset_index(drop=True, inplace=True)

df_2002.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2002,Aki na Ukwa,Amayo Uzo Philips,Osita Iheme Chinedu Ikedieze,Comedy
1,2002,Formidable Force,Teco Benson,Hanks Anuku Ernest Asuzu George Davidson Genev...,
2,2002,Okada Man,Tchidi Chikere,Nkem Owoh Patience Ozokwor Pete Eneh David Ihesie,
3,2002,The Last Vote,Andy Amenechi,Sam Dede Olu Jacobs Acho Ugenyi Sandra Achums,
4,2002,,,,
5,2002,,,,
6,2002,,,,
7,2002,,,,
8,2002,,,,
9,2002,,,,


## Extract Data for 2003

In [8]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2003"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2003 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2003" for all rows
df_2003.insert(0, "Year", 2003)

df_2003 = df_2003.iloc[1:]

# Reset the index to start from 0
df_2003.reset_index(drop=True, inplace=True)

df_2003.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2003,2 Rats,Andy Chukwu,Osita Iheme Chinedu Ikedieze,Comedy
1,2003,Above Death: In God We Trust,Simi Opeoluwa,Pete Edochie Kate Henshaw-Nuttal Genevieve Nna...,Drama
2,2003,Abuja Connection,Adim Williams,,Action / drama / thriller
3,2003,Baby Police,Amayo Uzo Philips,Ijeoma Angel Boniface Okey Billy Boniface Osit...,Comedy / drama
4,2003,Emotional Crack,Lancelot Imasuen,Ramsey Nouah Dakore Egbuson Stephanie Okereke ...,
5,2003,Kasalama: The Slave Merchant,Kenneth Egbuna,Sam Loco Efe Chiwetalu Agu Tom Njemanze Emeka Ani,
6,2003,Mission To Africa,Joy Dickson,Olu Jacobs Segun Arinze Gloria Anozie Bruno Jnr,Thriller / drama
7,2003,Oil Money,Neville Ossai,Clem Ohameze Chidi Mokeme Chijioke Abagwe Maur...,
8,2003,Osuofia in London,Kingsley Ogoro,Nkem Owoh,Comedy
9,2003,,,,


## Extract Data for 2004

In [9]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2004"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2004 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2004" for all rows
df_2004.insert(0, "Year", 2004)

df_2004 = df_2004.iloc[1:]

# Reset the index to start from 0
df_2004.reset_index(drop=True, inplace=True)

df_2004.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2004,Fateful Love,Simi Opeoluwa,Ramsey Nouah Omotola Jalade Ekeinde Paul Obaze...,
1,2004,Goodbye New York 1 and 2,Tchidi Chikere,Genevieve Nnaji Jim lyke Rita Dominic Chidi Mo...,
2,2004,Home and Abroad,Lancelot Imasuen,John Okafor Victor Oswuagwu Izoya Isaac Rita A...,
3,2004,Last Girl Standing 1 and 2,John Uche,Jim Iyke Stepahnie Okereke Robert Peters Empre...,
4,2004,Missing Angel,Charles Novia,Stella Damasus Aboderin Desmond Elliot,Drama
5,2004,The Mayors,Dickson Iroegbu,Richard Mofe-Damijo Sam Dede Segun Arinze,Drama
6,2004,The London Boy,Simi Opeoluwa,Ramsey Nouah Simone McIntyre Segun Arinze Uche...,
7,2004,London Forever,Chico Ejiro,Shan George Lanre Falana Lilian Bach Rachel Oniga,
8,2004,Mr Ibu in London,Adim Williams,John Okafor Ishola Oshun Kareem Adepoju Femi F...,
9,2004,,,,


## Extract Data for 2005

In [10]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2005"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2005 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2005" for all rows
df_2005.insert(0, "Year", 2005)

df_2005 = df_2005.iloc[1:]

# Reset the index to start from 0
df_2005.reset_index(drop=True, inplace=True)

df_2005.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2005,Rising Moon,Andy Nwakolor,"Akume Akume, Arthur Brooks, Justus Esiri, Onye...",Drama
1,2005,,,,
2,2005,,,,
3,2005,,,,
4,2005,,,,
5,2005,,,,
6,2005,,,,
7,2005,,,,
8,2005,,,,
9,2005,,,,


## Extract Data for 2006

In [11]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2006"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2006 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2006" for all rows
df_2006.insert(0, "Year", 2006)

df_2006 = df_2006.iloc[1:]

# Reset the index to start from 0
df_2006.reset_index(drop=True, inplace=True)

df_2006.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2006,30 Days,Mildred Okwo,Genevieve Nnaji Joke Silva Segun Arinze,Action / thriller
1,2006,Abeni,,,
2,2006,Games Men Play,Lancelot Oduwa Imasuen,Kate Henshaw-Nuttal Ini Edo Chioma Chukwuka Ch...,
3,2006,Girls Cot 1-3. 2006,Afam Okereke,Genevieve Nnaji Rita Dominic Ini Edo Uche Jombo,
4,2006,Manko,Alhaji Sagir Mohammed,Yahaya Alfa Abdullahi Mohamed Bida John Gana M...,
5,2006,Mr Lecturer,Prince Emeka Ani,Nkem Owoh Sam Loco Efe Stella Ikwuegbu Chidinm...,
6,2006,Night in the Philippines 1 and 2,Zeb Ejiro,Desmond Elliot Ibinabo Fiberesima Marie Eboka ...,
7,2006,Sitanda,Izu Ojukwu,Stephanie Okereke,Adventure / drama
8,2006,The Amazing Grace,Jeta Amata,Joke Silva Nick Moran Scott Cleverdon,
9,2006,The Narrow Path,Tunde Kelani,Sola Asedeko Seyi Fasuyi Eniola Olaniyan Ayo B...,


## Extract Data for 2007

In [12]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2007"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2007 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2007" for all rows
df_2007.insert(0, "Year", 2007)

df_2007 = df_2007.iloc[1:]

# Reset the index to start from 0
df_2007.reset_index(drop=True, inplace=True)

df_2007.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2007,Ezra,Newton I. Aduaka,Mamoudu Turay Kamara,Drama
1,2007,Europe by Road.. Miles Away from Africa 1 and 2,Ikenna Ezeugwu,Kevin Bucks Leo Chimmezie Chris Onyenso Rahim Kas,
2,2007,Final Hour 1 and 2,Afam Okereke,Oge Okoye Nonso Diobi Uche Jombo Tonto Dikeh,
3,2007,Irapada (Redemption),Kunle Afolayan Biodun Aleja,Kunle Afolayan Deola Oloyede Jotham Ayuba Ange...,
4,2007,Letters to a Stranger,Fred Amata,Genevieve Nnaji Joke Silva Yemi Blaq,Romantic comedy
5,2007,Last Messiah,Moses Ebere,Nkem Owoh Fabian Adibe Roy Denani Miriam Apolo,
6,2007,Life Incidence: A Day to Destiny,Iyke Odife,Jim Iyke Mike Ezuruonye Ebube Nwagbo Browny Ig...,
7,2007,Made in Cambridge,Mac-Collins Chidebe,Nkem Owoh Louisa Nwobodo Funmi Holder Gardiel ...,
8,2007,The Faculty,Ugo Ugbor,Ramsey Nouah Jim Iyke Oge Okoye McMorris Ndubueze,
9,2007,,,,


## Extract Data for 2008

In [13]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2008"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2008 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2008" for all rows
df_2008.insert(0, "Year", 2008)

df_2008 = df_2008.iloc[1:]

# Reset the index to start from 0
df_2008.reset_index(drop=True, inplace=True)

df_2008.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2008,Hottest Babes 1 and 2,Emeka Nwabueze,Oge Okoye Francis Duru Chika Ike Uche Jombo,
1,2008,Jenifa 1 and 2,Muhydeen S. Ayinde,Funke Akindele Ronke Odusanya Mosunmola Filani...,Comedy
2,2008,Liquid Black Gold,Ikenna Emma Aniekwe,Sam Dede Justus Esiri Enebeli Elebuwa Gentle Jack,
3,2008,Through the Glass,Stephanie Okereke,Stephanie Okereke Pascal Atuma,Comedy
4,2008,,,,
5,2008,,,,
6,2008,,,,
7,2008,,,,
8,2008,,,,
9,2008,,,,


## Extract Data for 2009

In [14]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2009"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2009 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2009" for all rows
df_2009.insert(0, "Year", 2009)

df_2009 = df_2009.iloc[1:]

# Reset the index to start from 0
df_2009.reset_index(drop=True, inplace=True)

df_2009.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2009,Forbidden Fruit,Frank Rajah Arase,John Dumelo Majid Michel Jackie Appiah Yvonne ...,
1,2009,Guilty Pleasures,Desmond Elliot,Ramsey Nouah Majid Michel Nse Ikpe Etim Mercy ...,
2,2009,Naked Girls 1 and 2,Cyril Jackson,Vincent Opurum Sean Blessed Tonto Dike Enebeli...,
3,2009,Nigerian Girls 1 and 2,Dandy Chukwuemeka Echefu,Uche Elendu Emeka Enyiocha McMorris Ndubueze U...,
4,2009,Reloaded,Lancelot Oduwa Imasuen,Ramsey Nouah Stephanie Okereke Uche Jombo Van ...,Romantic drama
5,2009,The Figurine (Araromire),Kunle Afolayan,Kunle Afolayan Ramsey Nouah Omoni Oboli Funlol...,Thriller
6,2009,,,,
7,2009,,,,
8,2009,,,,
9,2009,,,,


## Extract Data for 2010

In [15]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2010"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2010 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2010" for all rows
df_2010.insert(0, "Year", 2010)

df_2010 = df_2010.iloc[1:]

# Reset the index to start from 0
df_2010.reset_index(drop=True, inplace=True)

df_2010.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2010,Anchor Baby,Lonzo Nzekwe,Omoni Oboli Sam Sarpong Terri Oliver,Drama / thriller
1,2010,Aramotu,Niji Akanni,Idiat Sobande Kayode Odumosu Ireti Osayemi-Bak...,Drama
2,2010,Between Kings and Queens,Joy Dickson,Jim Iyke Nakia Burrise DaJuan Johnson,Romance / Action
3,2010,Braids on a Bald Head,Ishaya Bako,,Drama
4,2010,Bursting Out,Desmond Elliot,Genevieve Nnaji Majid Michel Desmond Elliot Su...,Drama
5,2010,Good Girls Gone Bad 1-4,Nonso Emekaekwue,Oge Okoye Nonso Diobi Chika Ike Halimah Abubakar,
6,2010,Holding Hope,Desmond Elliot,Nadia Buari Uche Jombo Desmond Elliot,Drama
7,2010,"Ijé, the Journey",Chineze Anyaene,Genevieve Nnaji Odalys García Omotola Jalade-E...,Drama
8,2010,Inale,Jeta Amata,Caroline ChikezieHakeem Kae-KazimNse Ikpe Etim...,
9,2010,Men in Love,John Dumelo,Tonto Dike Muna Obiekwe Halima Abubakar,Drama


## Extract Data for 2011

In [16]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2011"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2011 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2011" for all rows
df_2011.insert(0, "Year", 2011)

df_2011.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2011,Ibu in Campus 1-4,Charles Inojie,John Okafor Charles Inojie Okey Bakassi Cynthi...,
1,2011,Nkwocha,Stan Amadi,Chiwetalu AguQueen NwaokoyeUche Elendu Walter,
2,2011,The Mirror Boy,Obi Emelonye,Genevieve Nnaji Osita Iheme Edward Kagutuzi Fa...,Drama / fantasy / adventure
3,2011,Two Brides and a Baby,Teco Benson,OC Ukeje Stella Damasus-Aboderin Kalu Ikeagwu ...,Romantic drama
4,2011,I'll Take My Chances,Desmond Elliot,Ini Edo Bryan Okwara Sam Loco Efe Jide Kosoko,Dance / romantic drama
5,2011,,,,
6,2011,,,,
7,2011,,,,
8,2011,,,,
9,2011,,,,


## Extract Data for 2012

In [17]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2012"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2012 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2012" for all rows
df_2012.insert(0, "Year", 2012)

df_2012.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2012,Phone Swap,Kunle Afolayan,Nse Ikpe EtimWale OjoLydia ForsonJoke Silva,Romantic comedy
1,2012,Journey to Self,Tope Oshin Ogun,Nse Ikpe EtimDakore AkandeKatherine ObiangTosi...,
2,2012,Adesuwa,Lancelot Oduwa Imasuen,Olu JacobsBob-Manuel UdokwuNgozi EzeonuKofi Ad...,
3,2012,Last Flight to Abuja,Obi Emelonye,Omotola Jalade EkeindeHakeem Kae-KazimJim Iyke,Disaster / thriller
4,2012,Fuelling Poverty,Ishaya Bako,,Documentary
5,2012,Turning Point,Niyi Towolawi,Jackie Appiah K.D. AubertErnie HudsonTodd Brid...,Drama
6,2012,Weekend Getaway,Desmond Elliot,Genevieve Nnaji Uti NwachukwuIni EdoRamsey Nou...,Romantic drama
7,2012,The Meeting,Mildred Okwo,Rita Dominic Femi JacobsLinda EjioforJide Koso...,Romantic drama
8,2012,Hoodrush,Dimeji Ajibola,OC Ukeje Bimbo AkintolaGabriel AfolayanChelsea...,Musical thriller
9,2012,Amina,Christian Ashaiku,OC Ukeje Omotola Jalade EkeindeWil JohnsonVan ...,Psychological drama


## Extract Data for 2013

In [18]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2013"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2013 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2013" for all rows
df_2013.insert(0, "Year", 2013)

df_2013.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2013,Forgetting June,Ikechukwu Onyeka,Majid MichelBeverly NayaMbong Amata Blossom Ch...,Romantic drama
1,2013,Broken,Bright Wonder Obasi,Nse Ikpe EtimBimbo ManuelKalu Ikeagwu,Drama
2,2013,Murder at Prime Suites,Eneaji Chris Eneng,Joseph BenjaminChelsea EzeKeira Hewatch Okey U...,Crime / thriller
3,2013,Torn,Moses Inwang,Monalisa ChindaIreti DoyleJoseph Benjamin Tope...,Psychological thriller
4,2013,A Mile from Home,Eric Aghimien,Tope TedelaChiedozie 'Sambasa' NzeribeAlex Aya...,Romantic drama
5,2013,Awakening,James OmokweEthan Okwara,OC UkejeKehinde BankoleFemi BrainardTope Tedela,Dark thriller
6,2013,Half of a Yellow Sun,Biyi Bandele,Chiwetel EjioforThandie NewtonGenevieve NnajiO...,Historical drama
7,2013,Secret Room,Eneaji Chris Eneng,OC UkejeJide KosokoLinda Ejiofor Lilian Esoro,Thriller
8,2013,B for Boy,Chika Anadu,Uche NwadiliNgozi NwanetoNonso Odogwu,
9,2013,Accident,Teco Benson,Chioma ChukwukaKalu IkeagwuFrederick Leonard,Thriller


## Extract Data for 2014

In [19]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2014"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2014 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2014" for all rows
df_2014.insert(0, "Year", 2014)

df_2014.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2014,Being Mrs Elliot,Omoni Oboli,Majid MichelOmoni OboliSylvia Oluchy Ayo Makun,Romantic comedy
1,2014,Render to Caesar,Desmond Ovbiagele Onyekachi Ejim,Gbenga AkinnagbeOmoni OboliWale Ojo Bimbo Manu...,Crime drama
2,2014,Tunnel,Stanlee Ohikhuare,Nse Ikpe Etim Femi JacobsWaje,Drama
3,2014,Iyore,Frank Rajah Arase,Rita Dominic Bukky WrightJoseph Benjamin,Drama
4,2014,Make a Move,Niyi Akinmolayan,Ivie Okujaye Tina MbaBeverly NayaWale Adebayo,Dance / musical
5,2014,Knocking on Heaven's Door,Desmond Elliot,Majid Michel Adesuwa Etomi Blossom Chukwujekwu...,Romantic drama / musical
6,2014,October 1,Kunle Afolayan,Sadiq Daba Kehinde Bankole David BaileDeola Sagoe,Dark psychological thriller
7,2014,Brother's Keeper,Ikechukwu Onyeka,Majid Michel Omoni Oboli Beverly NayaBarbara Soky,Thriller
8,2014,Dry,Stephanie Okereke,Stephanie Okereke Liz Benson William McNamaraD...,Drama
9,2014,30 Days in Atlanta,Robert Peters,Ayo Makun Vivica A. Fox Lynn WhitfieldKarlie Redd,Comedy


## Extract Data for 2016

In [20]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2016"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2016 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2016" for all rows
df_2016.insert(0, "Year", 2016)

df_2016.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2016,Elephant in the Room,Asurf Oluseyi,Ramsey NouahZainab Sheriff,Romantic Comedy
1,2016,Beyond Blood,Greg Odutayo,Kehinde BankoleJoseph Benjamin Bimbo ManuelCar...,Romantic Drama
2,2016,Love is in the Hair,Ansa Kpokpogri,Uti NwachukwuBishop Ime Toyin AimakhuOkey Bakassi,Romantic Comedy
3,2016,Couple of Days,Lord Tanner,Lilian EsoroKiki OmeiliAdesua Etomi Ademola Ad...,Romantic Comedy
4,2016,Suru L'ere,Mildred Okwo,Beverly NayaSeun AkindeleKemi Lala Tope Tedela,Comedy Drama
5,2016,93 Days,Steve Gukas,Bimbo AkintolaDanny GloverBimbo ManuelTim Reid,Docu-Drama
6,2016,The CEO,Kunle Afolayan,Kemi Lala AkindojuHilda DokuboJimmy Jean-Louis,Drama
7,2016,Ghana Must Go,Frank Rajah Arase,Yvonne OkoroBlossom ChukwujekwuNkem OwohIk Ogb...,Drama
8,2016,,,,
9,2016,,,,


## Extract Data for 2017

In [21]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2017"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2017 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2017" for all rows
df_2017.insert(0, "Year", 2017)

df_2017.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2017,The Royal Hibiscus Hotel,Ishaya Bako,Zainab BalogunKenneth OkolieDeyemi OkanlawonJo...,Comedy film
1,2017,American Driver,Moses Inwang,Evan KingJim IykeNse Ikpe EtimAyo MakunEmma Ny...,Comedy film
2,2017,Sudani from Nigeria,Zakariya,Soubin ShahirSamuel Abiola Robinson,Comedy and Drama
3,2017,Sobi's Mystic,Biodun Stephen,Bolaji OgunmolaKunle RemiMofe Duncan,Romantic Drama
4,2017,Lotanna,Toka McBaror,Liz BensonVictor OlaotanChris OkagbueAma Abebrese,Drama
5,2017,Isoken,Jadesola Osiberu,Dakore Akande Joseph BenjaminFunke AkindeleMar...,Romantic Drama
6,2017,10 Days in Sun City,Adze Ugha,Ayo MakunAdesuwa EtomiRichard Mofe-DamijoMercy...,Comedy Drama
7,2017,Christmas Is Coming,Ufuoma McDermott,Chioma ChukwukaDeyemi OkanlawonZack OrjiMary L...,Romantic comedy
8,2017,The Wedding Party 2,Niyi Akinmolayan,Sola Sobowale Patience Ozokwor Adesua Etomi Ba...,romantic comedy-drama film
9,2017,Christmas Is Coming,Ufuoma McDermott,Chioma ChukwukaDeyemi OkanlawonZack OrjiMary L...,Romantic comedy


## Extract Data for 2018

In [22]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2018"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2018 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2018" for all rows
df_2018.insert(0, "Year", 2018)

df_2018.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2018,Lionheart,Genevieve Nnaji,Genevieve NnajiNkem OwohPete EdochieKanayo O. ...,Drama
1,2018,Boss of All Bosses,Ike Nnaebue,Patience Ozokwor Adunni AdeAkpororoBishop Imeh,Comedy
2,2018,Ghost and the Tout,Charles Uwagbai,Toyin Abraham Rachael OkonkwoLasisi ElenuOmowu...,Ghost
3,2018,What Just Happened,Charles Uwagbai,Ufuoma McDermottOmoni OboliToyin AbrahamMike E...,Comedy
4,2018,Sylvia (2018 film),Daniel Orhiari,Chris AttohZainab Balogun Ini Dima-Okojie Ijeo...,Thriller drama film
5,2018,Merry Men: The Real Yoruba Demons,Toka Mcbaror,Ayo MakunFolarin FalanaRamsey NouahOsas Ighoda...,Romantic comedy
6,2018,Chief Daddy,Niyi Akinmolayan,Joke Silva Kate Henshaw Funke Akindele Folarin...,Comedy Drama
7,2018,Up North,Tope Oshin Ogun,Rahama Sadau Kanayo O. Kanayo Adesua Etomi Ban...,Romantic drama film
8,2018,,,,
9,2018,,,,


## Extract Data for 2019

In [23]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2019"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2019 = df[['Title','Director','Cast']]

# Add the "Year" column with the year "2019" for all rows
df_2019.insert(0, "Year", 2019)

df_2019.head(10)

Unnamed: 0,Year,Title,Director,Cast
0,2019,,,
1,2019,,,
2,2019,,,
3,2019,,,
4,2019,,,
5,2019,,,
6,2019,,,
7,2019,,,
8,2019,,,
9,2019,,,


## Extract Data for 2020

In [24]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2020"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2020 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2020" for all rows
df_2020.insert(0, "Year", 2020)

df_2020.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2020,Soft Work,Darasen Richards,Frank Donga Akin Lewis Shaffy Bello Sanni Mu’a...,Action drama
1,2020,Fate of Alakada: The Party Planner,Kayode Kasum,Toyin Abraham Mercy Eke Broda Shaggi,Action comedy
2,2020,Kakanfo,David Dida Tella,Bimbo Oshin Antar Laniyan Dele Odule,Adventure drama
3,2020,This Lady Called Life,Kayode Kasum,Bisola Aiyeola Efa Iwara,Drama
4,2020,Rise of the Saints,Samuel O . Olateru,Deyemi Okanlawon Rachel Oniga Tina Mba,Drama
5,2020,Lemonade,Lummie Edevibe,Kunle Remi Ayoola Ayolola Dino Melaye Linda Osifo,Drama
6,2020,Lagos to Abuja Coach,Olamide Balogun,Akin Lewis Adunni Ade Tina Mba Maryam Booth,Drama
7,2020,Citation,Kunle Afolayan,Temi Otedola Jimmy Jean-Lewis Kunle Afolayan J...,Drama
8,2020,Ratnik,Dimeji Ajibola,Osas Ighodaro Bolanle Ninalowo Adunni Ade Kari...,Science fiction
9,2020,Rattlesnake: The Ahanna story,Ramsey Nouah,Stan Nze Osas Ighodaro Bucci Franklin Efa Iwara,Crime drama


## Extract Data for 2021

In [25]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2021"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2021 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2021" for all rows
df_2021.insert(0, "Year", 2021)

df_2021.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2021,Prophetess,Niyi Akinmolayan,Toyin Abraham Lateef Adedimeji Muyiwa Ademola ...,Comedy
1,2021,Breaded Life,Biodun Stephen,Timini Egbuson Bimbo Ademoye Tina Mba Jide Kosoko,Romantic comedy
2,2021,Eyimofe,Ari and Chuko Esiri,Jude Akuwudike Temi Ami-Williams Cynthia Ebiji...,
3,2021,The Wait,Yemi Morafa,Nse Ikpe Etim Deyemi Okanlawon Jimmy Odukoya C...,Faith based
4,2021,Suga Suga,Richard Omos,Taiwo Obileye Ayo Adesanya Tana Adelana Wole Ojo,Comedy
5,2021,Mimi,Samuel ‘’Bigsam’’ Olatunji,Ali Baba Ireti Doyle Toyin Abraham Prince Jide...,
6,2021,Lockdown,Moses Inwang,Omotola Jalade-Ekeinde Tony Umez Charles Awuru...,Psychological thriller
7,2021,My Village People,Niyi Akinmolayan,Bovi Ugboma Nkem Owoh Amaechi Muonagor Charles...,Comedy
8,2021,The New Patriots,Terry Ayebo,Akin Lewis Dele Odule Bimbo Oshin Taiwo Ibikun...,Political thriller
9,2021,Badamasi,Obi Emelonye,Enyinna Nwigwe Charles Inojie Sani Danja Yakub...,Biopic


## Extract Data for 2022

In [26]:
# Define the URL
link = "https://en.wikipedia.org/wiki/List_of_Nigerian_films_of_2022"

df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

df = pd.concat([df1, df2, df3, df3], ignore_index=True)

# Create a Dataframe
df_2022 = df[['Title','Director','Cast','Genre']]

# Add the "Year" column with the year "2022" for all rows
df_2022.insert(0, "Year", 2022)

df_2022.head(10)

Unnamed: 0,Year,Title,Director,Cast,Genre
0,2022,King of Thieves (Ogundabede),Adebayo Tijani and Tope Adebayo Salami,Toyin AbrahamFemi Adebayo SalamiOdunlade Adeko...,Drama
1,2022,The Blood Covenant,Fiyin Gambo,Tobi BakreShawn FaquaUzor ArukweErica Nlewedim...,
2,2022,,,,
3,2022,,,,
4,2022,Tiger's Tail,Uyoyou Adia,Alexx EkuboNatacha AkideAkintoba Adeoluwa Zubb...,
5,2022,Hey You,Uyoyou Adia,Timini EgbusonEfe Irele Rotimi Salami Stan Nze...,Romantic comedy
6,2022,Sista,Biodun Stephen,Kehinde Bankole Bisola Aiyeola Deyemi Okanlawon,
7,2022,The Set Up 2,Naz Onuzo,Adesua EtomiNancy Isime Kehinde Bankole,
8,2022,,,,
9,2022,Anikulapo,Kunle Afolayan,,


## Merge the datasets

In [27]:
new_dataset_1992_2022 = pd.concat([df_1992, df_1997, df_1998, df_2001, df_2002, df_2003, df_2004, df_2005, df_2006, df_2007, df_2008, df_2010, df_2011, df_2012, df_2013, df_2014, df_2015, df_2016, df_2017, df_2018, df_2020, df_2021, df_2022], ignore_index=True)

### Sort the merged DataFrame by the "Year" column

In [39]:
new_dataset_1992_2022 = new_dataset_1992_2022.sort_values(by="Year")

### Drop rows with NaN in all columns

In [41]:
columns_to_check = new_dataset_1992_2022.columns.difference(["Year"])
new_dataset_1992_2022 = new_dataset_1992_2022.dropna(subset=columns_to_check, how='all')

### Reset the index

In [42]:
new_dataset_1992_2022 = new_dataset_1992_2022.reset_index(drop=True)

## Save Data to New File

In [44]:
new_dataset_1992_2022.to_csv('nigerian_movies_dataset_1992_2022.csv', index=False)