# IMDB WebScraping Project

In [1]:
# importing required libraries to scrape the data from website

from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
# storing the website link to scrape in a variable 'url'

url = "https://www.imdb.com/chart/top/"

In [3]:
url

'https://www.imdb.com/chart/top/'

In [4]:
# As IMDB site blocks scraping spider. So, I'm using 'User-Agent' to bypass the scrape block.

page = requests.get(url,headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"})

In [5]:
# Website is accessible and Response is 200.

page

<Response [200]>

In [6]:
page.content;

In [14]:
# Using BeautifulSoup to make the html file more readable.

%%capture
soup = BeautifulSoup(page.content,'html.parser')
print(soup.prettify())
    


UsageError: Line magic function `%%capture` not found.


In [19]:
# below are the tags that consists of Title and Ratings

soup.find_all('h3', class_="ipc-title__text");

In [20]:
soup.find_all(class_='ipc-rating-star');

In [21]:
scraped_movie = soup.find_all('h3', class_="ipc-title__text")

In [22]:
movies = []

for movie in scraped_movie:
    movies.append(movie.get_text().strip())
movies    

['IMDb Charts',
 '1. The Shawshank Redemption',
 '2. The Godfather',
 '3. The Dark Knight',
 '4. The Godfather: Part II',
 '5. 12 Angry Men',
 "6. Schindler's List",
 '7. The Lord of the Rings: The Return of the King',
 '8. Pulp Fiction',
 '9. The Lord of the Rings: The Fellowship of the Ring',
 '10. Il Buono, Il Brutto, Il Cattivo',
 '11. Forrest Gump',
 '12. The Lord of the Rings: The Two Towers',
 '13. Fight Club',
 '14. Dune: Part Two',
 '15. Inception',
 '16. Star Wars: Episode V - The Empire Strikes Back',
 '17. The Matrix',
 '18. GoodFellas',
 "19. One Flew Over the Cuckoo's Nest",
 '20. Se7en',
 '21. Interstellar',
 "22. It's a Wonderful Life",
 '23. Shichinin No Samurai',
 '24. The Silence of the Lambs',
 '25. Saving Private Ryan',
 '26. City of God',
 '27. Life Is Beautiful',
 '28. The Green Mile',
 '29. Terminator 2: Judgment Day',
 '30. Star Wars: Episode IV - A New Hope',
 '31. Back to the Future',
 '32. Spirited Away',
 '33. The Pianist',
 '34. Parasite',
 '35. Spider-man

In [23]:
# As the result of movies contain most of the unwanted things along with the required data.
# I'm using .isdigit() method to filter out names with numerical data for eg. 1. The Shawshank Redemption

clean_movies = [item for item in movies if item[0].isdigit()]

for i in clean_movies:
    print(i)

1. The Shawshank Redemption
2. The Godfather
3. The Dark Knight
4. The Godfather: Part II
5. 12 Angry Men
6. Schindler's List
7. The Lord of the Rings: The Return of the King
8. Pulp Fiction
9. The Lord of the Rings: The Fellowship of the Ring
10. Il Buono, Il Brutto, Il Cattivo
11. Forrest Gump
12. The Lord of the Rings: The Two Towers
13. Fight Club
14. Dune: Part Two
15. Inception
16. Star Wars: Episode V - The Empire Strikes Back
17. The Matrix
18. GoodFellas
19. One Flew Over the Cuckoo's Nest
20. Se7en
21. Interstellar
22. It's a Wonderful Life
23. Shichinin No Samurai
24. The Silence of the Lambs
25. Saving Private Ryan
26. City of God
27. Life Is Beautiful
28. The Green Mile
29. Terminator 2: Judgment Day
30. Star Wars: Episode IV - A New Hope
31. Back to the Future
32. Spirited Away
33. The Pianist
34. Parasite
35. Spider-man: Across the Spider-verse
36. Psycho
37. Gladiator
38. The Lion King
39. Léon
40. The Departed
41. American History X
42. Whiplash
43. The Prestige
44. Gr

In [24]:
clean_movies

['1. The Shawshank Redemption',
 '2. The Godfather',
 '3. The Dark Knight',
 '4. The Godfather: Part II',
 '5. 12 Angry Men',
 "6. Schindler's List",
 '7. The Lord of the Rings: The Return of the King',
 '8. Pulp Fiction',
 '9. The Lord of the Rings: The Fellowship of the Ring',
 '10. Il Buono, Il Brutto, Il Cattivo',
 '11. Forrest Gump',
 '12. The Lord of the Rings: The Two Towers',
 '13. Fight Club',
 '14. Dune: Part Two',
 '15. Inception',
 '16. Star Wars: Episode V - The Empire Strikes Back',
 '17. The Matrix',
 '18. GoodFellas',
 "19. One Flew Over the Cuckoo's Nest",
 '20. Se7en',
 '21. Interstellar',
 "22. It's a Wonderful Life",
 '23. Shichinin No Samurai',
 '24. The Silence of the Lambs',
 '25. Saving Private Ryan',
 '26. City of God',
 '27. Life Is Beautiful',
 '28. The Green Mile',
 '29. Terminator 2: Judgment Day',
 '30. Star Wars: Episode IV - A New Hope',
 '31. Back to the Future',
 '32. Spirited Away',
 '33. The Pianist',
 '34. Parasite',
 '35. Spider-man: Across the Spi

In [25]:
# Now, using .split() method to filter out names alone.

movies_clean = [i.split(' ',1)[1] for i in clean_movies]

In [26]:
movies_clean

['The Shawshank Redemption',
 'The Godfather',
 'The Dark Knight',
 'The Godfather: Part II',
 '12 Angry Men',
 "Schindler's List",
 'The Lord of the Rings: The Return of the King',
 'Pulp Fiction',
 'The Lord of the Rings: The Fellowship of the Ring',
 'Il Buono, Il Brutto, Il Cattivo',
 'Forrest Gump',
 'The Lord of the Rings: The Two Towers',
 'Fight Club',
 'Dune: Part Two',
 'Inception',
 'Star Wars: Episode V - The Empire Strikes Back',
 'The Matrix',
 'GoodFellas',
 "One Flew Over the Cuckoo's Nest",
 'Se7en',
 'Interstellar',
 "It's a Wonderful Life",
 'Shichinin No Samurai',
 'The Silence of the Lambs',
 'Saving Private Ryan',
 'City of God',
 'Life Is Beautiful',
 'The Green Mile',
 'Terminator 2: Judgment Day',
 'Star Wars: Episode IV - A New Hope',
 'Back to the Future',
 'Spirited Away',
 'The Pianist',
 'Parasite',
 'Spider-man: Across the Spider-verse',
 'Psycho',
 'Gladiator',
 'The Lion King',
 'Léon',
 'The Departed',
 'American History X',
 'Whiplash',
 'The Prestige

In [27]:
soup.find_all('span', class_="ipc-rating-star");

In [28]:
ratings_raw = soup.find_all('span', class_="ipc-rating-star")

In [30]:
ratings_raw;

In [31]:
ratings= []


for rating in ratings_raw:
    ratings.append(rating.get_text().strip())
print(ratings)    

['9.3\xa0(2.9M)', 'Rate', '9.2\xa0(2M)', 'Rate', '9.0\xa0(2.9M)', 'Rate', '9.0\xa0(1.4M)', 'Rate', '9.0\xa0(859K)', 'Rate', '9.0\xa0(1.4M)', 'Rate', '9.0\xa0(2M)', 'Rate', '8.9\xa0(2.2M)', 'Rate', '8.9\xa0(2M)', 'Rate', '8.8\xa0(807K)', 'Rate', '8.8\xa0(2.2M)', 'Rate', '8.8\xa0(1.8M)', 'Rate', '8.8\xa0(2.3M)', 'Rate', '8.9\xa0(248K)', 'Rate', '8.8\xa0(2.5M)', 'Rate', '8.7\xa0(1.4M)', 'Rate', '8.7\xa0(2M)', 'Rate', '8.7\xa0(1.3M)', 'Rate', '8.7\xa0(1.1M)', 'Rate', '8.6\xa0(1.8M)', 'Rate', '8.7\xa0(2.1M)', 'Rate', '8.6\xa0(497K)', 'Rate', '8.6\xa0(365K)', 'Rate', '8.6\xa0(1.5M)', 'Rate', '8.6\xa0(1.5M)', 'Rate', '8.6\xa0(797K)', 'Rate', '8.6\xa0(740K)', 'Rate', '8.6\xa0(1.4M)', 'Rate', '8.6\xa0(1.2M)', 'Rate', '8.6\xa0(1.4M)', 'Rate', '8.5\xa0(1.3M)', 'Rate', '8.6\xa0(843K)', 'Rate', '8.5\xa0(906K)', 'Rate', '8.5\xa0(948K)', 'Rate', '8.6\xa0(358K)', 'Rate', '8.5\xa0(715K)', 'Rate', '8.5\xa0(1.6M)', 'Rate', '8.5\xa0(1.1M)', 'Rate', '8.5\xa0(1.2M)', 'Rate', '8.5\xa0(1.4M)', 'Rate', '8.5\xa

In [32]:
cl_ratings = [item for item in ratings if item[0].isdigit()]

for i in cl_ratings:
    
    print(i)

9.3 (2.9M)
9.2 (2M)
9.0 (2.9M)
9.0 (1.4M)
9.0 (859K)
9.0 (1.4M)
9.0 (2M)
8.9 (2.2M)
8.9 (2M)
8.8 (807K)
8.8 (2.2M)
8.8 (1.8M)
8.8 (2.3M)
8.9 (248K)
8.8 (2.5M)
8.7 (1.4M)
8.7 (2M)
8.7 (1.3M)
8.7 (1.1M)
8.6 (1.8M)
8.7 (2.1M)
8.6 (497K)
8.6 (365K)
8.6 (1.5M)
8.6 (1.5M)
8.6 (797K)
8.6 (740K)
8.6 (1.4M)
8.6 (1.2M)
8.6 (1.4M)
8.5 (1.3M)
8.6 (843K)
8.5 (906K)
8.5 (948K)
8.6 (358K)
8.5 (715K)
8.5 (1.6M)
8.5 (1.1M)
8.5 (1.2M)
8.5 (1.4M)
8.5 (1.2M)
8.5 (980K)
8.5 (1.4M)
8.5 (308K)
8.6 (68K)
8.5 (1.1M)
8.5 (604K)
8.5 (924K)
8.5 (281K)
8.5 (258K)
8.5 (520K)
8.5 (348K)
8.5 (945K)
8.5 (195K)
9.0 (105K)
8.4 (707K)
8.5 (1.7M)
8.4 (1.3M)
8.4 (1.2M)
8.4 (1M)
8.4 (409K)
8.4 (235K)
8.4 (211K)
8.4 (1.2M)
8.4 (665K)
8.4 (1.1M)
8.4 (137K)
8.4 (236K)
8.4 (759K)
8.4 (1.6M)
8.4 (1.8M)
8.4 (516K)
8.3 (1.2M)
8.3 (632K)
8.4 (585K)
8.4 (426K)
8.3 (1.1M)
8.4 (263K)
8.3 (1.1M)
8.4 (1.3M)
8.4 (1.5M)
8.3 (431K)
8.3 (1.1M)
8.4 (316K)
8.3 (376K)
8.4 (53K)
8.4 (432K)
8.3 (685K)
8.3 (259K)
8.4 (104K)
8.3 (895K)
8.4 (96K)
8

In [33]:
cl_ratings

['9.3\xa0(2.9M)',
 '9.2\xa0(2M)',
 '9.0\xa0(2.9M)',
 '9.0\xa0(1.4M)',
 '9.0\xa0(859K)',
 '9.0\xa0(1.4M)',
 '9.0\xa0(2M)',
 '8.9\xa0(2.2M)',
 '8.9\xa0(2M)',
 '8.8\xa0(807K)',
 '8.8\xa0(2.2M)',
 '8.8\xa0(1.8M)',
 '8.8\xa0(2.3M)',
 '8.9\xa0(248K)',
 '8.8\xa0(2.5M)',
 '8.7\xa0(1.4M)',
 '8.7\xa0(2M)',
 '8.7\xa0(1.3M)',
 '8.7\xa0(1.1M)',
 '8.6\xa0(1.8M)',
 '8.7\xa0(2.1M)',
 '8.6\xa0(497K)',
 '8.6\xa0(365K)',
 '8.6\xa0(1.5M)',
 '8.6\xa0(1.5M)',
 '8.6\xa0(797K)',
 '8.6\xa0(740K)',
 '8.6\xa0(1.4M)',
 '8.6\xa0(1.2M)',
 '8.6\xa0(1.4M)',
 '8.5\xa0(1.3M)',
 '8.6\xa0(843K)',
 '8.5\xa0(906K)',
 '8.5\xa0(948K)',
 '8.6\xa0(358K)',
 '8.5\xa0(715K)',
 '8.5\xa0(1.6M)',
 '8.5\xa0(1.1M)',
 '8.5\xa0(1.2M)',
 '8.5\xa0(1.4M)',
 '8.5\xa0(1.2M)',
 '8.5\xa0(980K)',
 '8.5\xa0(1.4M)',
 '8.5\xa0(308K)',
 '8.6\xa0(68K)',
 '8.5\xa0(1.1M)',
 '8.5\xa0(604K)',
 '8.5\xa0(924K)',
 '8.5\xa0(281K)',
 '8.5\xa0(258K)',
 '8.5\xa0(520K)',
 '8.5\xa0(348K)',
 '8.5\xa0(945K)',
 '8.5\xa0(195K)',
 '9.0\xa0(105K)',
 '8.4\xa0(707K)',
 

In [34]:
clean_ratings = [item.split('\xa0') for item in cl_ratings]

In [35]:
clean_ratings

[['9.3', '(2.9M)'],
 ['9.2', '(2M)'],
 ['9.0', '(2.9M)'],
 ['9.0', '(1.4M)'],
 ['9.0', '(859K)'],
 ['9.0', '(1.4M)'],
 ['9.0', '(2M)'],
 ['8.9', '(2.2M)'],
 ['8.9', '(2M)'],
 ['8.8', '(807K)'],
 ['8.8', '(2.2M)'],
 ['8.8', '(1.8M)'],
 ['8.8', '(2.3M)'],
 ['8.9', '(248K)'],
 ['8.8', '(2.5M)'],
 ['8.7', '(1.4M)'],
 ['8.7', '(2M)'],
 ['8.7', '(1.3M)'],
 ['8.7', '(1.1M)'],
 ['8.6', '(1.8M)'],
 ['8.7', '(2.1M)'],
 ['8.6', '(497K)'],
 ['8.6', '(365K)'],
 ['8.6', '(1.5M)'],
 ['8.6', '(1.5M)'],
 ['8.6', '(797K)'],
 ['8.6', '(740K)'],
 ['8.6', '(1.4M)'],
 ['8.6', '(1.2M)'],
 ['8.6', '(1.4M)'],
 ['8.5', '(1.3M)'],
 ['8.6', '(843K)'],
 ['8.5', '(906K)'],
 ['8.5', '(948K)'],
 ['8.6', '(358K)'],
 ['8.5', '(715K)'],
 ['8.5', '(1.6M)'],
 ['8.5', '(1.1M)'],
 ['8.5', '(1.2M)'],
 ['8.5', '(1.4M)'],
 ['8.5', '(1.2M)'],
 ['8.5', '(980K)'],
 ['8.5', '(1.4M)'],
 ['8.5', '(308K)'],
 ['8.6', '(68K)'],
 ['8.5', '(1.1M)'],
 ['8.5', '(604K)'],
 ['8.5', '(924K)'],
 ['8.5', '(281K)'],
 ['8.5', '(258K)'],
 ['8.5', 

In [36]:
## As the result is in list, so using index to filter out the needed data.

ratings_clean = [item[0].split(',')[0] for item in clean_ratings]


In [37]:
ratings_clean

['9.3',
 '9.2',
 '9.0',
 '9.0',
 '9.0',
 '9.0',
 '9.0',
 '8.9',
 '8.9',
 '8.8',
 '8.8',
 '8.8',
 '8.8',
 '8.9',
 '8.8',
 '8.7',
 '8.7',
 '8.7',
 '8.7',
 '8.6',
 '8.7',
 '8.6',
 '8.6',
 '8.6',
 '8.6',
 '8.6',
 '8.6',
 '8.6',
 '8.6',
 '8.6',
 '8.5',
 '8.6',
 '8.5',
 '8.5',
 '8.6',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.6',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '8.5',
 '9.0',
 '8.4',
 '8.5',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.4',
 '8.3',
 '8.3',
 '8.4',
 '8.4',
 '8.3',
 '8.4',
 '8.3',
 '8.4',
 '8.4',
 '8.3',
 '8.3',
 '8.4',
 '8.3',
 '8.4',
 '8.4',
 '8.3',
 '8.3',
 '8.4',
 '8.3',
 '8.4',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.3',
 '8.2',
 '8.2',
 '8.3',
 '8.3',
 '8.3',
 '8.2',
 '8.2',
 '8.3',
 '8.2',


In [38]:
# Now, 2 lists are there 'movies_clean' and 'ratings_clean'. Now converting both as dictionary

dict = {'Movie_Name':movies_clean, 'Ratings':ratings_clean}

In [39]:
dict

{'Movie_Name': ['The Shawshank Redemption',
  'The Godfather',
  'The Dark Knight',
  'The Godfather: Part II',
  '12 Angry Men',
  "Schindler's List",
  'The Lord of the Rings: The Return of the King',
  'Pulp Fiction',
  'The Lord of the Rings: The Fellowship of the Ring',
  'Il Buono, Il Brutto, Il Cattivo',
  'Forrest Gump',
  'The Lord of the Rings: The Two Towers',
  'Fight Club',
  'Dune: Part Two',
  'Inception',
  'Star Wars: Episode V - The Empire Strikes Back',
  'The Matrix',
  'GoodFellas',
  "One Flew Over the Cuckoo's Nest",
  'Se7en',
  'Interstellar',
  "It's a Wonderful Life",
  'Shichinin No Samurai',
  'The Silence of the Lambs',
  'Saving Private Ryan',
  'City of God',
  'Life Is Beautiful',
  'The Green Mile',
  'Terminator 2: Judgment Day',
  'Star Wars: Episode IV - A New Hope',
  'Back to the Future',
  'Spirited Away',
  'The Pianist',
  'Parasite',
  'Spider-man: Across the Spider-verse',
  'Psycho',
  'Gladiator',
  'The Lion King',
  'Léon',
  'The Departe

In [40]:
# Now converting the dictionary into a DataFrames

df=pd.DataFrame(dict)   #columns = ['Movie_Name','Ratings'])

In [41]:
df

Unnamed: 0,Movie_Name,Ratings
0,The Shawshank Redemption,9.3
1,The Godfather,9.2
2,The Dark Knight,9.0
3,The Godfather: Part II,9.0
4,12 Angry Men,9.0
...,...,...
245,The Help,8.1
246,It Happened One Night,8.1
247,Dances with Wolves,8.0
248,Aladdin,8.0


In [60]:
# Trying on additional column like Year

In [42]:
soup.find_all('span', class_="sc-b0691f29-8");

In [43]:
year_raw = soup.find_all('span', class_="sc-b0691f29-8")

In [44]:
cl_year = []


for i in year_raw:
    cl_year.append(i.get_text().strip())
    
print(cl_year)  

['1994', '2h 22m', 'A', '1972', '2h 55m', 'A', '2008', '2h 32m', 'UA', '1974', '3h 22m', 'A', '1957', '1h 36m', 'U', '1993', '3h 15m', 'A', '2003', '3h 21m', 'U', '1994', '2h 34m', 'A', '2001', '2h 58m', 'U', '1966', '2h 41m', 'A', '1994', '2h 22m', 'UA', '2002', '2h 59m', 'UA', '1999', '2h 19m', 'A', '2024', '2h 46m', 'PG-13', '2010', '2h 28m', 'UA', '1980', '2h 4m', 'UA', '1999', '2h 16m', 'A', '1990', '2h 25m', 'A', '1975', '2h 13m', 'A', '1995', '2h 7m', 'A', '2014', '2h 49m', 'UA', '1946', '2h 10m', 'PG', '1954', '3h 27m', 'U', '1991', '1h 58m', 'A', '1998', '2h 49m', 'A', '2002', '2h 10m', 'A', '1997', '1h 56m', 'U', '1999', '3h 9m', 'UA', '1991', '2h 17m', 'A', '1977', '2h 1m', 'U', '1985', '1h 56m', 'U', '2001', '2h 5m', 'U', '2002', '2h 30m', '13', '2019', '2h 12m', 'A', '2023', '2h 20m', 'U', '1960', '1h 49m', 'A', '2000', '2h 35m', 'UA', '1994', '1h 28m', 'U', '1994', '1h 50m', 'A', '2006', '2h 31m', 'A', '1998', '1h 59m', 'R', '2014', '1h 46m', 'A', '2006', '2h 10m', 'U', '

In [45]:
# As the above result consists of year, duration and rating alternative rows in list. 
# convering it to DataFrame to filter out year.

yr = pd.DataFrame(cl_year, columns=['All_Data'])

In [46]:
yr

Unnamed: 0,All_Data
0,1994
1,2h 22m
2,A
3,1972
4,2h 55m
...,...
743,1h 30m
744,U
745,2015
746,2h 43m


In [47]:
# Now, using .isnumeric() and .len() method to filter out value that has length of 4.
# I'm using this method to filter out year alone.

yr_new = yr[yr['All_Data'].str.isnumeric() & (yr['All_Data'].str.len() == 4)]

In [48]:
# converting the result to list

year = yr_new['All_Data'].to_list()

In [49]:
year

['1994',
 '1972',
 '2008',
 '1974',
 '1957',
 '1993',
 '2003',
 '1994',
 '2001',
 '1966',
 '1994',
 '2002',
 '1999',
 '2024',
 '2010',
 '1980',
 '1999',
 '1990',
 '1975',
 '1995',
 '2014',
 '1946',
 '1954',
 '1991',
 '1998',
 '2002',
 '1997',
 '1999',
 '1991',
 '1977',
 '1985',
 '2001',
 '2002',
 '2019',
 '2023',
 '1960',
 '2000',
 '1994',
 '1994',
 '2006',
 '1998',
 '2014',
 '2006',
 '1988',
 '1962',
 '1995',
 '1942',
 '2011',
 '1988',
 '1936',
 '1954',
 '1968',
 '1979',
 '1931',
 '2023',
 '1979',
 '2012',
 '2000',
 '2008',
 '1981',
 '2006',
 '1950',
 '1957',
 '2018',
 '2018',
 '1980',
 '1957',
 '1940',
 '1986',
 '2009',
 '2012',
 '1964',
 '1999',
 '2003',
 '2017',
 '1984',
 '1995',
 '1981',
 '1995',
 '2019',
 '2019',
 '1997',
 '1997',
 '2016',
 '1984',
 '1963',
 '2009',
 '2023',
 '1952',
 '2018',
 '2000',
 '1985',
 '2010',
 '1983',
 '2004',
 '2012',
 '1968',
 '1992',
 '1952',
 '1962',
 '1960',
 '1959',
 '1941',
 '1958',
 '1931',
 '2010',
 '1944',
 '1983',
 '2001',
 '1987',
 '1971',
 

In [50]:
dict1 = {'Movie_Name':movies_clean, 'Year':year,'Ratings':ratings_clean}

In [51]:
df1 = pd.DataFrame(dict1)

In [52]:
df1

Unnamed: 0,Movie_Name,Year,Ratings
0,The Shawshank Redemption,1994,9.3
1,The Godfather,1972,9.2
2,The Dark Knight,2008,9.0
3,The Godfather: Part II,1974,9.0
4,12 Angry Men,1957,9.0
...,...,...,...
245,The Help,2011,8.1
246,It Happened One Night,1934,8.1
247,Dances with Wolves,1990,8.0
248,Aladdin,1992,8.0


In [147]:
# To download the result in csv file, using .to_csv() method

In [64]:
df1.to_csv("Top_250_movies_test.csv", index=False)