From 74200c68818a567c44e3b97efcc05939fc48d6c0 Mon Sep 17 00:00:00 2001 From: Waad AlKhenji Date: Thu, 2 Dec 2021 01:02:21 +0300 Subject: [PATCH 1/4] Movie and TV ratings scraper uploaded --- movie_tv_ratings/README.md | 12 ++++++ movie_tv_ratings/movie_tv_ratings.py | 56 ++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 movie_tv_ratings/README.md create mode 100644 movie_tv_ratings/movie_tv_ratings.py diff --git a/movie_tv_ratings/README.md b/movie_tv_ratings/README.md new file mode 100644 index 000000000..ecb684052 --- /dev/null +++ b/movie_tv_ratings/README.md @@ -0,0 +1,12 @@ +# MOVIE AND TV SHOW RATINGS SCRAPER +This script scrapes the ratings of movies and TV shows from the IMDB website. + +## INPUT +User input of 'Movies' or 'TV' in command line. + +## OUTPUT +It displays a list of the top 250 movies and top 250 TV shows from the IMDB website. + +## AUTHORS +Imaaz Ahmad +Waad AlKhenji \ No newline at end of file diff --git a/movie_tv_ratings/movie_tv_ratings.py b/movie_tv_ratings/movie_tv_ratings.py new file mode 100644 index 000000000..c5246ab51 --- /dev/null +++ b/movie_tv_ratings/movie_tv_ratings.py @@ -0,0 +1,56 @@ +import requests +import re +import sys +from bs4 import BeautifulSoup + +##### Gets top 250 movies from IMDB +def scrape_movies(): + response = requests.get('http://www.imdb.com/chart/top') + soup = BeautifulSoup(response.text, 'lxml') + + movies = soup.select('td.titleColumn') + ratings = [b.attrs.get('data-value') + for b in soup.select('td.posterColumn span[name=ir]')] + + for i in range(len(movies)): + movie_string = movies[i].get_text() + movie = (' '.join(movie_string.split()).replace('.', '')) + movie_title = movie[len(str(i))+1:-7] + print("| " + str(i+1) + " | " + movie_title + " | Rating : " + "{:.1f}".format(float(ratings[i]))) + return + + +##### Gets top 250 TV shows from IMDB +def scrape_tvshows(): + page = requests.get("https://www.imdb.com/chart/toptv") + Results = re.findall(r'" alt="(.+?)".*?title="(.*?)".*?strong.*?"(.*?)"', page.text, re.DOTALL) + for i in range(len(Results)): + print("| " + str(i+1) + " | " + Results[i][0] + " | Rating : " + Results[i][-1][:3]) + + return + + +##### USER INTERFACE ##### + +print("Type 'Movies' to get the Top 250 Movies on IMDB\n") +print("Type 'TV' to get the Top 250 TV Shows on IMDB\n") +print("Type 'exit' to exit\n") + +val = input("Type here: ") +while (val): + if val == 'Movies': + globals()['scrape_movies']() + + print("\n") + + val = input("Type 'Movies' or 'TV' or 'exit': ") + + elif val == 'TV': + globals()['scrape_tvshows']() + print("\n") + val = input("Type 'Movies' or 'TV' or 'exit': ") + + elif val == 'exit': + val = '' + else: + val = input("Wrong Input. Try Again: ") From 26650884d20ebf774a9a18fc350ecf82cdcc9a26 Mon Sep 17 00:00:00 2001 From: Waad AlKhenji Date: Thu, 2 Dec 2021 01:15:02 +0300 Subject: [PATCH 2/4] Movie and TV ratings scraper - fixing lint with flake8 errors --- movie_tv_ratings/movie_tv_ratings.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/movie_tv_ratings/movie_tv_ratings.py b/movie_tv_ratings/movie_tv_ratings.py index c5246ab51..33d8e8144 100644 --- a/movie_tv_ratings/movie_tv_ratings.py +++ b/movie_tv_ratings/movie_tv_ratings.py @@ -1,36 +1,35 @@ import requests import re -import sys from bs4 import BeautifulSoup -##### Gets top 250 movies from IMDB +# Gets top 250 movies from IMDB def scrape_movies(): + response = requests.get('http://www.imdb.com/chart/top') soup = BeautifulSoup(response.text, 'lxml') movies = soup.select('td.titleColumn') - ratings = [b.attrs.get('data-value') - for b in soup.select('td.posterColumn span[name=ir]')] + ratings = [b.attrs.get('data-value') for b in soup.select('td.posterColumn span[name=ir]')] for i in range(len(movies)): movie_string = movies[i].get_text() movie = (' '.join(movie_string.split()).replace('.', '')) movie_title = movie[len(str(i))+1:-7] - print("| " + str(i+1) + " | " + movie_title + " | Rating : " + "{:.1f}".format(float(ratings[i]))) - return + print(("| " + str(i+1)) + (" | " + movie_title) + (" | Rating : " + "{:.1f}".format(float(ratings[i])))) + return -##### Gets top 250 TV shows from IMDB +# Gets top 250 TV shows from IMDB def scrape_tvshows(): page = requests.get("https://www.imdb.com/chart/toptv") Results = re.findall(r'" alt="(.+?)".*?title="(.*?)".*?strong.*?"(.*?)"', page.text, re.DOTALL) for i in range(len(Results)): print("| " + str(i+1) + " | " + Results[i][0] + " | Rating : " + Results[i][-1][:3]) - - return + + return -##### USER INTERFACE ##### +# USER INTERFACE print("Type 'Movies' to get the Top 250 Movies on IMDB\n") print("Type 'TV' to get the Top 250 TV Shows on IMDB\n") @@ -40,17 +39,13 @@ def scrape_tvshows(): while (val): if val == 'Movies': globals()['scrape_movies']() - print("\n") - val = input("Type 'Movies' or 'TV' or 'exit': ") - elif val == 'TV': globals()['scrape_tvshows']() print("\n") val = input("Type 'Movies' or 'TV' or 'exit': ") - elif val == 'exit': val = '' else: - val = input("Wrong Input. Try Again: ") + val = input("Wrong Input. Try Again: ") \ No newline at end of file From cd7e7e540ef45e6426eac6076c7078e9cd050e63 Mon Sep 17 00:00:00 2001 From: Waad AlKhenji Date: Thu, 2 Dec 2021 01:18:30 +0300 Subject: [PATCH 3/4] Movie and TV ratings scraper - fixing lint with flake8 errors --- movie_tv_ratings/movie_tv_ratings.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/movie_tv_ratings/movie_tv_ratings.py b/movie_tv_ratings/movie_tv_ratings.py index 33d8e8144..b5305401d 100644 --- a/movie_tv_ratings/movie_tv_ratings.py +++ b/movie_tv_ratings/movie_tv_ratings.py @@ -2,6 +2,7 @@ import re from bs4 import BeautifulSoup + # Gets top 250 movies from IMDB def scrape_movies(): @@ -14,8 +15,8 @@ def scrape_movies(): for i in range(len(movies)): movie_string = movies[i].get_text() movie = (' '.join(movie_string.split()).replace('.', '')) - movie_title = movie[len(str(i))+1:-7] - print(("| " + str(i+1)) + (" | " + movie_title) + (" | Rating : " + "{:.1f}".format(float(ratings[i])))) + movie_title = movie[len(str(i)) + 1:-7] + print(("| " + str(i + 1)) + (" | " + movie_title) + (" | Rating : " + "{:.1f}".format(float(ratings[i])))) return @@ -24,7 +25,7 @@ def scrape_tvshows(): page = requests.get("https://www.imdb.com/chart/toptv") Results = re.findall(r'" alt="(.+?)".*?title="(.*?)".*?strong.*?"(.*?)"', page.text, re.DOTALL) for i in range(len(Results)): - print("| " + str(i+1) + " | " + Results[i][0] + " | Rating : " + Results[i][-1][:3]) + print("| " + str(i + 1) + " | " + Results[i][0] + " | Rating : " + Results[i][-1][:3]) return @@ -40,12 +41,13 @@ def scrape_tvshows(): if val == 'Movies': globals()['scrape_movies']() print("\n") - val = input("Type 'Movies' or 'TV' or 'exit': ") + val = input("Type 'Movies' or 'TV' or 'exit': ") elif val == 'TV': globals()['scrape_tvshows']() print("\n") - val = input("Type 'Movies' or 'TV' or 'exit': ") + val = input("Type 'Movies' or 'TV' or 'exit': ") elif val == 'exit': val = '' else: - val = input("Wrong Input. Try Again: ") \ No newline at end of file + val = input("Wrong Input. Try Again: ") + \ No newline at end of file From cf52c86f19816e4ab3f82f585959515f239c39ad Mon Sep 17 00:00:00 2001 From: Waad AlKhenji Date: Thu, 2 Dec 2021 01:20:03 +0300 Subject: [PATCH 4/4] Movie and TV ratings scraper - fixing lint with flake8 errors --- movie_tv_ratings/movie_tv_ratings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/movie_tv_ratings/movie_tv_ratings.py b/movie_tv_ratings/movie_tv_ratings.py index b5305401d..6f25b01e4 100644 --- a/movie_tv_ratings/movie_tv_ratings.py +++ b/movie_tv_ratings/movie_tv_ratings.py @@ -50,4 +50,3 @@ def scrape_tvshows(): val = '' else: val = input("Wrong Input. Try Again: ") - \ No newline at end of file