## Web Scraping Project
### Extracting user reviews of "Dune: Part Two" (2024) from the IMDb website

In [1]:
import requests
from bs4 import BeautifulSoup


In [2]:
# User-reviews page for "Dune: Part Two (2024)" on IMDb.
# This is the address the scraping cell fetches with requests.get(url).
url = "https://www.imdb.com/title/tt15239678/reviews/?ref_=tt_ql_2"


In [3]:
# Column-oriented store for the scraped reviews. The four lists must stay the
# same length because the dictionary is turned into a DataFrame afterwards, so
# every review appends exactly one value (possibly None) to each list.
data = {"title": [], "review": [], 'rating':[], 'date':[]}


def _text_or_none(tag):
    """Return the text of a BeautifulSoup tag, or None when the tag was not found."""
    return tag.get_text() if tag is not None else None


# A timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() turns an HTTP error into an exception instead of letting
# an error page be parsed into an empty result.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
posts = soup.find_all("div", class_="lister-item")

for post in posts:
    # Review headline, with surrounding whitespace and embedded newlines removed
    title = _text_or_none(post.find("a", class_='title'))
    if title is not None:
        title = title.strip().replace("\n", "")
    data["title"].append(title)

    # Review body: the first <div> carrying either of the two listed classes
    review = _text_or_none(post.find("div", class_=["text", "show-more__control"]))
    if review is not None:
        review = review.strip()
    data["review"].append(review)

    # The score sits in a class-less <span> nested inside the rating container.
    # When either element is absent, None is stored instead of raising
    # AttributeError; pandas will then show that rating as NaN.
    rating_box = post.find("span", class_="rating-other-user-rating")
    rating_tag = rating_box.find("span", class_= "") if rating_box is not None else None
    rating = _text_or_none(rating_tag)
    data["rating"].append(int(rating) if rating is not None else None)

    # Review date, kept as the raw string found on the page
    date_box = post.find("div", class_="display-name-date")
    date_tag = date_box.find("span", class_= "review-date") if date_box is not None else None
    review_date = _text_or_none(date_tag)
    data["date"].append(review_date)



In [4]:
# Show every scraped review title as a plain Python list
print(data["title"])

['This is what Hollywood needs!', 'The Sci-Fi/Fantasy Epic of our Generation', 'Long live the fighters', 'Hard Not to Compare it to the Book and Feel Disappointed', 'WOW! I need Dune Messiah now.', 'Beautiful production, less than perfect script', 'Ladies and gentleman.. the PEAK of filmmaking!!', "Arrakis is Real. Believe Me I've Seen It.", 'A masterpiece, a pillar of sci-fi/fantasy', 'The Dark Knight and Empire Strikes Back of our generation...', 'Hard to put into words', 'Visual masterpiece, questionable narrative', 'Trying to be more than it is, but lacked.', 'RIDICULOUS', 'Paul Atreides is handled masterfully', 'Incredible Film', 'Absolutely stunning', 'Denis Villeneuve Promises and Delivers', 'One Of The Greatest Sequel Ever Made, Dune: Part Two Was Easily The Best Films Of The Year So Far', 'Denis Villeneuve is a modern sorcerer in Sci-Fi!', 'Truly a masterpiece', "I don't love it, and I wanted to.", 'Anyone who says Dune: Part 1 or 2 are good movies is lying.', 'Excellent Visua

In [5]:
# Show the full text of every scraped review as a plain Python list
print(data["review"])

["This is what Hollywood needs. A great story with a great director/producer. After that the best thing a studio can do is get the hell out of the way and let artists create art.Dune Part 2 is creative, beautiful, tragic, and mesmerizing. Never once did I get bored or anticipate what was going to happen next. I haven't read the book so I have nothing to compare it to.Denis Villeneuve continues to amaze me with the effort he puts into each of his films. The acting in this film was top notch too. We saw it in IMAX and the sound was earth shattering. If you're gonna see this movie, see it on the largest screen possible.", "Had the pleasure to watch this film in an early screening and was completely blown away.As a big fan of the Game of Thrones franchise, it's been a long time since iv'e encountered this level of world-building and epicness. Would highly recommend to re-watch the first movie in order to appreciate the subtleties and foreshadowing better, though it's not completely necessa

In [6]:
# Show the scraped ratings (converted to int while scraping)
print(data["rating"])

[10, 10, 9, 8, 10, 8, 10, 10, 10, 10, 10, 7, 5, 10, 10, 9, 9, 10, 10, 10, 10, 6, 3, 8, 10]


In [7]:
# Show the scraped review dates, still raw text at this point
print(data["date"])

['26 February 2024', '26 February 2024', '26 February 2024', '2 March 2024', '26 February 2024', '2 March 2024', '28 February 2024', '4 March 2024', '26 February 2024', '26 February 2024', '28 February 2024', '7 March 2024', '7 April 2024', '2 March 2024', '1 March 2024', '26 February 2024', '26 February 2024', '26 February 2024', '28 February 2024', '26 February 2024', '26 February 2024', '6 March 2024', '5 April 2024', '3 March 2024', '29 February 2024']


In [8]:
# Build a table from the scraped lists: each key of `data` becomes a column
# and each list position becomes a row.
import pandas as pd

df = pd.DataFrame(data=data)
# Bare last expression so the notebook renders the frame as a rich table
df

Unnamed: 0,title,review,rating,date
0,This is what Hollywood needs!,This is what Hollywood needs. A great story wi...,10,26 February 2024
1,The Sci-Fi/Fantasy Epic of our Generation,Had the pleasure to watch this film in an earl...,10,26 February 2024
2,Long live the fighters,Phenomenal stuff. I'll probably calm down tomo...,9,26 February 2024
3,Hard Not to Compare it to the Book and Feel Di...,I'm going to write this as a review for both D...,8,2 March 2024
4,WOW! I need Dune Messiah now.,"If you liked or loved the first one, the same ...",10,26 February 2024
5,"Beautiful production, less than perfect script","As with the first film, the production was bea...",8,2 March 2024
6,Ladies and gentleman.. the PEAK of filmmaking!!,This is the kind of movie that is impossible t...,10,28 February 2024
7,Arrakis is Real. Believe Me I've Seen It.,A monumental piece of cinema. And combined wit...,10,4 March 2024
8,"A masterpiece, a pillar of sci-fi/fantasy",This was a perfect sequel to Denis' part one. ...,10,26 February 2024
9,The Dark Knight and Empire Strikes Back of our...,I just got out of an early access showing and ...,10,26 February 2024


In [9]:
# Convert the 'date' column from text such as "26 February 2024" into pandas
# datetime values.
from datetime import datetime  # kept so any cell relying on this name still works

# pd.to_datetime parses the whole column in one vectorised call. Unlike a
# per-row datetime.strptime, it accepts values that are already datetimes, so
# re-running this cell no longer raises a TypeError, and missing values become
# NaT instead of aborting the conversion.
df['date'] = pd.to_datetime(df['date'], format="%d %B %Y")

In [10]:
# Select the reviews written in March 2024.
# The .dt accessor compares month and year for the whole column at once,
# rather than calling a Python lambda once per row; the resulting boolean mask
# picks the same rows.
df.loc[(df['date'].dt.month == 3) & (df['date'].dt.year == 2024)]

Unnamed: 0,title,review,rating,date
3,Hard Not to Compare it to the Book and Feel Di...,I'm going to write this as a review for both D...,8,2024-03-02
5,"Beautiful production, less than perfect script","As with the first film, the production was bea...",8,2024-03-02
7,Arrakis is Real. Believe Me I've Seen It.,A monumental piece of cinema. And combined wit...,10,2024-03-04
11,"Visual masterpiece, questionable narrative",I have to start by saying that I absolutely lo...,7,2024-03-07
13,RIDICULOUS,"We have waited many, many years for a movie of...",10,2024-03-02
14,Paul Atreides is handled masterfully,"""Dune"" has been successfully put to the big sc...",10,2024-03-01
21,"I don't love it, and I wanted to.","Dune Part 2 is an epic movie; slickly made, an...",6,2024-03-06
23,Excellent Visuals but Overly Condensed,This movie is a visual masterpiece that will b...,8,2024-03-03
