Write a python program to display IMDB’s Top rated 100 Indian movies’ data
https://www.imdb.com/list/ls056092300/ (i.e. name, rating, year ofrelease) and make data frame.

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_imdb_page(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print("Failed to fetch page.")
        return None

import re
def parse_imdb_page(html):
    soup = BeautifulSoup(html, 'html.parser')
    movie_list = soup.find_all('div', class_='lister-item-content')

    movies_data = []
    for movie in movie_list:
        name = movie.find('a').text.strip()
        rating = float(movie.find('span', class_='ipl-rating-star__rating').text.strip())
        year_text = movie.find('span', class_='lister-item-year').text.strip()
        # Extracting only the year from the string using regular expression
        year_match = re.search(r'\d{4}', year_text)
        if year_match:
            year = int(year_match.group())
            movies_data.append({'Name': name, 'Rating': rating, 'Year': year})

return movies_data

def main():
    url = 'https://www.imdb.com/list/ls056092300/'
    html_content = fetch_imdb_page(url)
    if html_content:
        movies_data = parse_imdb_page(html_content)
        df = pd.DataFrame(movies_data)
        print(df)

if __name__ == "__main__":
    main()


                                 Name  Rating  Year
0                     Ship of Theseus     8.0  2012
1                              Iruvar     8.4  1997
2                     Kaagaz Ke Phool     7.8  1959
3   Lagaan: Once Upon a Time in India     8.1  2001
4                     Pather Panchali     8.2  1955
..                                ...     ...   ...
95                        Apur Sansar     8.4  1959
96                        Kanchivaram     8.2  2008
97                    Monsoon Wedding     7.3  2001
98                              Black     8.1  2005
99                            Deewaar     8.0  1975

[100 rows x 3 columns]


Write a python program to scrape details of all the posts from https://www.patreon.com/coreyms .Scrape the
heading, date, content and the likes for the video from the link for the youtube video from the post.

In [15]:
import requests
from bs4 import BeautifulSoup
import re

def fetch_patreon_page(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print("Failed to fetch page:", e)
        return None

def parse_patreon_page(html):
    try:
        soup = BeautifulSoup(html, 'html.parser')
        post_list = soup.find_all('div', class_='post-card')

        posts_data = []
        for post in post_list:
            heading = post.find('h3', class_='post-card__title').text.strip()
            date = post.find('time', class_='post-card__date')['datetime']
            content = post.find('div', class_='post-card__content').text.strip()
            
            youtube_link_match = re.search(r'https://www.youtube.com/watch\?v=[\w-]+', content)
            youtube_link = youtube_link_match.group() if youtube_link_match else None
            
            likes = None
            if youtube_link:
                youtube_response = requests.get(youtube_link)
                youtube_response.raise_for_status()
                youtube_soup = BeautifulSoup(youtube_response.text, 'html.parser')
                likes_element = youtube_soup.find('button', class_='like-button-renderer-like-button')
                likes = int(likes_element.find('span', class_='yt-uix-button-content').text.replace(',', ''))

            posts_data.append({'Heading': heading, 'Date': date, 'Content': content, 'YouTube Link': youtube_link, 'Likes': likes})

        return posts_data
    except Exception as e:
        print("Error parsing page:", e)
        return None

def main():
    url = 'https://www.patreon.com/coreyms'
    print("Scraping Patreon page...")
    html_content = fetch_patreon_page(url)
    if html_content:
        print("Parsing Patreon page...")
        posts_data = parse_patreon_page(html_content)
        if posts_data:
            for post in posts_data:
                print(post)
                print()
        else:
            print("No post data found.")

if __name__ == "__main__":
    main()



Scraping Patreon page...
Parsing Patreon page...
No post data found.


Write a python program to scrape house details from mentioned URL. It should include house title, location,
area, EMI and price from https://www.nobroker.in/ .Enter three localities which are Indira Nagar, Jayanagar,
Rajaji Nagar.

In [7]:
import requests
from bs4 import BeautifulSoup

def fetch_page_content(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print("Failed to fetch page.")
        return None

def parse_house_details(html):
    soup = BeautifulSoup(html, 'html.parser')
    
    house_list = soup.find_all('div', class_='nb__2JHKO')

    details_list = []
    for house in house_list:
        title = house.find('h2', class_='heading-6').text.strip()
        location = house.find('div', class_='nb__2CMjv').text.strip()
        area = house.find('div', class_='nb__3oNyC').text.strip()
        emi = house.find('div', class_='font-semi-bold heading-6').text.strip()
        price = house.find('div', class_='heading-7').text.strip()

        details_list.append({'Title': title, 'Location': location, 'Area': area, 'EMI': emi, 'Price': price})

    return details_list

def main():
    localities = ['Indira Nagar', 'Jayanagar', 'Rajaji Nagar']
    base_url = 'https://www.nobroker.in/property/sale/'
    
    for locality in localities:
        url = base_url + locality.replace(' ', '-')  
        print(f"Scraping data for {locality}...")
        html_content = fetch_page_content(url)
        if html_content:
            house_details = parse_house_details(html_content)
            for detail in house_details:
                print(detail)
                print()
                else:
            print("No data found.")

if __name__ == "__main__":
    main()


Scraping data for Indira Nagar...
Scraping data for Jayanagar...
Scraping data for Rajaji Nagar...


Write a python program to scrape first 10 product details which include product name , price , Image URL from
https://www.bewakoof.com/bestseller?sort=popular .

In [8]:
import requests
from bs4 import BeautifulSoup

def fetch_page_content(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print("Failed to fetch page.")
        return None

def parse_product_details(html):
    soup = BeautifulSoup(html, 'html.parser')
    
    product_list = soup.find_all('div', class_='productCard')

    details_list = []
    for product in product_list[:10]:  # Extract details for the first 10 products
        name = product.find('h3', class_='product-title').text.strip()
        price = product.find('div', class_='product-price').text.strip()
        image_url = product.find('img', class_='product-image')['src']

        details_list.append({'Name': name, 'Price': price, 'Image URL': image_url})

    return details_list

def main():
    url = 'https://www.bewakoof.com/bestseller?sort=popular'
    print("Scraping product details...")
    html_content = fetch_page_content(url)
    if html_content:
        product_details = parse_product_details(html_content)
        for detail in product_details:
            print(detail)
            print()
            else:
            print("No data found.")

if __name__ == "__main__":
    main()


Scraping product details...


In [None]:
Please visit https://www.cnbc.com/world/?region=world and scrap-
a) headings

b) date
c) News link

In [20]:
import requests
from bs4 import BeautifulSoup

def fetch_page_content(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print("Failed to fetch page.")
        return None

def parse_news_details(html):
    soup = BeautifulSoup(html, 'html.parser')
    news_list = soup.find_all('div', class_='Card-titleContainer')
    date_list = soup.find_all('time', class_='Card-time')
    link_list = soup.find_all('a', class_='Card-headline')

    details_list = []
    for heading, date, link in zip(news_list, date_list, link_list):
        heading_text = heading.text.strip()
        date_text = date['datetime']
        news_link = "https://www.cnbc.com" + link['href']
        details_list.append({'Heading': heading_text, 'Date': date_text, 'News Link': news_link})

    return details_list

def main():
    url = 'https://www.cnbc.com/world/?region=world'
    print("Scraping news details...")
    html_content = fetch_page_content(url)
    if html_content:
        news_details = parse_news_details(html_content)
        for detail in news_details:
            print(detail)
            print()

if __name__ == "__main__":
    main()


Scraping news details...


Please visit https://www.keaipublishing.com/en/journals/artificial-intelligence-in-agriculture/most-downloaded-
articles/ and scrap-

a) Paper title
b) date
c) Author

In [21]:
import requests
from bs4 import BeautifulSoup

def fetch_page_content(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print("Failed to fetch page.")
        return None

def parse_paper_details(html):
    soup = BeautifulSoup(html, 'html.parser')
    
    paper_list = soup.find_all('div', class_='article-details')

    details_list = []
    for paper in paper_list:
        title = paper.find('h3', class_='article-title').text.strip()
        date = paper.find('div', class_='article-date').text.strip()
        authors = paper.find('div', class_='article-authors').text.strip()

        details_list.append({'Paper Title': title, 'Date': date, 'Authors': authors})

    return details_list

def main():
    url = 'https://www.keaipublishing.com/en/journals/artificial-intelligence-in-agriculture/most-downloaded-articles/'
    print("Scraping paper details...")
    html_content = fetch_page_content(url)
    if html_content:
        paper_details = parse_paper_details(html_content)
        for detail in paper_details:
            print(detail)
            print()
        else:
            print("No data found.")

if __name__ == "__main__":
    main()


Scraping paper details...
No data found.
