In [3]:
# question 1
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_imdb_top_100_indian_movies(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    movies_data = {'Name': [], 'Rating': [], 'Year of Release': []}

    movie_containers = soup.find_all('div', class_='lister-item-content')

    for movie in movie_containers:
        try:
            name = movie.h3.a.text.strip()
        except AttributeError:
            name = None

        try:
            rating = float(movie.find('span', class_='ipl-rating-star__rating').text)
        except (AttributeError, ValueError):
            rating = None

        try:
            year = int(movie.find('span', class_='lister-item-year').text.strip('()'))
        except (AttributeError, ValueError):
            year = None

        movies_data['Name'].append(name)


In [4]:
# question 2
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_meesho_bags(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extracting product details
    product_details = []
    for product in soup.find_all('div', class_='product-card'):
        name = product.find('h3', class_='product-title').text
        price = float(product.find('span', class_='product-price').text)
        discount = float(product.find('span', class_='product-discount').text.strip('%'))
        product_details.append({'Name': name, 'Price': price, 'Discount': discount})

    # Creating a DataFrame
    df = pd.DataFrame(product_details)
    return df

# Example usage:
url_meesho_bags = 'https://www.meesho.com/bags-ladies/pl/3jo?page=1'
df_meesho_bags = scrape_meesho_bags(url_meesho_bags)
print(df_meesho_bags)

Empty DataFrame
Columns: []
Index: []


In [None]:
# question 3
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_icc_cricket_rankings(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # a) Top 10 ODI teams in men’s cricket
    banners = soup.find_all('tr', class_='rankings-block__banner')
    table_rows = soup.find_all('tr', class_='table-body')

    teams_details = [
        {
            'Team': team.find('span', class_='u-hide-phablet').text,
            'Matches': int(matches.text),
            'Points': int(points.text),
            'Rating': int(rating.text)
        }
        for team, matches, points, rating in zip(banners, table_rows, table_rows, table_rows)
    ]

    # Creating a DataFrame
    df_teams = pd.DataFrame(teams_details)
    return df_teams

# Example usage:
url_icc_cricket_rankings = 'https://www.icc-cricket.com/rankings/mens/team-rankings/odi'
df_teams = scrape_icc_cricket_rankings(url_icc_cricket_rankings)
print(df_teams)

In [10]:
# question 4
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_patreon_posts(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extracting post details
    post_details = []
    for post in soup.find_all('div', class_='post-content'):
        heading = post.find('h3', class_='post-card__headline').text
        date = post.find('time', class_='post-card__date')['datetime']
        content = post.find('div', class_='post-card__content').text.strip()
        likes = int(post.find('span', class_='post-card__like-count').text)
        post_details.append({'Heading': heading, 'Date': date, 'Content': content, 'Likes': likes})

    # Creating a DataFrame
    df_posts = pd.DataFrame(post_details)
    return df_posts

# Example usage:
url_patreon_posts = 'https://www.patreon.com/coreyms'
df_patreon_posts = scrape_patreon_posts(url_patreon_posts)
print(df_patreon_posts)

Empty DataFrame
Columns: []
Index: []


In [11]:
# question 5
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_nobroker_houses(localities):
    house_details = []

    for locality in localities:
        url = f'https://www.nobroker.in/property/sale/{locality}?searchParam=W3sibGF0IjoxMi45NDAyMTQ1LCJsb24iOjc3LjgzODU5NzYyNTk1MTY2fV0=&radius=2.0'
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extracting house details
        for house in soup.find_all('div', class_='card'):
            title = house.find('h2', class_='heading-6').text.strip()
            location = house.find('div', class_='nb__2CMjv').text.strip()
            area = house.find('div', class_='nb__3oNyC').text.strip().split(' ')[0]
            emi = house.find('div', class_='font-semi-bold').text.strip().split(' ')[1]
            price = float(house.find('div', class_='heading-7').text.replace('₹', '').replace('L', ''))

            house_details.append({'Title': title, 'Location': location, 'Area': area, 'EMI': emi, 'Price': price})

    # Creating a DataFrame
    df_houses = pd.DataFrame(house_details)
    return df_houses


In [15]:
# question 6
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_bewakoof_products(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract product details
    product_details = []
    for product in soup.select('.productCardBox'):
        name_elem = product.find('h3', class_='product-title')
        price_elem = product.find('span', class_='original-price')
        img_elem = product.find('div', class_='product-image')

        # Check if elements are found before extract
        if name_elem:
            name = name_elem.text.strip()
        else:
            name = 'N/A'

        if price_elem:
            price = float(price_elem.text.replace('₹', '').replace(',', ''))
        else:
            price = None

        if img_elem and img_elem.img:
            img_url = img_elem.img.get('src', 'N/A')
        else:
            img_url = 'N/A'

        product_details.append({'Name': name, 'Price': price, 'Image_URL': img_url})

    # Creating a DataFrame
    df_products = pd.DataFrame(product_details)
    return df_products

# Example usage:
url_bewakoof_products = 'https://www.bewakoof.com/bestseller?sort=popula'
df_bewakoof_products = scrape_bewakoof_products(url_bewakoof_products)
print(df_bewakoof_products)

  Name Price Image_URL
0  N/A  None       N/A
1  N/A  None       N/A
2  N/A  None       N/A
3  N/A  None       N/A
4  N/A  None       N/A
5  N/A  None       N/A
6  N/A  None       N/A
7  N/A  None       N/A
8  N/A  None       N/A
9  N/A  None       N/A
