In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_imdb_top_100_indian_movies(url):
    """Scrape movie names, ratings and release years from an IMDb list page.

    Parameters
    ----------
    url : str
        Address of the list page to download.

    Returns
    -------
    pandas.DataFrame
        One row per ``div.lister-item-content`` element found in the page,
        with the columns ``Name``, ``Rating`` and ``Year`` holding the scraped
        text.  A field whose element is missing from an entry is stored as
        ``None`` instead of aborting the whole scrape.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out, or the server answers with an HTTP
        error status.
    """
    # A timeout keeps the notebook from hanging on a stalled connection, and
    # raise_for_status() reports HTTP errors instead of returning an empty table.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_of(tag):
        """Return the stripped text of ``tag``, or None when the tag is missing."""
        return tag.text.strip() if tag is not None else None

    records = []
    for movie in soup.find_all('div', class_='lister-item-content'):
        header = movie.find('h3')
        title_link = header.find('a') if header is not None else None

        year = text_of(movie.find('span', class_='lister-item-year'))
        if year is not None:
            # If the text carries more than one parenthesised group
            # (e.g. "(I) (2012)"), keep only the content of the last one;
            # a plain "(2012)" still becomes "2012".
            year = year.rsplit('(', 1)[-1].rstrip(')').strip()

        records.append({
            'Name': text_of(title_link),
            'Rating': text_of(movie.find('span', class_='ipl-rating-star__rating')),
            'Year': year,
        })

    # Passing the column list keeps the three columns even when nothing matched.
    return pd.DataFrame(records, columns=['Name', 'Rating', 'Year'])


# Fetch the IMDb list and print the resulting table.
url = 'https://www.imdb.com/list/ls056092300/'
df = scrape_imdb_top_100_indian_movies(url=url)
print(df)

                                 Name Rating  Year
0                     Ship of Theseus      8  2012
1                              Iruvar    8.4  1997
2                     Kaagaz Ke Phool    7.8  1959
3   Lagaan: Once Upon a Time in India    8.1  2001
4                     Pather Panchali    8.2  1955
..                                ...    ...   ...
95                        Apur Sansar    8.4  1959
96                        Kanchivaram    8.2  2008
97                    Monsoon Wedding    7.3  2001
98                              Black    8.1  2005
99                            Deewaar      8  1975

[100 rows x 3 columns]


In [3]:
import requests
from bs4 import BeautifulSoup
import json


# Page whose posts are scraped.
url = 'https://www.patreon.com/coreyms'

# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() turns an HTTP error into an exception instead of
# letting the cell silently print an empty list.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

posts = soup.find_all('div', class_='post')

post_details = []

for post in posts:
    # find() returns None when an element is absent, so look every tag up
    # first and only dereference the ones that exist.
    heading_tag = post.find('h2', class_='post-title')
    date_tag = post.find('time', class_='post-date')
    content_tag = post.find('div', class_='post-content')
    likes_tag = post.find('div', class_='post-likes')
    video_tag = post.find('iframe', class_='youtube-video')

    heading = heading_tag.text.strip() if heading_tag is not None else None
    date = date_tag.text.strip() if date_tag is not None else None
    content = content_tag.text.strip() if content_tag is not None else None
    # .get() yields None for a missing attribute where [] would raise KeyError.
    likes = likes_tag.get('data-likes') if likes_tag is not None else None
    video_link = video_tag.get('src') if video_tag is not None else None

    post_details.append({
        'Heading': heading,
        'Date': date,
        'Content': content,
        'Likes': likes,
        'YouTube Link': video_link
    })

# Serialise the collected posts as indented JSON and show them.
post_details_json = json.dumps(post_details, indent=4)

print(post_details_json)

[]


In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


def scrape_house_details(localities):
    """Scrape sale listings from nobroker.in for each of the given localities.

    Each locality name is lower-cased and inserted into a fixed search URL.
    The ``searchParam`` and ``radius`` query values are the same for every
    locality.
    NOTE(review): ``searchParam`` looks like an encoded location filter;
    confirm that it is meant to stay constant across localities.

    Parameters
    ----------
    localities : iterable of str
        Locality names to query, one request per name.

    Returns
    -------
    pandas.DataFrame
        One row per ``div.card`` element found, with the columns ``Title``,
        ``Location``, ``Area``, ``EMI`` and ``Price``.  A field whose element
        is missing from a card is stored as ``None``.  Localities whose
        request does not return HTTP 200 are reported and contribute no rows.
    """
    columns = ['Title', 'Location', 'Area', 'EMI', 'Price']

    def text_of(tag):
        """Return the stripped text of ``tag``, or None when the tag is missing."""
        return tag.text.strip() if tag is not None else None

    records = []
    for locality in localities:
        url = f'https://www.nobroker.in/property/sale/{locality.lower()}/Bangalore?searchParam=W3sibGF0IjoxMi45MjU5ODk1LCJsb24iOjc3LjI5MTYxODgsInBsYWNlSWQiOiJDaElKSFkyYnZiWlMwcnl6X3RPNnZGUUhkQVwiLF9pZCI6WzEyODQ3NzVdXX0=&radius=2.0'

        # The timeout keeps a stalled connection from blocking the whole loop.
        response = requests.get(url, timeout=30)

        if response.status_code != 200:
            # Report the failure instead of dropping the locality without a trace.
            print(f'Failed to fetch listings for {locality}. Status code: {response.status_code}')
            continue

        soup = BeautifulSoup(response.content, 'html.parser')

        for container in soup.find_all('div', class_='card'):
            # The label literal was mojibake ('â¹/month') and could never match;
            # it is the rupee sign.  `string=` replaces the deprecated `text=`.
            emi_label = container.find('div', class_='font-semi-bold heading-6', string='₹/month')
            # The EMI amount is taken from the first <div> after the label.
            emi_value = emi_label.find_next('div') if emi_label is not None else None

            records.append({
                'Title': text_of(container.find('h2', class_='heading-6 font-semi-bold nb__1AShY')),
                'Location': text_of(container.find('div', class_='nb__2CMjv')),
                'Area': text_of(container.find('div', class_='nb__3oNyC')),
                'EMI': text_of(emi_value),
                'Price': text_of(container.find('div', class_='font-semi-bold heading-6')),
            })

    # Passing the column list keeps all five columns even when nothing matched.
    return pd.DataFrame(records, columns=columns)


# Localities to query, followed by the scrape and a plain-text dump of the table.
localities = ['Indira Nagar', 'Jayanagar', 'Rajaji Nagar']
house_details_df = scrape_house_details(localities=localities)
print(house_details_df)

Empty DataFrame
Columns: [Title, Location, Area, EMI, Price]
Index: []


In [5]:
import requests
from bs4 import BeautifulSoup


def scrape_product_details(url, max_products=10):
    """Scrape name, price and image URL of the first products on a listing page.

    Parameters
    ----------
    url : str
        Address of the product listing page.
    max_products : int, optional
        Maximum number of ``div.productCard`` elements to read (default 10).

    Returns
    -------
    dict or None
        ``{'Product Name': [...], 'Price': [...], 'Image URL': [...]}`` with
        three lists of equal length, or ``None`` when the response status is
        not 200.  A field whose element is missing from a card is stored as
        ``None`` so the three lists stay aligned.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    # The timeout keeps the notebook from hanging on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code != 200:
        print('Failed to fetch data from Bewakoof website.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    product_containers = soup.find_all('div', class_='productCard')

    product_names = []
    product_prices = []
    image_urls = []

    for container in product_containers[:max_products]:
        # find() returns None for a missing element; look the tags up first
        # and only dereference the ones that exist.
        name_tag = container.find('p', class_='name')
        price_tag = container.find('span', class_='discountedPrice')
        image_tag = container.find('img')

        product_names.append(name_tag.text.strip() if name_tag is not None else None)
        product_prices.append(price_tag.text.strip() if price_tag is not None else None)
        # .get() yields None for a missing attribute where [] would raise KeyError.
        image_urls.append(image_tag.get('src') if image_tag is not None else None)

    return {
        'Product Name': product_names,
        'Price': product_prices,
        'Image URL': image_urls
    }


bewakoof_url = 'https://www.bewakoof.com/bestseller?sort=popular'


product_details = scrape_product_details(bewakoof_url)

# scrape_product_details() returns None when the request fails; indexing None
# would raise TypeError, so only iterate when there is a result.
if product_details:
    # The loop variable is named image_url so it does not overwrite the
    # notebook-level `url` variable.
    for index, (name, price, image_url) in enumerate(zip(product_details['Product Name'], product_details['Price'], product_details['Image URL']), start=1):
        print(f'Product {index}:')
        print(f'Name: {name}')
        print(f'Price: {price}')
        print(f'Image URL: {image_url}')
        print()
else:
    print('No data to display. Check the code and try again.')

In [6]:
import requests
from bs4 import BeautifulSoup


# Page whose "latest news" entries are scraped.
url = 'https://www.cnbc.com/world/?region=world'

# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() turns an HTTP error into an exception instead of
# letting the cell silently print nothing.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')


articles = soup.find_all('div', class_='LatestNews-container')


news_details = []


for article in articles:
    # Look each tag up once; find() returns None when the element is absent.
    headline_tag = article.find('a', class_='LatestNews-headline')
    timestamp_tag = article.find('time', class_='LatestNews-timestamp')

    heading = headline_tag.text.strip() if headline_tag is not None else None
    date = timestamp_tag.text.strip() if timestamp_tag is not None else None
    # .get() yields None for a missing attribute where [] would raise KeyError.
    link = headline_tag.get('href') if headline_tag is not None else None

    news_details.append({
        'Heading': heading,
        'Date': date,
        'News Link': link
    })


# Show only the first ten entries.
for detail in news_details[:10]:
    print(f"Heading: {detail['Heading']}")
    print(f"Date: {detail['Date']}")
    print(f"News Link: {detail['News Link']}")
    print('-' * 20)

Heading: How 26-year-old balances public office, working at Google and getting her MBA
Date: 12 Min Ago
News Link: https://www.cnbc.com/2024/03/30/meet-bushra-amiwala-one-of-the-first-gen-z-women-to-hold-public-office-in-the-us.html
--------------------
Heading: Here are the 3 key things we're most focused on in the stock market this week
Date: 12 Min Ago
News Link: https://www.cnbc.com/2024/03/30/the-3-things-we-are-most-focused-on-in-the-stock-market-this-week.html
--------------------
Heading: Pet insurance didn't feel worth it—until my cat needed a $3,000 surgery
Date: 12 Min Ago
News Link: https://www.cnbc.com/2024/03/30/is-pet-insurance-worth-it.html
--------------------
Heading: How this mission-driven chocolate company makes $162 million a year
Date: 12 Min Ago
News Link: https://www.cnbc.com/2024/03/30/how-this-mission-driven-chocolate-company-makes-162-million-a-year.html
--------------------
Heading: Nvidia CEO: Smart people struggle with these 2 traits—but they saved my com

In [7]:
import requests
from bs4 import BeautifulSoup


def scrape_most_downloaded_articles(url):
    """Scrape title, date and authors of the articles listed on a KeAi page.

    Parameters
    ----------
    url : str
        Address of the "most downloaded articles" page.

    Returns
    -------
    dict or None
        ``{'Paper Title': [...], 'Date': [...], 'Author': [...]}`` with three
        lists of equal length, one entry per ``div.articles-title`` element
        found, or ``None`` when the response status is not 200.  A field whose
        element is missing from an entry is stored as ``None`` so the three
        lists stay aligned.
    """
    # The timeout keeps the notebook from hanging on a stalled connection.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print('Failed to fetch data from KeAi Publishing. Status code:', response.status_code)
        return None

    print('Successfully fetched data from KeAi Publishing.')

    soup = BeautifulSoup(response.content, 'html.parser')

    def text_of(tag):
        """Return the stripped text of ``tag``, or None when the tag is missing."""
        return tag.text.strip() if tag is not None else None

    paper_titles = []
    dates = []
    authors = []

    for container in soup.find_all('div', class_='articles-title'):
        paper_titles.append(text_of(container.find('h3', class_='h2')))
        dates.append(text_of(container.find('div', class_='date')))
        authors.append(text_of(container.find('div', class_='authors')))

    return {
        'Paper Title': paper_titles,
        'Date': dates,
        'Author': authors
    }


# Page to scrape, followed by the scrape itself.
most_downloaded_articles_url = 'https://www.keaipublishing.com/en/journals/artificial-intelligence-in-agriculture/most-downloaded-articles/'
most_downloaded_articles = scrape_most_downloaded_articles(most_downloaded_articles_url)

if not most_downloaded_articles:
    # Nothing usable came back from the scraper.
    print('No data to display. Check the code and try again.')
else:
    # Walk the three parallel lists together, numbering the articles from 1.
    for index, (title, date, author) in enumerate(
        zip(
            most_downloaded_articles['Paper Title'],
            most_downloaded_articles['Date'],
            most_downloaded_articles['Author'],
        ),
        start=1,
    ):
        print(f'Article {index}:')
        print(f'Paper Title: {title}')
        print(f'Date: {date}')
        print(f'Author: {author}')
        print()
 

Successfully fetched data from KeAi Publishing.
