In [55]:
# Importing Libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [56]:
# Function to extract product title from the HTML soup
def extract_product_title(soup):
    
    try:
    
        title_tag = soup.find('span', attrs={'id': 'productTitle'})  # Find the outer tag containing the title
        title_value = title_tag.text  # Get the inner NavigableString object
        title = title_value.strip()  # Extract the title as a string value
    
    except AttributeError:
    
        title = ""  # Return an empty string if the title is not found
    
    return title


# Function to extract product price from the HTML soup
def extract_product_price(soup):
    try:
        price_tag = soup.find('span', attrs={'class': 'a-price aok-align-center reinventPricePriceToPayMargin priceToPay'}).find('span', attrs={'class': 'a-offscreen'})
        price = price_tag.text.strip()  # Extract the price as a string value
    except AttributeError:
        try:
            price = soup.find("span", attrs={'id': 'priceblock_dealprice'}).string.strip()  # If there is a deal price
        except:
            price = ""  # Return an empty string if the price is not found
    return price


# Function to extract product rating from the HTML soup
def extract_product_rating(soup):
    try:
        rating_tag = soup.find("i", attrs={'class': 'a-icon a-icon-star a-star-4-5'})
        rating = rating_tag.string.strip()  # Extract the rating as a string value
    except AttributeError:
        try:
            rating = soup.find('span', attrs={'class': "a-icon-alt"}).text.strip()  # If the rating is in a different format
        except:
            rating = ""  # Return an empty string if the rating is not found
    return rating


# Function to extract the number of user reviews from the HTML soup
def extract_review_count(soup):
    try:
        review_count = soup.find("span", attrs={'id': 'acrCustomerReviewText'}).text.strip()  # Extract the review count as a string value
    except AttributeError:
        review_count = ""  # Return an empty string if the review count is not found
    return review_count


# Function to extract the availability status from the HTML soup
def extract_availability(soup):
    try:
        availability_tag = soup.find("div", attrs={'id': 'availability'}).find("span")
        availability = availability_tag.string.strip()  # Extract the availability status as a string value
    except AttributeError:
        availability = "Not Available"  # Set the availability status to "Not Available" if it is not found
    return availability



In [57]:
if __name__ == '__main__':
    # Define the user agent and the URL of the webpage to scrape
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.50',
        'Accept-Language': 'en-US, en;q=0.5'
    }
    url = "https://www.amazon.de/s?k=playstation+5+games&__mk_de_DE=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=320Z2N1WUM1SW&sprefix=playstation+5+games%2Caps%2C305&ref=nb_sb_noss_1"

    # Send an HTTP request to retrieve the webpage
    webpage = requests.get(url, headers=headers)

    # Create a Soup object to parse the webpage content
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch links as a list of Tag objects
    links = soup.find_all("a", attrs={'class': 'a-link-normal s-no-outline'})

    # Store the links
    links_list = []

    # Loop to extract links from Tag objects
    for link in links:
        links_list.append(link.get('href'))

    # Create a dictionary to store the product details
    product_details = {"title": [], "price": [], "rating": [], "reviews": [], "availability": []}

    # Loop to extract product details from each link
    for link in links_list:
        new_webpage = requests.get("https://www.amazon.de" + link, headers=headers)
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Extract product information and append to the respective lists in the dictionary
        product_details['title'].append(extract_product_title(new_soup))
        product_details['price'].append(extract_product_price(new_soup))
        product_details['rating'].append(extract_product_rating(new_soup))
        product_details['reviews'].append(extract_review_count(new_soup))
        product_details['availability'].append(extract_availability(new_soup))

    # Create a pandas DataFrame from the product details dictionary
    amazon_df = pd.DataFrame.from_dict(product_details)

    # Save the DataFrame as a CSV file
    amazon_df.to_csv("web_scraping_results_amazon.csv", header=True, index=False)


In [58]:
amazon_df


Unnamed: 0,title,price,rating,reviews,availability
0,Final Fantasy XVI - Steelbook Edition [Amazon ...,"79,99€",Previous page,,Dieser Artikel erscheint am 22. Juni 2023.
1,Tropico 5 - Game of the Year Edition [PC],"19,99€","3,9 von 5 Sternen",38 Sternebewertungen,Nur noch 7 auf Lager
2,Gran Turismo 7 | Standard Edition [PlayStation 5],"39,99€","4,6 von 5 Sternen",2.611 Sternebewertungen,Auf Lager
3,Horizon Forbidden West [PlayStation 5],"39,99€","4,7 von 5 Sternen",2.620 Sternebewertungen,Auf Lager
4,Ratchet & Clank: Rift Apart [PlayStation 5],"44,60€","4,8 von 5 Sternen",2.677 Sternebewertungen,Nur noch 1 auf Lager
5,God of War Ragnarök [PlayStation 5] 100% Uncut,"54,95€","4,8 von 5 Sternen",1.027 Sternebewertungen,Auf Lager
6,Marvel's Spider-Man: Miles Morales [PlayStatio...,"36,88€","4,6 von 5 Sternen",3.121 Sternebewertungen,
7,Grand Theft Auto V - [Playstation 5],"23,47€","4,7 von 5 Sternen",323 Sternebewertungen,Auf Lager
8,Uncharted Legacy of Thieves Collection [PlaySt...,"24,99€","4,7 von 5 Sternen",1.036 Sternebewertungen,Nur noch 12 auf Lager
9,Demon's Souls [PlayStation 5],"48,50€","4,7 von 5 Sternen",2.749 Sternebewertungen,Auf Lager
