**IMPORTING NECESSARY LIBRARIES AND MODULES**

In [19]:
from bs4 import BeautifulSoup
import requests
import time
import smtplib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

**FUNCTIONS TO PERFORM WEB-SCRAPING FROM THE AMAZON WEBPAGE**

In [12]:
#GETTING PRODUCT TITLES
def get_title(soup):
    try:
        title = soup.find("span", attrs={"id":"productTitle"}).text.strip()
    except:
        title = "Unknown Product"
    
    return title      


In [13]:
#GETTING PRODUCT PRICES
def get_price(soup):
    try:
        price_whole = pd.to_numeric(soup.find("span", attrs={"class":"a-price-whole"}).text.strip())
        price_fraction = pd.to_numeric(soup.find("span", attrs={"class":"a-price-fraction"}).text.strip())
        price = price_whole + price_fraction
    except:
        price = "Unknown Price"
    
    return price

In [14]:
#GETTING PRODUCT RATINGS
def get_rating(soup):
    try:
        rating = pd.to_numeric(soup.find("span", attrs={"class":"a-icon-alt"}).text.strip().split()[0])
    except:
        rating = "Unknown Rating"
    
    return rating

In [15]:
#PERFORMING WEB SCRAPING

URL = 'https://www.amazon.com/s?k=t-shirt&crid=1TZ2ZOV4HJN09&sprefix=%2Caps%2C186&ref=nb_sb_ss_recent_3_0_recent'
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36","Accept-Encoding":"gzip, deflate", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"}
page = requests.get(URL, headers=headers)
soup = BeautifulSoup(page.content, "html.parser")
links = soup.find_all("a", attrs={'class':'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

links_list = []

for link in links:
    links_list.append(link.get('href'))
    
data = {"title":[], "price":[], "rating":[]}

for link in links_list:
    product_links = "https://www.amazon.com" + link
    product_page = requests.get(product_links, headers=headers)
    product_soup = BeautifulSoup(product_page.content, "html.parser")
    data["title"].append(get_title(product_soup))
    data["price"].append(get_price(product_soup))
    data["rating"].append(get_rating(product_soup))

**LOADING DATAS INTO A PANDAS DATAFRAME, REMOVING UNKNOWN PRODUCTS**

In [35]:
df = pd.DataFrame.from_dict(data)
df['rating'] = df['rating'].replace('Unknown Rating', np.nan)
df['rating'] = df['rating'].astype(float)
df['price'] = df['price'].replace('Unknown Price', np.nan)
df['price'] = df['price'].astype(float)
df = df.dropna()
df = df.sort_values(by=['rating'], ascending=False)
df = df.reset_index(drop=True)
df

Unnamed: 0,title,price,rating
0,LACOXA 2023 New Cotton T-Shirt Bitcoin 4D Digi...,75.0,5.0
1,INTO THE AM Mens T Shirt - Short Sleeve Crew N...,113.0,4.7
2,"Gildan Men's Crew T-Shirts, Multipack, Style G...",119.0,4.6
3,"Gildan Men's Crew T-Shirts, Multipack, Style G...",112.0,4.6
4,5 Pack Men’s Active Quick Dry Crew Neck T Shir...,119.0,4.6
5,"Champion mens T-shirt, Classic Tee for Men, Me...",119.0,4.6
6,Fruit of the Loom Men's Eversoft Cotton Stay T...,119.0,4.6
7,Gildan Adult DryBlend Workwear T-Shirts with P...,119.0,4.6
8,DAVID ARCHY Men's Cotton Undershirts Moisture-...,138.0,4.6
9,Hanes Men’s Short Sleeve Graphic T-shirt Colle...,119.0,4.6
