</p>

# Amazon Web Scraping with Python

This is an introductory project on web scraping and how to automate the price monitoring of any product on Amazon or any other e-commerce website to take advantage of holiday sales.

<h2 id="import_data">Import Libraries</h2>

In [None]:
# Import libraries 

import csv
import datetime
import os
import re
import smtplib
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

<h2 id="import_data">Implement Web Scraping</h2>

In [2]:
# Connect to website

URL = 'https://www.amazon.com/Data-Analyst-T-Shirt-Male-Boss/dp/B09T5BKPS3/ref=sr_1_2?crid=3QC740Q5M8190&keywords=data%2Banalyst%2Btshirt&qid=1672187720&sprefix=data%2Banalyst%2Btshirt%2Caps%2C145&sr=8-2&th=1&psc=1'

# Request headers sent with the download; the User-Agent is a desktop Chrome string
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# Pull in data
# Without a timeout requests waits indefinitely for a server that never answers
page = requests.get(URL, headers=headers, timeout=30)
# Stop here on an HTTP error status instead of parsing an error page further down
page.raise_for_status()

soup1 = BeautifulSoup(page.content, 'html.parser')
# soup2 (a re-parse of the prettified markup) is the object the following cells query
soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')


In [3]:
# Pull in product title & price specifically

title_tag = soup2.find(id='productTitle')
price_tag = soup2.find(id='corePriceDisplay_desktop_feature_div')

# find() returns None when no element has the requested id; fail with a clear
# message instead of an AttributeError on None.get_text()
if title_tag is None or price_tag is None:
    raise ValueError("Could not find the product title or price element in the downloaded page")

# Raw text of both elements, whitespace included; it is cleaned up in a later cell
title = title_tag.get_text()
price = price_tag.get_text()

print(title)
print(price)


                Data Analyst T-Shirt for Men,Him, Male, Boss, Son - What's Your Superpower Tee Top - Men Clothing Shirt for Data Analyst
               





               $25.75
              


                $
               

                25
                
                 .
                


                75
               














In [4]:
# Clean up the data a little bit

# Take the first decimal number in the scraped text instead of a fixed 5-character
# slice, so prices of any length (9.99, 125.75, 1,299.00) come out intact.
# Re-running this cell on an already cleaned value returns the same value.
price_match = re.search(r'\d[\d,]*\.\d+', price)
if price_match is None:
    raise ValueError(f"No price found in scraped text: {price.strip()[:80]!r}")
price = price_match.group().replace(',', '')  # drop thousands separators
title = title.strip()  # Remove leading and trailing whitespace

print(title)  # still a str
print(price)  # still a str

Data Analyst T-Shirt for Men,Him, Male, Boss, Son - What's Your Superpower Tee Top - Men Clothing Shirt for Data Analyst
25.75


In [5]:
# Stamp the observation with the date on which it was collected

today = datetime.date.today()  # a datetime.date; printing it shows YYYY-MM-DD
print(today)

2022-12-27


<h2 id="import_data">Storing Scraped Data</h2>

In [6]:
# Create CSV and write headers and data into the file

header = ['Product Title', 'Price', 'Date']
data = [title, price, today]  # one row, in the same column order as `header`

# Mode 'w' starts the file from scratch, so running this cell discards earlier rows
with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerows([header, data])

In [7]:
# Read dataset without opening the csv file repeatedly

# Load the same relative path the other cells write to, so this cell shows the file
# that was just produced and does not depend on one user's folder layout
df = pd.read_csv('AmazonWebScraperDataset.csv')
df

Unnamed: 0,Product Title,Price,Date
0,"Data Analyst T-Shirt for Men,Him, Male, Boss, ...",25.75,2022-12-27


In [8]:
# Appending data to the csv

# Mode 'a+' keeps the rows already in the file and adds the new one after them
with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerow(data)

<h2 id="import_data">Automate Web Scraping & Storing Data</h2>

In [9]:
# Combine all of the above code into one function

def _parse_price(raw_text):
    """Return the first decimal number in ``raw_text`` as a string without thousands separators."""
    match = re.search(r'\d[\d,]*\.\d+', raw_text)
    if match is None:
        raise ValueError(f"No price found in scraped text: {raw_text.strip()[:80]!r}")
    return match.group().replace(',', '')


def check_price(url=None, price_threshold=20, csv_path='AmazonWebScraperDataset.csv'):
    """Scrape the product page once, append the observation to a CSV and alert on a low price.

    Args:
        url: Product page to download. ``None`` selects the Data Analyst T-shirt
            listing used throughout this notebook.
        price_threshold: ``send_mail()`` is called when the scraped price is
            strictly below this value.
        csv_path: CSV file that receives one ``[title, price, date]`` row per call.
            A header row is written first when the file is new or empty.

    Raises:
        requests.RequestException: the download failed, timed out or returned
            an HTTP error status.
        ValueError: the title or price element is missing from the page, or the
            price element contains no decimal number.
    """
    if url is None:
        url = 'https://www.amazon.com/Data-Analyst-T-Shirt-Male-Boss/dp/B09T5BKPS3/ref=sr_1_2?crid=3QC740Q5M8190&keywords=data%2Banalyst%2Btshirt&qid=1672187720&sprefix=data%2Banalyst%2Btshirt%2Caps%2C145&sr=8-2&th=1&psc=1'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
        "Accept-Encoding": "gzip, deflate",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "DNT": "1",
        "Connection": "close",
        "Upgrade-Insecure-Requests": "1",
    }

    # A timeout keeps the daily loop from stalling forever on an unresponsive server
    page = requests.get(url, headers=headers, timeout=30)
    page.raise_for_status()
    soup1 = BeautifulSoup(page.content, 'html.parser')
    soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

    title_tag = soup2.find(id='productTitle')
    price_tag = soup2.find(id='corePriceDisplay_desktop_feature_div')
    # find() returns None for a missing element; report that explicitly
    if title_tag is None or price_tag is None:
        raise ValueError("Could not find the product title or price element in the downloaded page")

    title = title_tag.get_text().strip()
    price = _parse_price(price_tag.get_text())
    # Convert before touching the CSV so an unparsable price never gets written
    price_value = float(price)

    today = datetime.date.today()

    header = ['Product Title', 'Price', 'Date']
    data = [title, price, today]

    with open(csv_path, 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        # Append mode starts at the end of the file, so position 0 means it is empty
        if f.tell() == 0:
            writer.writerow(header)
        writer.writerow(data)

    if price_value < price_threshold:
        send_mail()

In [None]:
# Runs check_price after a set time and inputs data into your CSV

SECONDS_PER_DAY = 24 * 60 * 60  # 86400

# This loop never exits on its own: interrupt the kernel to stop it.
# check_price() calls send_mail(), so the cell that defines send_mail must have
# been run before this one is started.
while True:
    check_price()
    time.sleep(SECONDS_PER_DAY)  # wait 24 hours before the next check

<h2 id="import_data">Additional: Send Notification Email</h2>

In [None]:
# Optional: email yourself (or someone else) when the price drops below a chosen threshold (say, below 20)

def send_mail(sender=None, password=None, recipient=None):
    """Email a price-drop alert through smtp.gmail.com over SSL (port 465).

    Args:
        sender: Address used to log in and as the From address. Falls back to the
            SCRAPER_EMAIL_ADDRESS environment variable, then to the placeholder
            'youremail@gmail.com'.
        password: Password for ``sender``. Falls back to the SCRAPER_EMAIL_PASSWORD
            environment variable, then to the placeholder value.
        recipient: Address that receives the alert; defaults to ``sender``.

    SMTP and network failures are reported with print() and not raised, so a
    failed notification does not interrupt the caller.
    """
    # Prefer explicit arguments, then environment variables, so real credentials
    # never need to be typed into the notebook itself
    sender = sender or os.environ.get('SCRAPER_EMAIL_ADDRESS', 'youremail@gmail.com')
    password = password or os.environ.get('SCRAPER_EMAIL_PASSWORD', 'passowrd')
    recipient = recipient or sender

    # Construct the email message
    subject = "The Shirt you want is below $20! Now is your chance to buy!"
    body = "Hey you, This is the moment we have been waiting for. Now is your chance to pick up the shirt of your dreams. Don't mess it up! Link here: https://www.amazon.com/Data-Analyst-T-Shirt-Male-Boss/dp/B09T5BKPS3/ref=sr_1_2?crid=3QC740Q5M8190&keywords=data%2Banalyst%2Btshirt&qid=1672187720&sprefix=data%2Banalyst%2Btshirt%2Caps%2C145&sr=8-2&th=1&psc=1"
    msg = f"Subject: {subject}\n\n{body}"

    try:
        # The context manager closes the connection even when login or sending fails
        with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
            server.login(sender, password)
            # sendmail takes (from_addr, to_addrs, msg); the recipient is required
            server.sendmail(sender, recipient, msg)
    except (smtplib.SMTPException, OSError) as e:
        print(f"An error occurred while sending the email: {e}")