# Importing Libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import time
import datetime
import csv 
import pandas as pd

import smtplib

# Connecting To The Website Amazon.com

In [2]:
# Product page whose title and price are scraped by the cells that follow.
URL = 'https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data%2Banalyst%2Btshirt&qid=1626655184&sr=8-3&customId=B0752XJYNL&th=1'

# HTTP request headers sent along with the page request (desktop Chrome User-Agent).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# Pull in Data

In [3]:
# Fetch the product page. The timeout stops the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() reports an HTTP error status here
# rather than letting an error page be parsed as if it were the product page.
page = requests.get(URL, headers=headers, timeout=30)
page.raise_for_status()

# Parse the response, then re-parse the prettified markup. The prettified text
# places whitespace between nested elements, which the later clean-up relies on.
soup1 = BeautifulSoup(page.content, "html.parser")
soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

In [4]:
# Raw text of the element with id 'productTitle'; surrounding whitespace is still present.
title_tag = soup2.find(id='productTitle')
title = title_tag.get_text()
title

'\n                    Funny Got Data MIS Data Systems Business Analyst T-Shirt\n                   '

In [5]:
# Raw text of the price container; it holds the price more than once plus a lot of whitespace.
price_tag = soup2.find(id='corePriceDisplay_desktop_feature_div')
price = price_tag.get_text()
price

'\n\n\n\n                    $16.99\n                   \n\n\n\n\n\n                      $\n                     \n\n                      16\n                      \n                       .\n                      \n\n\n                      99\n                     \n\n\n\n\n\n\n\n\n\n\n\n'

##### Clean up the data a little bit

In [6]:
# Keep only the first whitespace-delimited token of the scraped price text.
# A fixed-width slice ([:6]) is only right for prices shaped like "$16.99":
# "$104.50" would be cut to "$104.5" and "$9.99" would pick up trailing whitespace.
price_tokens = price.split()
price = price_tokens[0] if price_tokens else ''

# Drop the leading/trailing whitespace around the product title.
title = title.strip()

In [7]:
# Show the cleaned title and the price (with any spaces removed), one per line.
print(title, price.replace(" ", ""), sep="\n")

Funny Got Data MIS Data Systems Business Analyst T-Shirt
$16.99


# Create a Timestamp for your output to track when data was collected

In [8]:
# Calendar date (no time of day) stamped on every row that gets written.
today = datetime.date.today()
print(today.isoformat())

2024-06-12


# Create CSV and write headers and data into the file

In [9]:
# Column names, followed by the single record scraped above.
header = ['Title', 'Price', 'Date']
data = [title, price, today]

# 'w' mode truncates any existing file, so this cell always starts the dataset
# over with the header row plus one data row.
with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerows([header, data])

# Loading The Clean Data

In [10]:
# Read the dataset back from disk to confirm what was written.
df = pd.read_csv(filepath_or_buffer='AmazonWebScraperDataset.csv')
print(df)

                                               Title   Price        Date
0  Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-12


#### Now we append each new data point to the CSV

In [11]:
# Append mode leaves the existing rows alone and adds the record at the end of the file.
with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerow(data)

# Automating The Whole Process

#### Combine all of the above code into one function

In [12]:
def check_price():
    """Scrape the product page once and append one row to the CSV dataset.

    Requests the hard-coded Amazon product URL, reads the text of the
    ``productTitle`` and ``corePriceDisplay_desktop_feature_div`` elements,
    and appends ``[title, price, today]`` to ``AmazonWebScraperDataset.csv``
    in the current working directory. When that file is new or empty, the
    ``Title, Price, Date`` header row is written first.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
        AttributeError: if either element is missing from the page
            (``find`` returns ``None`` in that case).
    """
    URL = 'https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data%2Banalyst%2Btshirt&qid=1626655184&sr=8-3&customId=B0752XJYNL&th=1'

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36", "Accept-Encoding":"gzip, deflate", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"}

    # The timeout prevents an unattended polling loop from hanging forever, and
    # raise_for_status() stops an error page from being scraped as product data.
    page = requests.get(URL, headers=headers, timeout=30)
    page.raise_for_status()

    # The prettify() round trip is deliberate: it puts whitespace between nested
    # elements, which is what separates the price tokens split apart below.
    soup1 = BeautifulSoup(page.content, "html.parser")
    soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

    title = soup2.find(id='productTitle').get_text().strip()

    # Take the first whitespace-delimited token instead of a fixed six-character
    # slice, so prices such as "$9.99" or "$104.50" are stored intact.
    price_text = soup2.find(id='corePriceDisplay_desktop_feature_div').get_text()
    price_tokens = price_text.split()
    price = price_tokens[0] if price_tokens else ''

    today = datetime.date.today()

    header = ['Title', 'Price', 'Date']
    data = [title, price, today]

    with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        # In append mode the stream starts at the end of the file, so position 0
        # means the file is new or empty and still needs its header row.
        if f.tell() == 0:
            writer.writerow(header)
        writer.writerow(data)
 
    

# Runs check_price after a set time and inputs data into your CSV

Now we can use the time module to scrape the site at a set interval and record the price of the T-shirt at each point in time.

# Final result and how to use this Webscraping and Automation

In [18]:
# Everything above, automated: each pass records one price check and then pauses.
# range(1) limits this demo to a single pass.
for attempt in range(1):
    check_price()
    time.sleep(3)  # pause length, in seconds

# Reload the dataset to see every row collected so far (pandas is imported in the first cell).
df = pd.read_csv('AmazonWebScraperDataset.csv')

print(df)

                                                Title   Price        Date
0   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-12
1   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-12
2   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
3   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
4   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
5   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
6   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
7   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
8   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
9   Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
10  Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
11  Funny Got Data MIS Data Systems Business Analy...  $16.99  2024-06-13
12  Funny Got Data MIS Data Systems Bu

If you want to email yourself when the price drops below a certain level, you can try it
out with this script.

In [None]:
def send_mail(sender='typeyouremail@gmail.com', password='xxxxxxxxxxxxxx', recipient=None):
    """Email a price-drop alert through Gmail's SMTP-over-SSL endpoint.

    Args:
        sender: Address used both to log in and as the message sender. The
            default is a placeholder that must be replaced with a real account.
        password: Password for ``sender``. The default is a placeholder; avoid
            committing a real credential into the notebook.
        recipient: Address that receives the alert. Defaults to ``sender``,
            i.e. the alert is mailed to yourself.

    Returns:
        None.

    Raises:
        smtplib.SMTPException: if login or delivery fails.
        OSError: if the connection to the mail server cannot be established.
    """
    if recipient is None:
        recipient = sender

    subject = "The Shirt you want is below $15! Now is your chance to buy!"
    body = "Alex, This is the moment we have been waiting for. Now is your chance to pick up the shirt of your dreams. Don't mess it up! Link here: https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data+analyst+tshirt&qid=1626655184&sr=8-3"

    # A blank line separates the header section from the message body.
    msg = f"Subject: {subject}\n\n{body}"

    # The context manager closes the connection even if login or sending fails.
    # The connection is already SSL-wrapped, so no STARTTLS step is needed.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.ehlo()
        server.login(sender, password)
        # sendmail() requires (from_addr, to_addrs, msg); without the recipient
        # argument the call raises TypeError.
        server.sendmail(sender, recipient, msg)