In [1]:
# import libraries

from bs4 import BeautifulSoup
import requests
import smtplib
import time
import datetime
import csv
import pandas as pd

In [2]:
# connect to website and pull in data

url = 'https://www.bestbuy.com/site/apple-airpods-pro-with-magsafe-charging-case-white/4900942.p?skuId=4900942'

# send a desktop-browser User-Agent string with the request
headers = {"User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"}

# the timeout keeps this cell from hanging forever if the server never answers;
# raise_for_status() stops here on an HTTP error instead of parsing an error page
page = requests.get(url, headers=headers, timeout=30)
page.raise_for_status()

soup1 = BeautifulSoup(page.content, "html.parser")

# re-parse the prettified markup; the cells below extract text from soup2, and their
# outputs show each text node surrounded by whitespace, which the cleanup steps rely on
soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

In [3]:
# grab product name from the web page (raw text, whitespace included)

title_tag = soup2.find(class_="heading-5 v-fw-regular")
title = title_tag.get_text()
print(title)


              Apple - AirPods Pro (with Magsafe Charging Case) - White
             


In [4]:
# grab product price from the web page (raw text of the whole price block)

price_tag = soup2.find('div', 'priceView-hero-price priceView-customer-price')
price = price_tag.get_text()
print(price)



                      $189.99
                     

                      Your price for this item is $
                      
                      189.99
                     



In [5]:
# clean up data - strip the whitespace surrounding the title text seen above

raw_title = soup2.find(class_="heading-5 v-fw-regular").get_text()
title = raw_title.strip()
print(title)

Apple - AirPods Pro (with Magsafe Charging Case) - White


In [6]:
# clean up data - keep only the first whitespace-separated token of the price block
# and drop its first character (the "$" in the output above)

price_text = soup2.find('div', 'priceView-hero-price priceView-customer-price').get_text()
first_token = price_text.split()[0]
price = first_token[1:]
print(price)

189.99


In [7]:
# timestamp each observation so the output records when the data was collected
# (and therefore when any change in the data occurred)

today = datetime.date.today()
print(today.isoformat())

2022-03-04


In [8]:
# create the csv file (mode 'w' replaces any existing file), then write the
# header row followed by the first data row

header = ['Title', 'Price', 'Date']
data = [title, price, today]

with open('bestbuywebscraping.csv', 'w', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerows([header, data])

In [9]:
# Load the newly created csv back into a DataFrame so it renders directly in the
# notebook, instead of needing to open the file in Excel.

pd.read_csv('bestbuywebscraping.csv')

Unnamed: 0,Title,Price,Date
0,Apple - AirPods Pro (with Magsafe Charging Cas...,189.99,2022-03-04


In [10]:
# append the same data row to the end of the existing csv file, then display the file

with open('bestbuywebscraping.csv', 'a+', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerow(data)

pd.read_csv('bestbuywebscraping.csv')

Unnamed: 0,Title,Price,Date
0,Apple - AirPods Pro (with Magsafe Charging Cas...,189.99,2022-03-04
1,Apple - AirPods Pro (with Magsafe Charging Cas...,189.99,2022-03-04


In [11]:
# combine the code above into a function

def check_price(url=None, csv_path='bestbuywebscraping.csv'):
    """Scrape the product title and price once and append them to a csv log.

    Downloads the product page, extracts the title and the displayed price,
    and appends one ``[title, price, date]`` row to ``csv_path``. When the csv
    file is new or empty, the ``Title, Price, Date`` header row is written
    first so the file can always be read back with column names.

    Args:
        url: Product page to scrape. Defaults to the Best Buy AirPods Pro
            listing used throughout this notebook. The element lookups use
            the class names found on that page.
        csv_path: File to append to. Defaults to ``bestbuywebscraping.csv``.

    Returns:
        None. The result is the row appended to ``csv_path``.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        ValueError: If the title or price element is not found on the page,
            or the price element contains no text.
    """
    if url is None:
        url = 'https://www.bestbuy.com/site/apple-airpods-pro-with-magsafe-charging-case-white/4900942.p?skuId=4900942'

    headers = {"User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"}

    # Bound the wait so a stalled connection cannot block the caller forever,
    # and fail loudly on an HTTP error instead of scraping an error page.
    page = requests.get(url, headers=headers, timeout=30)
    page.raise_for_status()

    soup1 = BeautifulSoup(page.content, "html.parser")

    # The re-parse of the prettified markup is kept on purpose: the price
    # extraction below splits on whitespace between the block's text nodes.
    soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

    title_tag = soup2.find(class_="heading-5 v-fw-regular")
    price_tag = soup2.find('div', 'priceView-hero-price priceView-customer-price')
    if title_tag is None or price_tag is None:
        # find() returns None when nothing matches (page layout changed, or a
        # different page was served); report that rather than failing on None.
        raise ValueError("could not find the title or price element on the page: " + url)

    title = title_tag.get_text().strip()

    price_tokens = price_tag.get_text().split()
    if not price_tokens:
        raise ValueError("price element contained no text on the page: " + url)
    # The first token looks like "$189.99"; strip only a leading "$" so a
    # token without the symbol does not lose its first digit.
    price = price_tokens[0].lstrip('$')

    today = datetime.date.today()

    header = ['Title', 'Price', 'Date']
    data = [title, price, today]

    with open(csv_path, 'a', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        # In append mode the position starts at end-of-file, so 0 means the
        # file is new or empty and still needs its header row.
        if f.tell() == 0:
            writer.writerow(header)
        writer.writerow(data)

In [12]:
# Run the function once to confirm it works: it appends one more row to the csv,
# which is then loaded and displayed below.

check_price()
pd.read_csv('bestbuywebscraping.csv')

Unnamed: 0,Title,Price,Date
0,Apple - AirPods Pro (with Magsafe Charging Cas...,189.99,2022-03-04
1,Apple - AirPods Pro (with Magsafe Charging Cas...,189.99,2022-03-04
2,Apple - AirPods Pro (with Magsafe Charging Cas...,189.99,2022-03-04


In [None]:
# sets up automation to run the function above every 24 hours (as long as the notebook is running);
# each successful run appends a row to the csv file

SECONDS_PER_DAY = 24 * 60 * 60

while True:
    try:
        check_price()
    except Exception as exc:
        # one failed run (network error, page layout change) should not end the
        # schedule; report it and try again at the next interval.
        # KeyboardInterrupt is not an Exception subclass, so interrupting the kernel still stops the loop.
        print(f"{datetime.datetime.now()}: check_price failed: {exc!r}")
    time.sleep(SECONDS_PER_DAY)