## Web Scraper Project
In this project we scrape a product web page to track the price of an item we're interested in, and notify ourselves via e-mail when the price drops below a desired amount.

In [1]:
import csv
import datetime
import os
import re
import smtplib
import time
from email.message import EmailMessage

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# https://httpbin.org/get
# NOTE(review): presumably the page used to look up the browser header values below — confirm.

url = 'https://www.tehnomanija.rs/kucni-aparati/filteri-za-bokale-za-vodu/barrier-filter-za-bokale-bwt-mg-000000000001105163'
# Browser-like headers sent with the request.
# NOTE(review): requests only decodes "br" responses when a brotli package is installed — confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# requests applies no timeout by default, so set one to keep the cell from
# hanging, and fail on an HTTP error instead of parsing an error page.
page = requests.get(url, headers=headers, timeout=30)
page.raise_for_status()

soup1 = BeautifulSoup(page.content, 'html.parser')
soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

# The text extracted here still carries surrounding whitespace; it is trimmed
# in the next step.
title = soup2.find('h1').get_text()
price = soup2.find(class_='product-price-newprice').get_text()

print(title)
print(price)



           BARRIER Filter za bokale BWT Mg
          

             899  RSD
            


In [3]:
# Trimming the data
# Removing RSD from the price to make the data more usable

title = title.strip()

# Pull out the first number instead of slicing a fixed three characters, so
# prices with more or fewer than three digits are kept intact.
# Assumes '.' is only used as a thousands separator (e.g. "1.299 RSD") — TODO confirm against the site.
price_match = re.search(r'\d+(?:\.\d{3})*', price)
if price_match is None:
    raise ValueError(f'No price found in {price!r}')
price = price_match.group().replace('.', '')

print(title)
print(price)

BARRIER Filter za bokale BWT Mg
899


In [4]:
# Record the local calendar date on which the price was collected

today = datetime.datetime.now().date()
print(today.isoformat())

2023-08-19


In [5]:
# Creating a csv file to store our header and our data

import csv

header = ['Title', 'Price', 'Date']
data = [title, price, today]

# Mode 'w' starts the file from scratch: the column names first, then the first scraped row.
with open('TehnomanijaFilterDataset.csv', mode='w', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerows([header, data])

In [7]:
# Read the dataset back from the same relative path it was written to,
# rather than an absolute path that only exists on one machine.
df = pd.read_csv('TehnomanijaFilterDataset.csv')
df

Unnamed: 0,Title,Price,Date
0,BARRIER Filter za bokale BWT Mg,899,2023-08-19


In [7]:
# Appending the data into our dataset (left commented out here; check_price() below does this append on every run)

# with open('TehnomanijaFilterDataset.csv', 'a+', newline='', encoding='UTF8') as f:
#     writer = csv.writer(f)
#     writer.writerow(data)

In [8]:
def _parse_price(price_text):
    """Return the first number found in ``price_text`` as an ``int``.

    Takes the first run of digits, allowing '.'-separated groups of three
    (e.g. "1.299"), and ignores whatever follows, such as the "RSD" suffix.
    Assumes '.' is only used as a thousands separator — TODO confirm against the site.

    Raises:
        ValueError: if ``price_text`` contains no digits.
    """
    match = re.search(r'\d+(?:\.\d{3})*', price_text)
    if match is None:
        raise ValueError(f'No price found in {price_text!r}')
    return int(match.group().replace('.', ''))


def check_price(threshold=500):
    """Scrape the product page, append the price to the CSV and alert on a low price.

    Fetches the product page, extracts the title and price, appends a
    ``[title, price, date]`` row to 'TehnomanijaFilterDataset.csv' and calls
    ``send_mail()`` when the price is at or below ``threshold``.

    Args:
        threshold: Price at or below which the e-mail alert is sent.
            Defaults to 500, the value previously hard-coded here.

    Raises:
        requests.RequestException: if the request fails, times out or
            returns an HTTP error status.
        ValueError: if the title or price element is missing from the page,
            or the price text contains no number.
    """
    url = 'https://www.tehnomanija.rs/kucni-aparati/filteri-za-bokale-za-vodu/barrier-filter-za-bokale-bwt-mg-000000000001105163'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "DNT": "1",
        "Connection": "close",
        "Upgrade-Insecure-Requests": "1",
    }

    # requests applies no timeout by default; without one a stalled connection
    # would block the daily loop indefinitely.
    page = requests.get(url, headers=headers, timeout=30)
    page.raise_for_status()

    # Same two-pass parse as the exploratory cell, so the extracted text matches it.
    soup1 = BeautifulSoup(page.content, 'html.parser')
    soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

    title_tag = soup2.find('h1')
    price_tag = soup2.find(class_='product-price-newprice')
    if title_tag is None or price_tag is None:
        raise ValueError('Product title or price element not found on the page')

    title = title_tag.get_text().strip()
    # Parse the whole number rather than the first three characters, so
    # prices outside 100-999 are recorded and compared correctly.
    price = _parse_price(price_tag.get_text())
    today = datetime.date.today()

    with open('TehnomanijaFilterDataset.csv', 'a+', newline='', encoding='UTF8') as f:
        csv.writer(f).writerow([title, price, today])

    if price <= threshold:
        send_mail()
    

In [9]:
def send_mail(product_url=None):
    """E-mail ourselves a notice that the product price has dropped.

    Logs in to Gmail over an SSL connection on port 465 and sends a message
    from the account to itself containing a link to the product.

    Args:
        product_url: Link to include in the message. When omitted, the
            notebook-level ``url`` variable is used, as before.

    Raises:
        RuntimeError: if the GMAIL_APP_PASSWORD environment variable is not set.
        smtplib.SMTPException: if logging in or sending fails.
    """
    sender = 'nikolic.sara.5@gmail.com'

    # Read the password from the environment so it is never stored in the notebook.
    password = os.environ.get('GMAIL_APP_PASSWORD')
    if not password:
        raise RuntimeError('Set the GMAIL_APP_PASSWORD environment variable before calling send_mail()')

    if product_url is None:
        product_url = url

    message = EmailMessage()
    message['Subject'] = "The Filter you want is on discount!"
    message['From'] = sender
    # The original call passed no recipient at all; the notice goes to ourselves.
    message['To'] = sender
    message.set_content('Check it out here :\n' + product_url)

    # SMTP_SSL is encrypted from the first byte, so starttls() must not be
    # called on it. The with-block closes the connection even on failure.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender, password)
        server.send_message(message)

In [None]:
# Check the price every 24h (86400s)
# Runs until the kernel is interrupted.

while True:
    try:
        check_price()
    except (requests.RequestException, smtplib.SMTPException) as exc:
        # A failed request or e-mail should not end the monitoring for good;
        # report it and try again at the next scheduled check.
        print(f'Price check failed, will retry at the next interval: {exc}')
    time.sleep(86400)