# Let's scrape our webpage and update our CSV file

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [4]:
# Send a desktop-browser User-Agent along with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
}

# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error page into an immediate exception
# rather than a confusing scraping failure further down.
response = requests.get(
    "https://www.google.com/search?q=nasdaq:nvda",
    headers=headers,
    timeout=30,
)
response.raise_for_status()

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed and warns about the guess, so results can differ
# between machines. "html.parser" ships with Python.
doc = BeautifulSoup(response.content, "html.parser")

In [9]:
# Look up the element carrying the quote by its CSS classes.
# NOTE(review): these look like generated class names and may change — verify
# the selector if this cell starts failing.
price_element = doc.select_one(".IsqQVc.NprOob.wT3VGc")
if price_element is None:
    # select_one returns None when nothing matches; fail with a clear message
    # instead of "'NoneType' object has no attribute 'text'".
    raise ValueError("Could not find the price element on the page")
current_price = price_element.text
current_price

'446.80'

|ticker|timestamp|price|
|---|---|---|
|NVDA|3am|400|
|NVDA|4am|450|

In [20]:
import datetime

# One record per scrape: which ticker, what it cost, and when we looked.
# The scraped price is text; strip any thousands separators ("1,234.56")
# first, because float() rejects them.
row = {
    'ticker': 'NVDA',
    'price': float(current_price.replace(",", "")),
    'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}
row

{'ticker': 'NVDA', 'price': 446.8, 'timestamp': '2023-08-05 09:42:23'}

In [21]:
# Wrap the single record in a list so pandas builds a one-row frame
# whose columns come from the dict's keys.
df = pd.DataFrame(data=[row])
df

Unnamed: 0,ticker,price,timestamp
0,NVDA,446.8,2023-08-05 09:42:23


In [22]:
# Load the price history saved by earlier runs.
# Only a missing file (first run) or a zero-byte file means "no history yet",
# so only those two cases fall back to a blank dataframe. Any other failure
# (a malformed CSV, a permissions problem, a keyboard interrupt) is left to
# surface: the final cell overwrites prices.csv, so silently starting from an
# empty frame here would erase the existing history.
try:
    existing_df = pd.read_csv("prices.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    existing_df = pd.DataFrame([])
existing_df.head()

Unnamed: 0,ticker,price,timestamp
0,NVDA,446.8,2023-08-05 09:24:59.844353


In [23]:
# Stack the fresh row on top of the saved history.
# ignore_index=True discards each frame's own row labels and renumbers
# the combined result from 0.
frames = [df, existing_df]
combined = pd.concat(frames, ignore_index=True)
combined.head()

Unnamed: 0,ticker,price,timestamp
0,NVDA,446.8,2023-08-05 09:42:23
1,NVDA,446.8,2023-08-05 09:24:59.844353


In [24]:
# Write the merged history back out; index=False keeps the row labels
# out of the file.
combined.to_csv(path_or_buf="prices.csv", index=False)