# Scraping Closing Prices with Selenium

### Importing Libraries

In [3]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import _pickle as pickle
import random
import glob
import os
import pandas as pd
import time

### Function to Scrape Prices

In [7]:
def closing_prices(stock):
    """
    Using Selenium to download a stock's price history from Yahoo Finance

    Opens a Chrome window, searches Google for "<TICKER> yahoo finance",
    clicks the top result, opens the historical-data tab, selects the "Max"
    date range and finally clicks the CSV download link.

    Parameters
    ----------
    stock : str
        Ticker symbol of the stock; it is upper-cased before searching.

    Returns
    -------
    None
        The function only drives the browser. The CSV is saved by Chrome
        itself (presumably into its default download folder - confirm
        against the local Chrome configuration).

    Notes
    -----
    The browser is always closed, even when one of the page elements cannot
    be located and Selenium raises; the exception still propagates to the
    caller afterwards.
    """

    # Randomize wait times (0.5 to 1.4 seconds, in 0.1 s steps)
    seq = [i/10 for i in range(5,15)]

    # Options to help load the page faster
    chromeOptions = Options()
    prefs = {"profile.managed_default_content_settings.images":2,
         "profile.default_content_setting_values.notifications":2,
         "profile.managed_default_content_settings.stylesheets":2,
         "profile.managed_default_content_settings.cookies":1,
         "profile.managed_default_content_settings.javascript":1,
         "profile.managed_default_content_settings.plugins":2,
         "profile.managed_default_content_settings.popups":2,
         "profile.managed_default_content_settings.geolocation":2,
         "profile.managed_default_content_settings.media_stream":2}
    chromeOptions.add_experimental_option("prefs",prefs)

    # Opening a browser to google search
    driver = webdriver.Chrome(options=chromeOptions)

    def click(xpath, pause=True):
        # Click the element found at `xpath`, then optionally wait a random
        # interval so the page can update before the next lookup
        driver.find_element(By.XPATH, xpath).click()
        if pause:
            time.sleep(random.choice(seq))

    # Everything after the browser is opened runs inside try/finally so that
    # a failed lookup or click cannot leave a Chrome process behind
    try:
        driver.implicitly_wait(2)
        driver.get("https://google.com")

        # Searching for the stock by ticker
        search = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
        search.send_keys(f"{stock.upper()} yahoo finance")
        search.send_keys(Keys.RETURN)

        # Clicking the top google result (no pause here; the implicit wait
        # covers the lookup of the next element)
        click("//div[@class='ellip']", pause=False)

        # Clicking the historical data
        click("//*[@id='quote-nav']/ul/li[6]/a")

        # Clicking the date range
        click("//input[@class='C(t) O(n):f Tsh($actionBlueTextShadow) Bd(n) Bgc(t) Fz(14px) Pos(r) T(-1px) Bd(n):f Bxsh(n):f Cur(p) W(190px)']")

        # Clicking "Max", then "Done", then "Apply"
        click("//*[@id='Col1-1-HistoricalDataTable-Proxy']/section/div[1]/div[1]/div[1]/span[2]/div/div[1]/span[8]")
        click("//*[@id='Col1-1-HistoricalDataTable-Proxy']/section/div[1]/div[1]/div[1]/span[2]/div/div[3]/button[1]")
        click("//*[@id='Col1-1-HistoricalDataTable-Proxy']/section/div[1]/div[1]/button")

        # Finally downloading the CSV; the pause afterwards gives the
        # download a moment before the window is closed
        click("//a[@class='Fl(end) Mt(3px) Cur(p)']")
    finally:
        # Closing the window
        driver.quit()

## User input for a specific stock
The user can enter the ticker symbol of any desired stock.

In [55]:
# Ask for the ticker and drop surrounding whitespace so a stray space or an
# empty answer does not end up in the search query
stock = input("Which stock would you like? ").strip()
if not stock:
    # Fail fast instead of opening a browser with nothing to search for
    raise ValueError("No stock ticker was entered.")
closing_prices(stock)

Which stock would you like? amd


### Small function to retrieve the most recently downloaded CSV file

In [56]:
def grab_csv(download_dir='../../Downloads'):
    """
    Loading in the csv file with closing prices downloaded from Yahoo finance

    Parameters
    ----------
    download_dir : str, optional
        Folder that is searched for ``*.csv`` files. Defaults to
        ``'../../Downloads'``, which is resolved relative to the current
        working directory.

    Returns
    -------
    pandas.DataFrame
        Contents of the newest CSV file in ``download_dir``.

    Raises
    ------
    FileNotFoundError
        If ``download_dir`` contains no CSV files.
    """
    # File list retrieved from local download folder; the folder name is
    # escaped so characters such as "[" in the path are not read as patterns
    list_of_files = glob.glob(os.path.join(glob.escape(download_dir), '*.csv'))

    # Without this guard max() fails with an unhelpful "empty sequence" error
    if not list_of_files:
        raise FileNotFoundError(f"No CSV files found in {download_dir!r}")

    # Assigning the most recent file. Note: getctime is the creation time on
    # Windows but the last metadata change time on Unix.
    latest_file = max(list_of_files, key=os.path.getctime)
    return pd.read_csv(latest_file)

In [57]:
# Checking the dataframe: preview only the first rows instead of asking the
# notebook to render the entire price history
df = grab_csv()
df.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1980-03-17,0.000000,3.302083,3.125000,3.145833,3.145833,219600
1,1980-03-18,0.000000,3.125000,2.937500,3.031250,3.031250,727200
2,1980-03-19,0.000000,3.083333,3.020833,3.041667,3.041667,295200
3,1980-03-20,0.000000,3.062500,3.010417,3.010417,3.010417,159600
4,1980-03-21,0.000000,3.020833,2.906250,2.916667,2.916667,130800
5,1980-03-24,0.000000,2.916667,2.635417,2.666667,2.666667,436800
6,1980-03-25,0.000000,2.750000,2.552083,2.604167,2.604167,645600
7,1980-03-26,0.000000,2.604167,2.427083,2.447917,2.447917,466800
8,1980-03-27,0.000000,2.375000,2.239583,2.375000,2.375000,1129200
9,1980-03-28,0.000000,2.583333,2.458333,2.541667,2.541667,666000


## Exporting the newly created DataFrame

In [58]:
# Persist the DataFrame as a binary pickle so later work can reload it
# without scraping again
with open("stock_df.pkl", "wb") as pickle_file:
    pickle.dump(df, pickle_file)