# Scraping Closing prices with Selenium

### Importing Libraries

In [1]:
import _pickle as pickle
import glob
import os
import random
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

### Function to scrape Prices

In [7]:
def closing_prices(stock, extension_path=r"/Users/flatironschool/Desktop/extension_1_22_4_0.crx"):
    """
    Using Selenium to scrape the prices from Yahoo Finance

    Opens Chrome, searches Google for "<STOCK> yahoo finance", clicks the top
    result, then clicks through the historical-data controls (date range,
    "Max", "Done", "Apply") and finally the CSV download link.

    Parameters
    ----------
    stock : str
        Text identifying the stock; it is upper-cased and placed in the
        Google query.
    extension_path : str or None, optional
        Path to a packed Chrome extension (.crx) to load into the browser.
        Pass None to start Chrome without an extension. The default is the
        path that was previously hard-coded here.

    Returns
    -------
    None
        The function only drives the browser. No download directory is
        configured here, so the CSV presumably lands in Chrome's default
        download folder -- TODO confirm.

    Raises
    ------
    selenium.common.exceptions.WebDriverException
        If a required element cannot be located or clicked. The browser is
        closed before the exception propagates.

    Notes
    -----
    Every locator below is tied to the Google / Yahoo Finance markup as it
    was when this was written; NOTE(review): verify the selectors against the
    live pages before relying on this.
    """

    # Randomize wait times (0.3 s to 1.1 s in 0.1 s steps)
    seq = [i/10 for i in range(3,12)]

    # Options to help load the page faster
    chrome_options = Options()
    prefs = {"profile.managed_default_content_settings.images":2,
         "profile.default_content_setting_values.notifications":2,
         "profile.managed_default_content_settings.stylesheets":2,
         "profile.managed_default_content_settings.cookies":1,
         "profile.managed_default_content_settings.javascript":1,
         "profile.managed_default_content_settings.plugins":2,
         "profile.managed_default_content_settings.popups":2,
         "profile.managed_default_content_settings.geolocation":2,
         "profile.managed_default_content_settings.media_stream":2}
    chrome_options.add_experimental_option("prefs",prefs)
    if extension_path is not None:
        chrome_options.add_extension(extension_path)

    # Locators for the top google result, tried in order; the last one is the
    # catch-all and its failure is allowed to propagate.
    result_locators = [
        (By.XPATH, "//*[@id='rso']/div/div/div[1]/div/div/div[1]/a/h3"),
        (By.XPATH, "//*[@id='rso']/div[1]/div/div/div/div[1]/a/h3"),
        (By.TAG_NAME, 'h3'),
    ]

    # Elements clicked, in order, once the quote page is open. Each click is
    # followed by a short random pause.
    click_sequence = [
        # Historical data tab
        "//*[@id='quote-nav']/ul/li[6]/a",
        # Date range selector
        "//input[@class='C(t) O(n):f Tsh($actionBlueTextShadow) Bd(n) Bgc(t) Fz(14px) Pos(r) T(-1px) Bd(n):f Bxsh(n):f Cur(p) W(190px)']",
        # "Max"
        "//*[@id='Col1-1-HistoricalDataTable-Proxy']/section/div[1]/div[1]/div[1]/span[2]/div/div[1]/span[8]",
        # "Done"
        "//*[@id='Col1-1-HistoricalDataTable-Proxy']/section/div[1]/div[1]/div[1]/span[2]/div/div[3]/button[1]",
        # "Apply"
        "//*[@id='Col1-1-HistoricalDataTable-Proxy']/section/div[1]/div[1]/button",
        # CSV download link
        "//a[@class='Fl(end) Mt(3px) Cur(p)']",
    ]

    # Opening a browser to google search
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.set_window_size(720,720)
        driver.implicitly_wait(2)
        driver.get("https://google.com")

        # Searching for the stock by ticker
        search = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
        search.send_keys(f"{stock.upper()} yahoo finance")
        search.send_keys(Keys.RETURN)

        # Clicking the top google result
        last_attempt = len(result_locators) - 1
        for attempt, (by, value) in enumerate(result_locators):
            try:
                driver.find_element(by, value).click()
                break
            except WebDriverException:
                # Only Selenium failures trigger the fallback, so Ctrl-C and
                # programming errors are no longer swallowed.
                if attempt == last_attempt:
                    raise

        # Walking through the historical-data controls and the download link
        for xpath in click_sequence:
            driver.find_element(By.XPATH, xpath).click()
            time.sleep(random.choice(seq))
    finally:
        # Closing the window even when a step above failed, so no Chrome
        # process is left behind.
        driver.quit()

## User input for a specific stock
The user can enter any stock they want; the text entered is used to build the Google search query.

In [8]:
# Read the stock to look up. Surrounding whitespace is dropped so a stray
# space or newline does not end up in the search query.
stock = input("Which stock would you like? ").strip()
if not stock:
    # Fail here rather than after a browser has been launched for an empty query.
    raise ValueError("No stock was entered.")
closing_prices(stock)

Which stock would you like? f - ford motor


### Small function to retrieve the most recently downloaded CSV file

In [51]:
def grab_csv(download_dir='../../../Downloads'):
    """
    Loading in the csv file with closing prices downloaded from Yahoo finance

    Parameters
    ----------
    download_dir : str or path-like, optional
        Folder searched for ``*.csv`` files. The default is the relative path
        that was previously hard-coded; it is resolved against the current
        working directory.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file in ``download_dir`` with the
        largest ``os.path.getctime`` value.

    Raises
    ------
    ValueError
        If ``download_dir`` contains no ``*.csv`` file.
    """
    # File list retrieved from the download folder. The folder part is
    # escaped so glob metacharacters in the path are matched literally.
    pattern = os.path.join(glob.escape(os.fspath(download_dir)), '*.csv')
    list_of_files = glob.glob(pattern)

    if not list_of_files:
        # max() on an empty list raises an unhelpful ValueError; raise the
        # same exception type with a message that names the folder.
        raise ValueError(f"No CSV files found in {os.fspath(download_dir)!r}")

    # Assigning the most recent file. Note that getctime is the creation time
    # on Windows but the last metadata-change time on Unix.
    latest_file = max(list_of_files, key=os.path.getctime)
    return pd.read_csv(latest_file)

In [54]:
# Checking the dataframe: load the latest download and show only the last few
# rows, which is enough to confirm the columns and the most recent dates.
df = grab_csv()
df.tail(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
5908,2015-10-30,44.779999,46.619999,44.750000,46.119999,38.084156,3353700
5909,2015-11-02,46.139999,46.779999,44.520000,46.709999,38.571362,3946000
5910,2015-11-03,46.720001,48.160000,46.720001,47.369999,39.116364,3273800
5911,2015-11-04,47.439999,47.590000,45.820000,46.209999,38.158474,2590800
5912,2015-11-05,46.320000,46.759998,45.849998,46.660000,38.530067,2924500
5913,2015-11-06,46.540001,47.200001,46.200001,46.570000,38.455746,4085500
5914,2015-11-09,46.009998,46.200001,43.660000,43.980000,36.317032,6119500
5915,2015-11-10,43.630001,45.750000,43.509998,45.599998,37.654758,6824800
5916,2015-11-11,44.000000,44.040001,42.189999,43.160000,35.639900,9828300
5917,2015-11-12,45.430000,47.240002,45.000000,45.790001,37.811661,19188600


## Exporting the newly created DF

In [58]:
# Serialize the DataFrame `df` to "stock_df.pkl". The relative path is resolved
# against the current working directory, and an existing file of that name is
# overwritten ("wb"). `pickle` here is the `_pickle` module imported at the top.
with open("stock_df.pkl", "wb") as fp:
    pickle.dump(df, fp)