![Tesla Logo](https://www.exoticcargear.com/wp-content/uploads/Tesla-Model-S_EXoticCarGear_banner.jpg)

# Web scraping Tesla stock price history from Yahoo Finance

In [1]:
# Import the necessary libraries
import time
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

### Yahoo Finance webpage

In [2]:
# Start a visible (non-headless) Chrome session; the driver executable is
# looked up under the name 'chromedriver.exe'
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser(
    'chrome',
    headless=False,
    **executable_path
)

In [3]:
# Address of the TSLA historical-prices page that will be scraped
url_to_scrape = "https://finance.yahoo.com/quote/TSLA/history?period1=1438041600&period2=1595894400&interval=1d&filter=history&frequency=1d"

# Point the automated browser at that address
browser.visit(url_to_scrape)

# Pause for one minute so the page has time to load before its HTML is read
page_load_delay_seconds = 60
time.sleep(page_load_delay_seconds)

In [4]:
# Take the HTML of the page the browser is currently showing
html = browser.html

# Build a BeautifulSoup tree from that HTML using the 'html.parser' parser
soup = BeautifulSoup(markup=html, features='html.parser')

In [5]:
# Print the text of the page's <title> tag as a quick check of what was loaded
page_title = soup.title.string
print(page_title)

Tesla, Inc. (TSLA) Stock Historical Prices & Data - Yahoo Finance


In [6]:
# Inspecting the website we see that table is wrapped as <table class="W(100%) M(0)">
tesla_table = soup.find('table', attrs={"class": "W(100%) M(0)"})

# find() returns None when no element matches. Stop here with a clear message
# instead of letting a later cell fail with an AttributeError on None.
if tesla_table is None:
    raise ValueError('Could not find <table class="W(100%) M(0)"> in the page HTML')

In [7]:
# Collect every header cell (<th>) of the table
table_headers = tesla_table.find_all('th')

# Build the list of column headings from the whitespace-stripped text of each header cell
headings = []
for header_cell in table_headers:
    headings.append(header_cell.text.strip())
print(headings)

['Date', 'Open', 'High', 'Low', 'Close*', 'Adj Close**', 'Volume']


In [8]:
# Collect every row (<tr>) of the table and report how many were found
table_rows = tesla_table.find_all('tr')
row_count = len(table_rows)
print("Total number of rows in the table:", row_count)

Total number of rows in the table: 1261


In [9]:
# All data cells (<td>) found anywhere in the table
table_elements = tesla_table.find_all('td')

# One list of cell texts per table row
row_values = []

for table_row in table_rows:
    # Whitespace-stripped text of each <td> in this row
    cell_texts = [cell.text.strip() for cell in table_row.find_all('td')]
    # Drop cells whose text is empty after stripping
    non_empty_texts = [text for text in cell_texts if text]
    # Rows left with no values (for example rows without any <td>) are skipped
    if non_empty_texts:
        row_values.append(non_empty_texts)

In [10]:
# Column names for the dataframe
column_names = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

# Keep only the rows that supply a value for every column. In the original
# scrape the last row contained text rather than prices and was dropped by its
# fixed position (1259), which breaks as soon as the number of scraped rows
# changes. Filtering on row length removes incomplete rows wherever they occur.
# NOTE(review): assumes the text row yields fewer than 7 cell values - confirm
# against the page markup.
complete_rows = [row for row in row_values if len(row) == len(column_names)]

# Create the pandas dataframe; the values are the strings collected from the table cells
tesla_df = pd.DataFrame(complete_rows, columns=column_names)

# Let's check the shape of the dataframe
print("Shape of the tesla stock dataFrame:",tesla_df.shape)
tesla_df.head()

Shape of the tesla stock dataFrame: (1259, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,"Jul 27, 2020",1435.0,1547.94,1413.0,1539.6,1539.6,16048700
1,"Jul 24, 2020",1416.01,1465.0,1366.54,1417.0,1417.0,19396600
2,"Jul 23, 2020",1678.95,1689.0,1480.77,1513.07,1513.07,24328500
3,"Jul 22, 2020",1599.0,1626.42,1562.0,1592.33,1592.33,14161100
4,"Jul 21, 2020",1639.93,1675.0,1558.0,1568.36,1568.36,16157300


In [11]:
# Save the dataframe in .csv file
output_file = "Output Data/tesla_stocks.csv"

# Create the output folder if it is missing; to_csv does not create directories
# and would otherwise fail when the folder does not exist yet.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

# Write the column header row but not the dataframe index
tesla_df.to_csv(output_file, encoding="utf-8", index=False, header=True)

In [12]:
# Close the browser session now that the page has been scraped
browser.quit()