In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import requests
from bs4 import BeautifulSoup

In [272]:
# Load the AAPL price history from the Yahoo Finance CSV export (August 11, 2020)
apple_chart = pd.read_csv(filepath_or_buffer='AAPL.csv')

In [273]:
# Drop the unnecessary columns, leaving Date, Open and Adj Close.
# Reassigning (instead of inplace=True) together with errors='ignore' lets this
# cell be re-run safely: a second in-place drop would raise KeyError because the
# columns are already gone.
apple_chart = apple_chart.drop(columns=['High', 'Close', 'Low', 'Volume'], errors='ignore')
apple_chart

Unnamed: 0,Date,Open,Adj Close
0,2015-08-11,117.809998,104.877724
1,2015-08-12,112.529999,106.494896
2,2015-08-13,116.040001,106.411743
3,2015-08-14,114.320000,107.160263
4,2015-08-17,116.040001,108.269211
...,...,...,...
1255,2020-08-05,437.510010,439.457642
1256,2020-08-06,441.619995,454.790009
1257,2020-08-07,452.820007,444.450012
1258,2020-08-10,450.399994,450.910004


### The purpose of this notebook is to pull daily updates from Yahoo Finance so that a CSV file does not have to be downloaded every day

In [436]:
# Request the Yahoo Finance historical-data page for AAPL.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP error here instead of letting a failed
# response be parsed as if it were the real page.
page = requests.get('https://finance.yahoo.com/quote/AAPL/history?p=AAPL', timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

In [437]:
# Locate the table that holds all the historical data on the page.
# The table is selected by its data-test attribute rather than by the exact
# string of styling classes, which only matches while the page keeps precisely
# that class list.
info = soup.find('table', attrs={'data-test': 'historical-prices'})
if info is None:
    # Fail here with a clear message instead of an AttributeError further down.
    raise ValueError('Historical prices table not found; the page layout may have changed')
info

<table class="W(100%) M(0)" data-reactid="33" data-test="historical-prices"><thead data-reactid="34"><tr class="C($tertiaryColor) Fz(xs) Ta(end)" data-reactid="35"><th class="Ta(start) W(100px) Fw(400) Py(6px)" data-reactid="36"><span data-reactid="37">Date</span></th><th class="Fw(400) Py(6px)" data-reactid="38"><span data-reactid="39">Open</span></th><th class="Fw(400) Py(6px)" data-reactid="40"><span data-reactid="41">High</span></th><th class="Fw(400) Py(6px)" data-reactid="42"><span data-reactid="43">Low</span></th><th class="Fw(400) Py(6px)" data-reactid="44"><span data-reactid="45">Close*</span></th><th class="Fw(400) Py(6px)" data-reactid="46"><span data-reactid="47">Adj Close**</span></th><th class="Fw(400) Py(6px)" data-reactid="48"><span data-reactid="49">Volume</span></th></tr></thead><tbody data-reactid="50"><tr class="BdT Bdc($seperatorColor) Ta(end) Fz(s) Whs(nw)" data-reactid="51"><td class="Py(10px) Ta(start) Pend(10px)" data-reactid="52"><span data-reactid="53">Aug 17

In [438]:
# Rather than reading the whole table, collect every <td> cell it contains and
# look at the first seven, which make up the row for the most recent date.
# NOTE(review): the seven-cells-per-row layout is taken from the output shown
# here; confirm it holds for every row (e.g. dividend or split entries).
columns = info.find_all(name='td')
columns[:7]

[<td class="Py(10px) Ta(start) Pend(10px)" data-reactid="52"><span data-reactid="53">Aug 17, 2020</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="54"><span data-reactid="55">464.25</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="56"><span data-reactid="57">464.35</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="58"><span data-reactid="59">455.85</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="60"><span data-reactid="61">457.23</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="62"><span data-reactid="63">457.23</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="64"><span data-reactid="65">16,521,676</span></td>]

In [356]:
# The date is the first cell of the row. Extract it first so the open and
# adjusted close prices pulled next can be checked against the right day.
# Read the cell's text directly instead of slicing its HTML string by fixed
# offsets, which depends on the exact closing tags and on the date's length.
new_date = columns[0].get_text(strip=True)
new_date

'Aug 17, 2020'

In [361]:
# The open price is the second cell of the row.
# Read the cell's text instead of slicing its HTML string: the old [-18:-12]
# slice only works for prices that are exactly six characters long and breaks
# on values such as "99.50" or "1,234.56". Thousands separators are removed
# before converting to float.
new_open_price = float(columns[1].get_text(strip=True).replace(',', ''))
new_open_price

464.25

In [362]:
# The adjusted close price is the sixth cell of the row.
# Read the cell's text instead of slicing its HTML string: the old [-18:-12]
# slice only works for prices that are exactly six characters long.
# Thousands separators are removed before converting to float.
new_adj_close_price = float(columns[5].get_text(strip=True).replace(',', ''))
print(new_adj_close_price)

457.82


In [439]:
# Now we want the next date's information, so we move on to the next seven
# cells in the list (the second row of the table)
columns[7:14]

[<td class="Py(10px) Ta(start) Pend(10px)" data-reactid="67"><span data-reactid="68">Aug 14, 2020</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="69"><span data-reactid="70">459.32</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="71"><span data-reactid="72">460.00</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="73"><span data-reactid="74">452.18</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="75"><span data-reactid="76">459.63</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="77"><span data-reactid="78">459.63</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="79"><span data-reactid="80">41,352,800</span></td>]

In [386]:
# Date of the second row (its first cell is columns[7]).
# Read the cell's text directly instead of slicing its HTML string by fixed
# offsets, which depends on the exact closing tags and on the date's length.
second_date = columns[7].get_text(strip=True)
second_date

'Aug 14, 2020'

In [368]:
# Open price of the second row (second cell of that row).
# Parse the cell's text rather than a fixed six-character slice of its HTML,
# and remove thousands separators before converting to float.
second_open_price = float(columns[8].get_text(strip=True).replace(',', ''))
second_open_price

459.32

In [371]:
# Adjusted close price of the second row (sixth cell of that row).
# Parse the cell's text rather than a fixed six-character slice of its HTML,
# and remove thousands separators before converting to float.
second_adj_close_price = float(columns[12].get_text(strip=True).replace(',', ''))
print(second_adj_close_price)

459.63


In [384]:
# Third row of the table: the next seven cells in the list
columns[14:21]

[<td class="Py(10px) Ta(start) Pend(10px)" data-reactid="82"><span data-reactid="83">Aug 13, 2020</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="84"><span data-reactid="85">457.72</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="86"><span data-reactid="87">464.17</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="88"><span data-reactid="89">455.71</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="90"><span data-reactid="91">460.04</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="92"><span data-reactid="93">460.04</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="94"><span data-reactid="95">52,520,500</span></td>]

In [387]:
# Date of the third row (its first cell is columns[14]).
# Read the cell's text directly instead of slicing its HTML string by fixed
# offsets, which depends on the exact closing tags and on the date's length.
third_date = columns[14].get_text(strip=True)
third_date

'Aug 13, 2020'

In [389]:
# Open price of the third row (second cell of that row).
# Parse the cell's text rather than a fixed six-character slice of its HTML,
# and remove thousands separators before converting to float.
third_open_price = float(columns[15].get_text(strip=True).replace(',', ''))
third_open_price

457.72

In [391]:
# Adjusted close price of the third row (sixth cell of that row).
# Parse the cell's text rather than a fixed six-character slice of its HTML,
# and remove thousands separators before converting to float.
third_adj_close_price = float(columns[19].get_text(strip=True).replace(',', ''))
print(third_adj_close_price)

460.04


In [412]:
# Fourth row of the table: the next seven cells in the list
columns[21:28]

[<td class="Py(10px) Ta(start) Pend(10px)" data-reactid="97"><span data-reactid="98">Aug 12, 2020</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="99"><span data-reactid="100">441.99</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="101"><span data-reactid="102">453.10</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="103"><span data-reactid="104">441.19</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="105"><span data-reactid="106">452.04</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="107"><span data-reactid="108">452.04</span></td>,
 <td class="Py(10px) Pstart(10px)" data-reactid="109"><span data-reactid="110">41,486,200</span></td>]

In [414]:
# Date of the fourth row (its first cell is columns[21]).
# Read the cell's text directly instead of slicing its HTML string by fixed
# offsets, which depends on the exact closing tags and on the date's length.
fourth_date = columns[21].get_text(strip=True)
fourth_date

'Aug 12, 2020'

In [416]:
# Open price of the fourth row (second cell of that row).
# Parse the cell's text rather than a fixed six-character slice of its HTML,
# and remove thousands separators before converting to float.
fourth_open_price = float(columns[22].get_text(strip=True).replace(',', ''))
fourth_open_price

441.99

In [419]:
# Adjusted close price of the fourth row (sixth cell of that row).
# Parse the cell's text rather than a fixed six-character slice of its HTML,
# and remove thousands separators before converting to float.
fourth_adj_close_price = float(columns[26].get_text(strip=True).replace(',', ''))
print(fourth_adj_close_price)

452.04


In [420]:
# Gather the scraped values into a dictionary, one list per column, so they can
# be turned into a dataframe and concatenated with the apple_chart data.
data = {
    'Date': [new_date, second_date, third_date, fourth_date],
    'Open': [new_open_price, second_open_price, third_open_price, fourth_open_price],
    'Adj Close': [
        new_adj_close_price,
        second_adj_close_price,
        third_adj_close_price,
        fourth_adj_close_price,
    ],
}

In [421]:
# Build a dataframe from the scraped values (one row per date)
new_information = pd.DataFrame(data)

In [422]:
# Preview the dataframe of scraped rows
new_information

Unnamed: 0,Date,Open,Adj Close
0,"Aug 17, 2020",464.25,457.82
1,"Aug 14, 2020",459.32,459.63
2,"Aug 13, 2020",457.72,460.04
3,"Aug 12, 2020",441.99,452.04


In [423]:
# Convert the Date column from text to the pandas datetime format
new_information = new_information.assign(Date=pd.to_datetime(new_information['Date']))

In [424]:
# Display the frame again to check the Date column after conversion
new_information

Unnamed: 0,Date,Open,Adj Close
0,2020-08-17,464.25,457.82
1,2020-08-14,459.32,459.63
2,2020-08-13,457.72,460.04
3,2020-08-12,441.99,452.04


In [425]:
# Convert apple_chart's Date column to datetime as well, so both dataframes
# hold the same type in that column before they are combined
apple_chart = apple_chart.assign(Date=pd.to_datetime(apple_chart['Date']))

In [426]:
# Make sure the type changed: the summary should list Date as datetime64[ns]
apple_chart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1260 entries, 0 to 1259
Data columns (total 3 columns):
Date         1260 non-null datetime64[ns]
Open         1260 non-null float64
Adj Close    1260 non-null float64
dtypes: datetime64[ns](1), float64(2)
memory usage: 29.7 KB


In [435]:
# Concatenate the dataframes, sort by date so the rows are in order, and reset
# the index.
# Duplicate dates are dropped before sorting, keeping the row from apple_chart
# (it comes first in the concat). Without this, a scraped row whose date is
# already in the CSV would appear twice in the combined frame.
combined = (
    pd.concat([apple_chart, new_information], sort=True)
    .drop_duplicates(subset='Date', keep='first')
    .sort_values(by='Date')
    .reset_index(drop=True)
)
combined.tail(10)

Unnamed: 0,Adj Close,Date,Open
1254,437.870514,2020-08-04,436.529999
1255,439.457642,2020-08-05,437.51001
1256,454.790009,2020-08-06,441.619995
1257,444.450012,2020-08-07,452.820007
1258,450.910004,2020-08-10,450.399994
1259,443.695007,2020-08-11,447.875
1260,452.04,2020-08-12,441.99
1261,460.04,2020-08-13,457.72
1262,459.63,2020-08-14,459.32
1263,457.82,2020-08-17,464.25


### We have successfully updated our dataframe without re-downloading a CSV file, and we are on our way to creating a function that updates our graph daily