In [11]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from csv import writer
from linear_regression import linear_regression

# Apple Inc. Stock Analysis

## Option 1: Web Scraping from Yahoo Finance Webpage 
##### Three months of data

In [12]:
def scrape_data():
    """Scrape AAPL's price-history table from Yahoo Finance into stock_data.csv.

    The page is fetched over HTTP, every element carrying the CSS class
    ``BdT`` is treated as a candidate table row, and each usable row is
    written as ``date, stock_open, stock_high, stock_low, stock_close``.

    Rows are skipped when:
      * the element has no next sibling (presumably the table's trailing
        summary row -- confirm against the live page markup),
      * the first value cell has no next sibling (the single-cell rows the
        original code labelled as dividends),
      * the date cell is missing or fewer than four value cells are present
        (markup that does not match the expected layout).

    Returns
    -------
    str
        The fixed status message ``"Data Scraped and Saved"``.

    Raises
    ------
    requests.RequestException
        If the request times out, fails, or returns an HTTP error status.
        The existing CSV file is left untouched in that case because the
        file is only opened after a successful download.
    """
    response = requests.get(
        "https://finance.yahoo.com/quote/AAPL/history?p=AAPL",
        timeout=30,  # never hang the notebook on a stalled connection
    )
    # Fail loudly instead of silently producing a header-only CSV from an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    articles = soup.find_all(class_="BdT")

    # newline="" is required by the csv module; without it every record is
    # followed by a blank line on platforms that translate "\n" to "\r\n".
    with open("stock_data.csv", "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = writer(csv_file)
        csv_writer.writerow(["date", "stock_open", "stock_high", "stock_low", "stock_close"])
        for article in articles:
            if article.next_sibling is None:
                continue

            date_cell = article.find(class_="Pend(10px)")
            values = article.find_all(class_="Pstart(10px)")
            if date_cell is None or not values:
                # Row does not follow the expected layout; nothing to record.
                continue
            if values[0].next_sibling is None:
                # Single-cell row (dividend entry in the original code): no prices.
                continue
            if len(values) < 4:
                # Not enough cells for open/high/low/close.
                continue

            csv_writer.writerow(
                [date_cell.get_text()] + [cell.get_text() for cell in values[:4]]
            )
    return "Data Scraped and Saved"
# Run the scraper; the returned status string is shown as the cell output.
scrape_data()

'Data Scraped and Saved'

In [13]:
def get_data(path='stock_data.csv'):
    """Load a scraped price CSV and split it into predictors and response.

    Parameters
    ----------
    path : str or path-like, default 'stock_data.csv'
        CSV file to read. It must contain the columns ``stock_open``,
        ``stock_high``, ``stock_low`` and ``stock_close``.

    Returns
    -------
    X : pandas.DataFrame
        The ``stock_open``, ``stock_high`` and ``stock_low`` columns, in
        that order.
    y : pandas.Series
        The ``stock_close`` column.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    KeyError
        If any of the four expected columns is missing.
    """
    # The CSV holds text copied from a web page, so prices may carry a
    # thousands separator ("1,234.50"); parse those as numbers, not strings.
    df = pd.read_csv(path, thousands=',')
    X = df[['stock_open', 'stock_high', 'stock_low']]
    y = df['stock_close']
    return X, y

In [14]:
# Read stock_data.csv back in: X holds the open/high/low columns, y the close column.
X, y = get_data()

## Option 2: Using pandas-datareader to read Yahoo Financial Data
##### Yearly data

In [15]:
from pandas_datareader import data
# Clear X and y before they are reassigned from the pandas-datareader frame below.
X, y = None, None

In [16]:
# Request AAPL history from the 'yahoo' data source for start='2019' through end='2020'.
# NOTE(review): this presumably needs network access and a working Yahoo endpoint --
# confirm it still runs on a fresh kernel.
aapl = data.DataReader('AAPL', start='2019', end='2020',data_source='yahoo')

In [17]:
# Keep a local copy of the downloaded frame (relative path, so it lands in the working directory).
aapl.to_csv("hw2_data.csv")

In [18]:
# Predictors are taken in the order High, Low, Open; the response is Close.
# Note this order differs from get_data(), which returns open, high, low.
X = aapl[['High','Low','Open']]
y = aapl['Close']

## Performing Regression on the Data Set

In [19]:
# linear_regression is imported from the local linear_regression module (not shown here).
# It returns six values; going by the names these are coefficient estimates, their
# standard errors, confidence-interval bounds, and the processed X / y -- TODO confirm.
B_hat, standard_error, CI_lower, CI_upper, X_processed, y_processed = linear_regression(X,y)

## Print Out the Regression Table

In [20]:
# Build the regression table: the four result sequences are zipped position by
# position, so each row holds one estimate with its standard error and bounds.
table_columns = ['B_hat', 'Standard Error', 'Lower 95%', 'Upper 95%']
table_rows = list(zip(B_hat, standard_error, CI_lower, CI_upper))
df = pd.DataFrame(table_rows, columns=table_columns)
print(df)

      B_hat  Standard Error  Lower 95%  Upper 95%
0 -0.625701        0.451992  -1.515935   0.264532
1  0.757666        0.053441   0.652410   0.862921
2  0.719667        0.048873   0.623408   0.815926
3 -0.474085        0.058895  -0.590083  -0.358086
