# Section 1: Stock Selection

## This file defines the stocks that a user can select to feed into the program. Stock selection is driven by a dropdown menu whose selected value is a `[symbol, company name]` list. This list will be fed into the IEX Finance and Reuters News API queries.

In [4]:
# importing libraries
from pathlib import Path
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from datetime import datetime, timedelta
from iexfinance.stocks import get_historical_data

In [23]:
# load the S&P 500 constituents and index them by company name
sp500_csv = Path("Data/sp500_constituents.csv")
sp500_df = (
    pd.read_csv(sp500_csv)
    # duplicate Name into a Company column so Name survives as a regular column
    .assign(Company=lambda constituents: constituents['Name'])
    .drop(columns='Sector')
    .set_index('Company')
)
sp500_df.head()

Unnamed: 0_level_0,Symbol,Name
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
3M Company,MMM,3M Company
A.O. Smith Corp,AOS,A.O. Smith Corp
Abbott Laboratories,ABT,Abbott Laboratories
AbbVie Inc.,ABBV,AbbVie Inc.
Accenture plc,ACN,Accenture plc


In [31]:
# build a {company: [remaining column values...]} mapping, one entry per row
# (each tuple is (index label, *column values); with the frame above that is
# company -> [symbol, name])
stock_dict = {
    record[0]: list(record[1:])
    for record in sp500_df.itertuples(name=None)
}

In [32]:
# creating an interactive widget that allows the user to select a company
# TODO - export this widget to a panel along with the output widget to create the app's UI
#
# Options are passed as explicit (label, value) pairs: the label is the company
# name shown in the dropdown and the value is the list stored for that company
# in stock_dict, which is what `selector_widget.value` returns.
# NOTE(review): passing the dict itself relied on mapping-style options, which
# ipywidgets 7.x flags as deprecated -- confirm against the installed version.
selector_widget = widgets.Dropdown(options=list(stock_dict.items()))

display(selector_widget)

Dropdown(options={'3M Company': ['MMM', '3M Company'], 'A.O. Smith Corp': ['AOS', 'A.O. Smith Corp'], 'Abbott …

In [34]:
# saving the output of the selector value as a variable
# (the value is the list attached to the chosen company, e.g. ['MMM', '3M Company']).
# This reads the widget once: re-run this cell after changing the dropdown,
# otherwise stock_selection keeps the previously captured company.
stock_selection = selector_widget.value

# first element of the list is the ticker symbol
stock_selection[0]

'MMM'

# Section 2: Stock Data Retrieval

## This section pulls roughly the last month of closing prices of the selected stock, calculates the daily percentage returns, and stores them in a DataFrame (which is also exported to CSV). This DataFrame will be used as the target values for the machine learning model.

## NOTE: to run this section, you must have a valid IEX Finance (IEX Cloud) API key configured, e.g. via the `IEX_TOKEN` environment variable read by `iexfinance`.

In [35]:
# setting the ticker to the symbol (first element) of the selector output
ticker = stock_selection[0]

# setting start and end date for a look-back window ending now.
# LOOKBACK_DAYS counts *calendar* days, so the window holds fewer price rows
# than days (weekends/holidays have no rows), and the first row is lost again
# when daily returns are computed with pct_change.
LOOKBACK_DAYS = 31
end_date = datetime.now()
start_date = end_date - timedelta(days=LOOKBACK_DAYS)

# getting data from the API and keeping only the closing price.
# Selecting 'close' explicitly (instead of dropping open/high/low/volume)
# keeps the frame single-column even if the API returns additional fields.
prices = get_historical_data(ticker, start_date, end_date, output_format='pandas')
df = prices[['close']].copy()
df.head()

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2020-03-13,141.68
2020-03-16,130.91
2020-03-17,134.44
2020-03-18,137.02
2020-03-19,137.51


In [36]:
# count missing closing prices per column to see whether cleaning is required
df.isna().sum()

close    0
dtype: int64

In [37]:
# daily returns: row-over-row change in close, scaled to percent
# (the first row has no predecessor and is therefore NaN)
returns = df.pct_change().mul(100)
returns.head(30)

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2020-03-13,
2020-03-16,-7.601637
2020-03-17,2.696509
2020-03-18,1.919072
2020-03-19,0.357612
2020-03-20,-9.177514
2020-03-23,-5.620946
2020-03-24,12.598626
2020-03-25,-0.88909
2020-03-26,3.527444


In [38]:
# count missing returns per column to see whether cleaning is required
returns.isna().sum()

close    1
dtype: int64

In [39]:
# cleaning returns data: drop the NaN row(s) produced by pct_change and rename
# the column from 'close' to 'return'. Written as a chained reassignment rather
# than inplace mutation; re-running the cell gives the same result.
returns = returns.dropna().rename(columns={'close': 'return'})

# the original mid-cell `returns.isnull().sum()` had its result discarded;
# make the intended "no nulls left" check explicit instead
assert not returns.isnull().to_numpy().any(), "returns still contain missing values"

returns.head(30)

Unnamed: 0_level_0,return
date,Unnamed: 1_level_1
2020-03-16,-7.601637
2020-03-17,2.696509
2020-03-18,1.919072
2020-03-19,0.357612
2020-03-20,-9.177514
2020-03-23,-5.620946
2020-03-24,12.598626
2020-03-25,-0.88909
2020-03-26,3.527444
2020-03-27,-2.158907


In [12]:
# write the cleaned daily returns (date index + 'return' column) to disk
returns_csv = Path('sample_returns_data.csv')
returns.to_csv(returns_csv)