In [183]:
import os
from datetime import datetime

import iexfinance.stocks as iex
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as ply
import quandl as qdl

# Credentials are read from the environment so that no secret is stored in
#     the notebook itself. Export PLOTLY_USERNAME, PLOTLY_API_KEY and
#     QUANDL_API_KEY before starting the kernel; a missing variable raises
#     KeyError right here instead of failing later on the first API call.
# NOTE(review): the keys that used to be hard-coded in this cell should be
#     rotated, since they have been exposed in the notebook's history.

# Plotly sign-in
ply.sign_in(os.environ['PLOTLY_USERNAME'], os.environ['PLOTLY_API_KEY'])

# Quandl API key
qdl.ApiConfig.api_key = os.environ['QUANDL_API_KEY']

# CSV file name
SP500_csv_name = 'constituents_csv.csv'

In [186]:
# Read the list of current S&P 500 companies from the CSV file named in the
#     configuration cell.

# Expected keys in the resulting DataFrame: 'Symbol', 'Name', 'Sector'
sp = pd.read_csv(SP500_csv_name)

# Preview the first 25 rows
sp.head(25)

Unnamed: 0,Symbol,Name,Sector
0,MMM,3M Company,Industrials
1,AOS,A.O. Smith Corp,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie Inc.,Health Care
4,ACN,Accenture plc,Information Technology
5,ATVI,Activision Blizzard,Information Technology
6,AYI,Acuity Brands Inc,Industrials
7,ADBE,Adobe Systems Inc,Information Technology
8,AAP,Advance Auto Parts,Consumer Discretionary
9,AMD,Advanced Micro Devices Inc,Information Technology


In [198]:
# Historical prices are fetched per ticker through iexfinance, so the first
#     step is to settle on the list of tickers to query.

# For now, emulate user choice with a random sample of n companies.
# The sample is drawn from a dedicated, seeded generator so that a
#     Restart & Run All selects the same companies every time; change
#     sample_seed to obtain a different (but still repeatable) sample.
n = 15
sample_seed = 0
tick_rng = np.random.RandomState(sample_seed)
choice = tick_rng.choice(len(sp['Symbol']), n, replace=False)
l_tick = [sp['Symbol'][c] for c in choice]

# Map each ticker to its index in sp for later reference (i.e. company name)
tick_id = dict(zip(l_tick, choice))

# As numpy array
tick = np.array(l_tick)

# Tentative start and end dates...
start_date = datetime(2015, 12, 31)
end_date = datetime(2019, 3, 2)

# Display chosen companies
print(tick)

# Columns to drop
to_drop = ['high', 'low']

# One DataFrame per ticker, in the same order as tick
tdf = [
    iex.get_historical_data(t, start_date, end_date, output_format='pandas').drop(columns=to_drop)
    for t in tick
]
company_data = pd.Series(tdf)
print(company_data[0].keys())

['VAR' 'ETN' 'TMK' 'LKQ' 'DRE' 'MTB' 'EQIX' 'APH' 'T' 'EA' 'SWKS' 'AET'
 'ESRX' 'XEC' 'EIX']
Index(['open', 'close', 'volume'], dtype='object')


In [189]:
# Up until this point, we have defined the important variables:

# - sp      : DataFrame containing all S&P 500 companies
# - tick_id : Dictionary mapping ticker to corresponding index in sp['Name']
# - tick    : User's list of tickers
# - company_data : Series of DataFrames containing each company's data

# NOTE: the same index i refers to the same company in both tick and company_data ( thus, |tick| == |company_data| )

# Stock class for better containment of information
class Stock:
    """Container bundling a ticker, its company name and its data.

    Attributes:
        ticker: Stock ticker.
        name:   Company name.
        data:   Data assigned to the stock, or None when nothing has been
                assigned yet (data must be supplied by the caller).
    """

    # Constructor
    def __init__(self, tick, name, data=None):
        self.ticker = tick # Stock ticker
        self.name = name   # Company name
        self.data = data   # Data must be input manually...

    # Returns the company name given at construction
    def get_company_name(self):
        return self.name

    # Returns Stock object's ticker
    def get_ticker(self):
        return self.ticker

    # Returns the whole data object, or only data[key] when a key is given
    def get_data(self, key=None):
        if key is None:
            return self.data
        return self.data[key]

    # State-check of whether or not object has been assigned data
    def has_data(self):
        # data defaults to None, which has no len(); treat it as "no data"
        # instead of raising TypeError.
        return self.data is not None and len(self.data) > 0

    # to-string
    def __str__(self):
        rule = '-------------------------------------------------------'
        # Falls back to None when no data (or data without a shape) is assigned,
        # so printing a data-less Stock no longer raises AttributeError.
        shape = getattr(self.data, 'shape', None)
        lines = [
            rule,
            self.ticker + '\t\t' + self.name,
            '\t\tData Assigned: ' + str(self.has_data()),
            '\t\tShape: ' + str(shape),
            rule,
        ]
        return '\n'.join(lines)

In [192]:
# Demonstration: wrap every chosen ticker in a Stock, pairing it with its
#     company name (looked up through tick_id) and its entry in company_data
stocks = [
    Stock(t, sp['Name'][tick_id[t]], d)
    for t, d in zip(tick, company_data)
]
# Optional per-stock inspection / export:
#     s.get_data().to_csv(s.get_ticker()+'.csv')
#     print(s)

In [193]:
# Collect every stock's 'close' column together with its ticker
c_list = [s.get_data('close') for s in stocks]
c_keys = [s.get_ticker() for s in stocks]

# Outer-join the columns side by side, one column per ticker
close_price = pd.concat(c_list, join='outer', axis=1, keys=c_keys)
close_price.head()

Unnamed: 0_level_0,VIAB,ANSS,FLR,SO,NUE,SYMC,KO,DLTR,DRI,WELL,RMD,XEL,SWKS,TWX,CSCO
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015-12-31,37.9158,92.5,44.6061,40.031,37.1393,16.1568,38.9069,77.22,57.7129,57.4297,50.6835,32.5858,73.1922,61.7894,24.4008
2016-01-04,37.6026,90.1,45.069,40.2364,36.4481,16.0337,38.3997,78.81,56.9058,57.7336,49.2863,32.3952,74.2401,62.0282,23.9163
2016-01-05,37.3631,89.42,44.1905,40.5187,36.826,16.0644,38.5356,79.98,57.9396,59.1687,49.5035,32.7219,69.8198,62.6015,23.8076
2016-01-06,36.9025,88.29,42.9813,40.5273,36.273,16.0414,38.3273,80.52,57.51,59.0421,48.5217,33.0667,65.8758,65.5634,23.5541
2016-01-07,35.8247,87.23,42.0178,40.185,34.4207,15.7721,37.6933,78.45,57.1078,57.3537,48.9559,33.1938,63.9801,67.0731,23.0107


In [194]:
# Function that generates descriptive .csv files given specified key
def create_stock_csv(path=None, stocks=None, key=''):
    """Write one data column of every stock to a single CSV file.

    For each stock, ``get_data(key)`` is collected and the results are
    outer-joined side by side, with each stock's ``get_ticker()`` as the
    column key. The combined frame is written to ``path``.

    Args:
        path:   Destination of the CSV file. Required.
        stocks: Iterable of objects exposing ``get_data(key)`` and
                ``get_ticker()``. None or an empty iterable means there is
                nothing to write.
        key:    Key of the data column to export (e.g. 'close'). An empty
                string means there is nothing to write.

    Returns:
        None. When ``stocks`` is empty or ``key`` is '', no file is written.

    Raises:
        ValueError: if ``path`` is None.
    """
    if path is None:
        raise ValueError('Please specify a path.')

    # Materialise once: avoids a shared mutable default argument and lets
    # generators / other one-shot iterables be passed in.
    stocks = [] if stocks is None else list(stocks)

    # Do nothing
    if not stocks or key == '':
        return

    print('Begin writing: ' + path)

    # Go through stocks
    columns = [s.get_data(key) for s in stocks]
    tickers = [s.get_ticker() for s in stocks]

    final = pd.concat(columns, join='outer', axis=1, keys=tickers)
    final.to_csv(path)
    print('Finished writing: ' + path)

In [195]:
# create_stock_csv('price.csv', stocks, 'close')