<a href="https://colab.research.google.com/github/mmistroni/TensorFlowPlayground/blob/master/StockAndNewsAPIs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Install pandas-datareader, which the import cell below binds as `dr`.
# NOTE(review): the version is unpinned — consider pinning it so re-runs are reproducible.
!pip install pandas-datareader



In [0]:
import json
import os
import urllib
from datetime import datetime, date

import numpy as np
import pandas as pd
import pandas_datareader.data as dr
from pandas.tseries.offsets import BDay

In [0]:
def get_data(symbol, start, end):
  """Fetch the 'close' column for ``symbol`` from the 'iex' source.

  The request is made through ``dr.DataReader`` for the ``start``..``end``
  range; only the 'close' column is kept and it is renamed to ``symbol``.
  """
  prices = dr.DataReader(symbol, 'iex', start, end)
  closes = prices[['close']]
  renamed = closes.rename(columns={'close': symbol})
  return renamed

<h2>Authenticate User </h2>

In [0]:
# Authenticate the current Colab user.
# NOTE(review): presumably this is what lets the gsutil commands further down
# read from the storage bucket — confirm.
from google.colab import auth
auth.authenticate_user()

<h3> Loading NASDAQ and NYSE shares </h3>

In [3]:
!gsutil cp gs://datascience-bucket-mm/nyse.csv /tmp/nyse.csv
!gsutil cp gs://datascience-bucket-mm/nasdaq.csv /tmp/nyse.csv  
  
# Print the result to make sure the transfer worked.
!cat /tmp/nyse.csv

Copying gs://datascience-bucket-mm/nyse.csv...
/ [1 files][383.8 KiB/383.8 KiB]                                                
Operation completed over 1 objects/383.8 KiB.                                    
Copying gs://datascience-bucket-mm/nasdaq.csv...
/ [1 files][430.9 KiB/430.9 KiB]                                                
Operation completed over 1 objects/430.9 KiB.                                    
"Symbol","Name","LastSale","MarketCap","IPOyear","Sector","industry","Summary Quote"
"PIH","1347 Property Insurance Holdings, Inc.",7.06,"$42.26M",2014,"Finance","Property-Casualty Insurers","https://www.nasdaq.com/symbol/pih"
"PIHPP","1347 Property Insurance Holdings, Inc.",25.65,"n/a","n/a","Finance","Property-Casualty Insurers","https://www.nasdaq.com/symbol/pihpp"
"TURN","180 Degree Capital Corp.",2.22,"$69.09M","n/a","Finance","Finance/Investors Services","https://www.nasdaq.com/symbol/turn"
"FLWS","1-800 FLOWERS.COM, Inc.",14.3,"$923.84M",1999,"Consumer Services","O

<h3>  Testing Historical API </h3>

In [0]:
def get_historical_data(ticker, start, end, access_key=None):
  """Fetch daily history for ``ticker`` from the 'av-daily' data source.

  Args:
    ticker: symbol forwarded to ``dr.DataReader``.
    start: start of the requested range.
    end: end of the requested range.
    access_key: API key for the data source. When omitted, the key is read
      from the ``ALPHAVANTAGE_API_KEY`` environment variable so that no
      credential has to be stored in the notebook.

  Returns:
    Whatever ``dr.DataReader`` returns for the request.

  Raises:
    ValueError: if no key is passed and the environment variable is unset.
  """
  # SECURITY: a key used to be hard-coded here; since it has been committed it
  # should be treated as leaked and revoked.
  if access_key is None:
    access_key = os.environ.get('ALPHAVANTAGE_API_KEY')
  if not access_key:
    raise ValueError(
        'No API key available: pass access_key or set the '
        'ALPHAVANTAGE_API_KEY environment variable')

  return dr.DataReader(ticker, "av-daily", start,
                       end,
                       access_key=access_key)

def get_date_ranges(business_days=60):
  """Return the (start, end) pair of a look-back window that ends today.

  Args:
    business_days: length of the window in business days (default 60).

  Returns:
    ``(start_date, end_date)`` where ``end_date`` is ``date.today()`` and
    ``start_date`` is ``end_date - BDay(business_days)``. The subtraction
    yields a pandas timestamp while ``end_date`` stays a ``datetime.date``,
    so the two values print differently.
  """
  end_date = date.today()
  start_date = end_date - BDay(business_days)
  return start_date, end_date

def test():
  """Smoke test: print the date range from get_date_ranges() and the AMZN history for it."""
  # The range must be unpacked into the same names that are used below; the
  # earlier version unpacked into `start, end` and then read undefined names.
  start_date, end_date = get_date_ranges()
  print('start:{}, end:{}'.format(start_date, end_date))
  print(get_historical_data('AMZN', start_date, end_date))

<h3> Reading source data and computing performance </h3>

In [0]:
def get_nyse_df():
  """Load /tmp/nyse.csv into a DataFrame, using the file's first row as the header."""
  listings = pd.read_csv('/tmp/nyse.csv', header=0)
  return listings


In [0]:
def compute_performance(start_dt, end_dt, ticker='AMZN'):
  """Compute the first-to-last change in closing price for ``ticker``.

  Args:
    start_dt: start of the range, forwarded to ``get_historical_data``.
    end_dt: end of the range, forwarded to ``get_historical_data``.
    ticker: symbol to evaluate.

  Returns:
    A one-row DataFrame (index label 'close') with the columns
    ``performance`` (last close / first close - 1), ``Symbol`` and
    ``Latest`` (the last close), or ``None`` when the history could not be
    fetched or contained no rows.
  """
  try:
    closes = get_historical_data(ticker, start_dt, end_dt)['close']
    first_close = closes.iloc[0]
    last_close = closes.iloc[-1]
    # Build the row from scalars so the result does not depend on how pandas
    # aligns Series names when constructing a frame.
    return pd.DataFrame(
        {'performance': [last_close / first_close - 1],
         'Symbol': [ticker],
         'Latest': [last_close]},
        index=['close'])
  except Exception as e:
    # Best effort: one failing ticker must not abort a whole scan, but the
    # reason is reported so systematic failures can be diagnosed.
    print('Unable to find data for {}: {!r}'.format(ticker, e))
    return None

In [0]:
def find_best_performing(start_dt, end_dt):
  """Compute the performance of every listed symbol and join it to the listing data.

  Args:
    start_dt: start of the range, forwarded to ``compute_performance``.
    end_dt: end of the range, forwarded to ``compute_performance``.

  Returns:
    The frame from ``get_nyse_df()`` inner-joined on 'Symbol' with the
    per-symbol results, i.e. only symbols for which a result was produced.
    When no symbol produced a result, an empty frame with the same columns
    (listing columns plus 'performance' and 'Latest') is returned.
  """
  print('Finding Best Performing Stocks between:{}-{}'.format(start_dt, end_dt))
  nyse_df = get_nyse_df()
  results = (compute_performance(start_dt, end_dt, symbol)
             for symbol in nyse_df['Symbol'].tolist())
  # compute_performance returns None for symbols it could not evaluate.
  found = [res for res in results if res is not None]
  if not found:
    # pd.concat raises ValueError on an empty list; return an empty result
    # with the usual columns instead.
    return nyse_df.iloc[0:0].assign(performance=float('nan'),
                                    Latest=float('nan'))
  all_data = pd.concat(found)
  return pd.merge(nyse_df, all_data, how='inner', on='Symbol')
  

# Run the scan over the date range returned by get_date_ranges() and report
# the size of the merged result. `perf_df` is reused by the sorting cell below.
start_dt, end_dt = get_date_ranges()
perf_df = find_best_performing(start_dt, end_dt)
print(perf_df.shape)

Finding Best Performing Stocks between:2019-04-04 00:00:00-2019-06-27
Unable to find data for SRCE
Unable to find data for VNET
Unable to find data for AVHI
Unable to find data for SHLM
Unable to find data for AAON
Unable to find data for ABEO
Unable to find data for ABEOW
Unable to find data for ABIL
Unable to find data for ABMD
Unable to find data for AXAS
Unable to find data for ACIU


In [0]:
# Rank by performance, best first, and show the top 10.
# sort_values(..., inplace=True) returns None, so chaining .head() onto it
# fails; rebind the sorted frame instead and display its head.
perf_df = perf_df.sort_values(by=['performance'], ascending=False)
perf_df.head(10)