<a href="https://colab.research.google.com/github/mmistroni/TensorFlowPlayground/blob/master/StockAndNewsAPIs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

!pip install pandas-datareader



In [0]:
!pip install -U -q PyDrive

In [0]:
import urllib
import json
import pandas as pd
from pandas.tseries.offsets import BDay
import pandas_datareader.data as dr
import numpy as np
from datetime import datetime, date


<h2>Authenticate User </h2>

In [0]:
# Run Colab's interactive Google sign-in for this session.
# NOTE(review): presumably required so the gsutil copies in the next cell can
# read the bucket — confirm.
from google.colab import auth
auth.authenticate_user()


<h3> Loading Nasdaq and Nyse shares </h3>

In [0]:
!gsutil cp gs://datascience-bucket-mm/nyse-companylist.csv /tmp/nyse.csv
!gsutil cp gs://datascience-bucket-mm/nasdaq-companylist.csv /tmp/nyse.csv  
  
# Print the result to make sure the transfer worked.
#!cat /tmp/nyse.csv

Copying gs://datascience-bucket-mm/nyse-companylist.csv...
/ [1 files][392.3 KiB/392.3 KiB]                                                
Operation completed over 1 objects/392.3 KiB.                                    
Copying gs://datascience-bucket-mm/nasdaq-companylist.csv...
/ [1 files][462.5 KiB/462.5 KiB]                                                
Operation completed over 1 objects/462.5 KiB.                                    



<h3>Loading Credentials</h3>

In [32]:
# Mount Google Drive at /content/gdrive.
# The key readers defined in this cell open 'gdrive/My Drive/...' as a relative
# path, so they only resolve under this mount when the working directory is
# /content — NOTE(review): confirm that is always the case.
from google.colab import drive
drive.mount('/content/gdrive')

def _read_first_line(path):
  """Return the first line of the file at `path`, stripped of whitespace.

  Raises:
    ValueError: if the file is empty or its first line is blank.
  """
  with open(path) as f:
    # The raw line keeps its trailing newline; left in place it would be
    # spliced into every URL or request built from the key.
    first_line = f.readline().strip()
  if not first_line:
    raise ValueError('No key found on the first line of {}'.format(path))
  return first_line

def get_iexapi_keys():
  """Return the IEX API token stored in the Drive passwords folder."""
  return _read_first_line('gdrive/My Drive/passwords/iexapi.keys')

def get_nlp_service_keys():
  """Return the developer key used for the Google language API."""
  return _read_first_line('gdrive/My Drive/passwords/nlp.keys')

def get_newsapi_keys():
  """Return the News API key stored in the Drive passwords folder."""
  return _read_first_line('gdrive/My Drive/passwords/newsapi.keys')



Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


<h3>IEX API CALLS </h3>


In [0]:
import requests

# Notebook-level IEX token; the IEX helper functions in this cell read this
# global each time they are called.
token = get_iexapi_keys()

def get_statistics(ticker):
  """Fetch the filtered IEX stats for `ticker` as a DataFrame.

  The CSV response is read straight from the URL with pandas, and a `Symbol`
  column holding `ticker` is set on the result. Uses the notebook-level
  `token`.
  """
  url_template = 'https://cloud.iexapis.com/stable/stock/{symbol}/stats?token={token}&format=csv&filter=companyName,symbol,beta,day50MovingAvg,day200MovingAvg,month6ChangePercent,month3ChangePercent,month1ChangePercent'
  stats = pd.read_csv(url_template.format(symbol=ticker, token=token))
  return stats.assign(Symbol=ticker)

def get_historical_data(ticker, start, end):
  """Return the get_statistics frame for `ticker`.

  `start` and `end` are accepted but not used: the call is forwarded to
  get_statistics with the ticker only.
  """
  return get_statistics(ticker)


def get_all_stocks():
  """Return the symbols of enabled entries of type 'cs' from the IEX symbol list."""
  symbols_url = 'https://cloud.iexapis.com/stable/ref-data/iex/symbols?token={token}'.format(token=token)
  listing = requests.get(symbols_url).json()
  enabled_common = []
  for entry in listing:
    # Type comparison is case-insensitive; disabled entries are skipped first.
    if entry['isEnabled'] and entry['type'].lower() == 'cs':
      enabled_common.append(entry['symbol'])
  return enabled_common

def get_all_us_stocks(security_type='cs'):
  """Return symbols from the IEX 'nys' exchange list whose type matches.

  Args:
    security_type: lower-case IEX type code to keep (default 'cs').

  Only the 'nys' exchange listing is requested; no NASDAQ request is made.
  """
  nyse_url = 'https://cloud.iexapis.com/stable/ref-data/exchange/nys/symbols?token={token}'.format(token=token)
  nyse_listing = requests.get(nyse_url).json()
  matching = filter(lambda entry: entry['type'].lower() == security_type, nyse_listing)
  return [entry['symbol'] for entry in matching]

def get_all_etfs():
  """Return the symbols of type 'et' from get_all_us_stocks.

  get_all_us_stocks already filters by type and returns plain symbol strings,
  so the type code is passed to it instead of re-filtering its result (the
  previous version indexed those strings like dicts and requested the default
  'cs' type).
  """
  return get_all_us_stocks(security_type='et')

def get_all_stocks_data():
  """Return a lazy map of (symbol, get_historical_value(symbol)) over get_all_etfs().

  The symbol list is fetched when this function is called; the per-symbol
  history is fetched only as the returned map is consumed.
  """
  def _with_history(symbol):
    return (symbol, get_historical_value(symbol))

  return map(_with_history, get_all_etfs())


def get_all_exchanges():
  """Return the parsed JSON of the IEX US exchanges reference endpoint."""
  exchanges_url = 'https://cloud.iexapis.com/stable/ref-data/market/us/exchanges?token={token}'.format(token=token)
  response = requests.get(exchanges_url)
  return response.json()

def get_latest_price(symbol):
  """Fetch the IEX quote for `symbol`, filtered to `symbol` and `close`.

  The CSV response is read straight from the URL with pandas and returned as a
  DataFrame. Uses the notebook-level `token`.
  """
  # The unused function-local `import requests` was dropped: the request is
  # made by pandas.read_csv, not by requests.
  quote_url = "https://cloud.iexapis.com/stable/stock/{ticker}/quote?token={token}&format=csv&filter=symbol,close".format(token=token, ticker=symbol)
  return pd.read_csv(quote_url)

def get_quote(symbol):
  """Return the parsed JSON of the IEX latestPrice endpoint for `symbol`.

  Best-effort: any ordinary error (network, decoding, ...) yields -1.
  """
  try:
    historical_url = 'https://cloud.iexapis.com/stable/stock/{symbol}/quote/latestPrice?token={token}'.format(token=token,symbol=symbol)
    return requests.get(historical_url).json()
  except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit still stop a long-running loop over many symbols.
    return -1

def get_news(symbol, num_of_news):
  """Return the parsed JSON of the IEX news endpoint for `symbol`.

  Args:
    symbol: ticker to query.
    num_of_news: value placed in the `/news/last/{n}` path segment.

  Best-effort: on any exception the error is printed and [] is returned.
  """
  try:
    # Single slash after `stable`; the path previously read `stable//stock`.
    news_url = 'https://cloud.iexapis.com/stable/stock/{symbol}/news/last/{last}?token={token}'.format(
        symbol=symbol, last=num_of_news, token=token)
    return requests.get(news_url).json()
  except Exception as e:
    print('Exception for {}:{}'.format(symbol, str(e)))
    return []


<h3> Yahoo API Calls </h3>

In [0]:
from scipy.stats import pearsonr
import requests

def get_latest_price_yahoo(symbol, as_of_date):
  """Return the `Close` column from the Yahoo reader for a single date.

  `as_of_date` is used as both start and end of the request. On any exception
  an empty DataFrame with a single `Close` column is returned instead.
  """
  try:
    quotes = dr.get_data_yahoo(symbol, as_of_date, as_of_date)
    return quotes[['Close']]
  except Exception:
    placeholder = pd.DataFrame(columns=['Close'])
    return placeholder[['Close']]

def get_historical_value(symbol, start=date(2018,1,1), end=date(2019,9,19)):
  """Return the `Adj Close` series from the Yahoo reader, renamed to `symbol`.

  Args:
    symbol: ticker to request; also the name of the single returned column.
    start: first date of the window. Defaults to the previously hard-coded
      2018-01-01.
    end: last date of the window. Defaults to the previously hard-coded
      2019-09-19.

  Returns:
    A one-column DataFrame named after `symbol`. On any exception an empty
    DataFrame with that single column is returned instead.
  """
  try:
    data = dr.get_data_yahoo(symbol, start, end)[['Adj Close']]
    return data.rename(columns={'Adj Close' : symbol})
  except Exception:
    # Best-effort by design: callers filter on the frame's row count.
    return pd.DataFrame(columns=[symbol])
  

<h3> Getting Sentiment Analysis from Google </h3>

In [0]:
def get_sentiment_from_google(content):
  """Run Google Cloud Natural Language sentiment analysis on `content`.

  Args:
    content: plain text to analyse.

  Returns:
    The `document_sentiment` attribute of the analyze_sentiment response.
  """
  from google.cloud import language
  from google.cloud.language import enums
  from google.cloud.language import types

  client = language.LanguageServiceClient()
  # The document is built from the `content` argument; the previous version
  # referenced an undefined `clean_text` name.
  document = types.Document(
      content=content,
      type=enums.Document.Type.PLAIN_TEXT)

  # Detects the sentiment of the text and hands it back to the caller
  # (the previous version computed it and returned nothing).
  return client.analyze_sentiment(document=document).document_sentiment

def test_language_api(content):
  """Call the Google language API `annotateText` method on `content`.

  The service is built through the discovery client with the developer key
  from get_nlp_service_keys(). Syntax, entity and document-sentiment features
  are requested.

  Returns:
    The executed response, or {'error': <HttpError>} when the request raises
    HttpError.
  """
  import sys
  import httplib2
  from googleapiclient import discovery
  from googleapiclient.errors import HttpError

  language_service = discovery.build(
      'language', 'v1',
      http=httplib2.Http(),
      discoveryServiceUrl='https://{api}.googleapis.com/$discovery/rest?version={apiVersion}',
      developerKey=get_nlp_service_keys(),
  )

  requested_features = {
      'extract_syntax': True,
      'extractEntities': True,
      'extractDocumentSentiment': True,
  }
  # Encoding follows the interpreter's unicode width.
  if sys.maxunicode == 65535:
    encoding = 'UTF16'
  else:
    encoding = 'UTF32'
  request_body = {
      'document': {'type': 'PLAIN_TEXT', 'content': content},
      'features': requested_features,
      'encodingType': encoding,
  }
  annotate_request = language_service.documents().annotateText(body=request_body)

  try:
    return annotate_request.execute()
  except HttpError as e:
    return {'error': e}



<h3> Reading source data and computing performance </h3>

In [0]:
def get_date_ranges():
  """Return (start, end): today shifted back 60 business days, and today."""
  today = date.today()
  return today - BDay(60), today

def test():
  start,end = get_date_ranges()
  print('start:{}, end:{}'.format(start_date, end_date))
  print(get_historical_data('AMZN', start_date, end_date))
  

  
def get_nyse_df():
  """Load /tmp/nyse.csv and keep the Symbol, Name, Sector and industry columns."""
  company_list = pd.read_csv('/tmp/nyse.csv', header=0)
  wanted = ['Symbol', 'Name', 'Sector', 'industry']
  return company_list[wanted]

def compute_performance(start_dt, end_dt, ticker):
  """Join the statistics and latest quote for `ticker` into one frame.

  The frame from get_historical_data is inner-merged with the frame from
  get_latest_price (left key `Symbol`, right key `symbol`), and the `symbol`
  column is dropped from the result.

  Returns:
    The merged DataFrame, or None when anything raises (the error is printed).
  """
  try:
    stats = get_historical_data(ticker, start_dt, end_dt)
    quote = get_latest_price(ticker)
    joined = pd.merge(stats, quote, how='inner', left_on="Symbol", right_on="symbol")
    return joined.drop('symbol', axis=1)
  except Exception as e:
    reason = str(e)
    print('Exception:{}'.format(reason))
    print('Unable to find data for {}:{}'.format(ticker, reason))
    
def find_best_performing(start_dt, end_dt):
  """Collect performance data for every symbol in the company list.

  compute_performance is called once per symbol from get_nyse_df(); symbols
  for which it returns None are skipped. The remaining frames are
  concatenated and inner-merged back onto the company list on `Symbol`.

  Returns:
    A DataFrame with the listing columns plus companyName, close, the 1/3/6
    month change percentages and the 200/50 day moving averages.
  """
  print('Finding Best Performing Stocks between:{}-{}'.format(start_dt, end_dt))
  company_list = get_nyse_df()
  tickers = company_list['Symbol'].values.tolist()
  print('Now we have to source data for:{}'.format(len(tickers)))

  per_ticker = []
  for ticker in tickers:
    frame = compute_performance(start_dt, end_dt, ticker)
    if frame is not None:
      per_ticker.append(frame)
  combined = pd.concat(per_ticker)

  report_columns = ['Symbol', 'Name', 'Sector', 'industry', 'companyName', 'close',
                    'month1ChangePercent', 'month3ChangePercent', 'month6ChangePercent',
                    'day200MovingAvg', 'day50MovingAvg']
  merged = pd.merge(company_list, combined, how='inner', on='Symbol')
  return merged[report_columns]

  

# Build the performance table for the default date range and order it by the
# one-month change, best first.
start_dt, end_dt = get_date_ranges()
perf_df = find_best_performing(start_dt, end_dt).sort_values(
    by=['month1ChangePercent'], ascending=False)



<h3> Group by industry, to find best performers </h3>

In [0]:
# Mean 1- and 3-month change per industry, best first. Ties on the 1-month
# figure are broken by the 3-month figure (the sort key list previously named
# month1ChangePercent twice).
res = (
    perf_df[['industry', 'month1ChangePercent', 'month3ChangePercent']]
    .groupby(['industry'])
    .mean()
    .sort_values(by=['month1ChangePercent', 'month3ChangePercent'], ascending=False)
)
res.head(20)

<p> Testing all stocks in portfolio </p>

In [0]:
portfolio_shares = ['ADAC', 'AMBS', 'AMZN', 'AZFL', 'ARSC', 'AAPL', 'APTY',
                    'BTCS', 'BRK-B', 'CRNT', 'CRLBF', 'XOM', 'HAON', 'AGEEF',
                    'HMNY', 'JNJ', 'LEMIF', 'NXTTF', 'NVCN', 'RNVA', 'TORC',
                    'RTRX', 'VALE', 'VZ', 'DGP', 'RUSL', 'REMX', 'TVIX' ]

all_shares = get_all_stocks()
# Set lookup keeps the membership check constant-time per ticker.
known_symbols = set(all_shares)
# Iterate the portfolio list defined above; the previous version referenced an
# undefined `shares` name.
res = map(lambda symbol: (symbol, symbol in known_symbols), portfolio_shares)
# Portfolio tickers that get_all_stocks() does not list.
invalid = [tpl for tpl in res if not tpl[1]]


JSONDecodeError: ignored

<h3> Performance Functions </h3>

In [0]:
def calculate_correlation_all(vix, all_stocks):
  """Return the Pearson correlation matrix of `vix` and the matching stocks.

  Args:
    vix: DataFrame of reference values.
    all_stocks: iterable of (symbol, DataFrame) pairs.

  Only frames with the same number of rows as `vix` are kept; `vix` itself is
  appended last, so it is the final column of the matrix.
  """
  # Row counts are compared against the `vix` argument; the previous version
  # read a global `vix_vals` instead.
  aligned = [vals for _, vals in all_stocks if vals.shape[0] == vix.shape[0]]
  aligned.append(vix)
  all_data = pd.concat(aligned, axis=1)
  return all_data.corr('pearson')

def calculate_portfolio_correlation(all_stocks):
  """Return the Pearson correlation matrix across the given stocks.

  `all_stocks` is an iterable of (symbol, DataFrame) pairs; frames with two
  rows or fewer are left out before the column-wise concatenation.
  """
  usable = []
  for _, prices in all_stocks:
    if prices.shape[0] > 2:
      usable.append(prices)
  combined = pd.concat(usable, axis=1)
  return combined.corr('pearson')


def calculate_correlation(vix, all_stocks):
  """Return the highest positive Pearson correlation between `vix` and a stock.

  Args:
    vix: DataFrame with a '^VIX' column.
    all_stocks: iterable of (symbol, DataFrame) pairs.

  Stocks whose row count differs from `vix` are skipped. Each new best value
  is printed as it is found. Returns 0 when no stock correlates positively.
  """
  best = 0
  for symbol, vals in all_stocks:
    if vals.shape[0] == vix.shape[0]:
      concats  = pd.concat([vix, vals], axis = 1)
      corr_matrix = concats.corr(method='pearson')
      # Second entry of the '^VIX' row is the stock's column (vix comes first
      # in the concat). `.iloc` makes the positional lookup explicit; plain
      # `[1]` on a label-indexed Series is deprecated in pandas.
      corr_with_vix = corr_matrix.loc['^VIX'].iloc[1]
      if corr_with_vix > 0 and corr_with_vix > best:
        print('New Corr with {}:{}'.format(symbol, corr_with_vix))
        best = corr_with_vix
  return best

def _get_most_correlated(result_df):
  df = result_df[['^VIX']]
  bad_df = df.index.isin(['^VIX'])
  return df[~bad_df]
  


In [0]:
# Disabled: separate fetch of the '^VIX' series.
#vix_vals = get_historical_value('^VIX')

# Lazy (symbol, price frame) pairs for the portfolio; the map is consumed once
# by calculate_portfolio_correlation below.
all_stocks_data = map(lambda symbol: (symbol, get_historical_value(symbol)), portfolio_shares)
# Pearson correlation matrix across the portfolio symbols.
best = calculate_portfolio_correlation(all_stocks_data)      


In [0]:
# NOTE(review): _get_most_correlated selects the '^VIX' column, but `best` is
# built from portfolio_shares, which has no '^VIX' entry — this looks like it
# raises KeyError unless `best` came from a different run. Confirm.
res = _get_most_correlated(best)
# Highest correlation with '^VIX' first.
sorted_df = res.sort_values('^VIX', ascending=False)
sorted_df.head(10)


In [0]:
# First row of the sorted frame, i.e. the top entry after the descending sort.
idx = sorted_df.iloc[0]

# Its value, followed by its index label.
print(idx.values.tolist()[0])
print(sorted_df.index[0])

0.07980534120952935
AAME


In [0]:
sorted_df.index


In [0]:
# NOTE(review): no visible cell assigns `all_df` at notebook scope, so this
# line appears to rely on state from an earlier session — confirm or remove.
corr_matrix = all_df.corr(method='pearson')

In [0]:
corr_matrix

In [0]:
# NOTE(review): `all_stocks` is only assigned in a later cell ("Getting All US
# Stocks"), so this cell depends on out-of-order execution.
# At most the first 2000 symbols are quoted; get_quote returns -1 on failure,
# which the > 800 filter below discards.
prices = map(lambda s: (s, get_quote(s)), all_stocks[0:2000])
filtered = [tpl for tpl in prices if tpl[1]  > 800]

<h3> Getting All US Stocks </h3>

In [10]:
# Symbols returned by get_all_us_stocks with its default security_type ('cs').
all_stocks = get_all_us_stocks()
print('We got to find:{}'.format(len(all_stocks)))
# Show the first symbol as a quick sanity check.
all_stocks[0]

We got to find:1823


'A'

<h3> Retrieving News </h3>

In [0]:
# Request up to 100 news items per symbol in `all_stocks` (one get_news call each).
all_news = map(lambda ticker : get_news(ticker, 100), all_stocks) # author's note: will not work — intraday news.
# Materialise the lazy map: one list entry per symbol, in `all_stocks` order.
data = list(all_news)

In [30]:
from datetime import datetime
# `datetime` field of the first news item returned for the last symbol.
ts = data[-1][0]['datetime']
import time
print(ts)
type(ts)
# The value is divided by 1000 before conversion, i.e. it is treated as epoch
# milliseconds.
datetime.fromtimestamp(ts/1000).date()

1572994800000


datetime.date(2019, 11, 5)

<h3> Retrieving News from News API </h3>

In [44]:
# Sample query: articles mentioning Amazon.
# The News API key gets its own name. The IEX helpers read the notebook-level
# `token` each time they are called, so rebinding `token` here would make them
# send the News API key on any later call.
newsapi_token = get_newsapi_keys()
all_news = 'https://newsapi.org/v2/everything?q=Amazon&apiKey={token}'.format(token=newsapi_token)
data = requests.get(all_news).json()
res = data['articles']
# (source name, publication timestamp) for every returned article.
tpl = map(lambda d: (d['source']['name'], d['publishedAt']), res)
from pprint import pprint
pprint(list(tpl))




[('TechCrunch', '2019-10-29T13:17:42Z'),
 ('Engadget', '2019-10-20T23:22:00Z'),
 ('TechCrunch', '2019-10-29T10:00:04Z'),
 ('Lifehacker.com', '2019-10-18T15:10:00Z'),
 ('Lifehacker.com', '2019-10-23T18:45:00Z'),
 ('Engadget', '2019-10-15T06:15:00Z'),
 ('Engadget', '2019-11-01T16:44:00Z'),
 ('Engadget', '2019-11-06T17:10:00Z'),
 ('Engadget', '2019-10-31T21:14:00Z'),
 ('Engadget', '2019-10-17T21:49:00Z'),
 ('Engadget', '2019-10-30T19:10:00Z'),
 ('Engadget', '2019-10-31T13:18:00Z'),
 ('TechCrunch', '2019-11-02T10:19:49Z'),
 ('Engadget', '2019-11-13T11:01:00Z'),
 ('Engadget', '2019-10-16T18:11:00Z'),
 ('Mashable', '2019-11-06T15:29:54Z'),
 ('Mashable', '2019-11-12T14:43:57Z'),
 ('Mashable', '2019-10-29T15:02:23Z'),
 ('Lifehacker.com', '2019-10-16T18:15:00Z'),
 ('Lifehacker.com', '2019-10-27T17:13:00Z')]


In [0]:
# Document-level sentiment score for a single news text.
# NOTE(review): no visible cell assigns `one_news`; this relies on state from
# an earlier session. test_language_api also returns {'error': ...} on
# HttpError, in which case this lookup raises KeyError.
test_language_api(one_news)['documentSentiment']['score']

-0.8