<a href="https://colab.research.google.com/github/mmistroni/GCP_Experiments/blob/master/StockAndNewsAPIs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:

!pip install pandas-datareader

In [0]:
!pip install -U -q PyDrive

In [0]:
import urllib
import json
import pandas as pd
from pandas.tseries.offsets import BDay
import pandas_datareader.data as dr
import numpy as np
from datetime import datetime, date


<h2>Authenticate User </h2>

In [0]:
# Authenticate the current user through google.colab's auth helper.
from google.colab import auth
auth.authenticate_user()



<h3>Loading Credentials</h3>

In [0]:
# Mount Google Drive at /content/gdrive so the key files read below are reachable.
# NOTE(review): the key readers open the relative path 'gdrive/My Drive/...',
# which only resolves when the working directory is /content — confirm.
from google.colab import drive
drive.mount('/content/gdrive')

def get_iexapi_keys():
  """Return the IEX API token stored on the mounted Google Drive.

  The token is the first line of ``gdrive/My Drive/passwords/iexapi.keys``
  (path relative to the current working directory).  Surrounding whitespace,
  including the trailing newline, is stripped so the value can be embedded
  directly in a request URL.

  Raises:
    ValueError: if the file is empty or its first line is blank.
  """
  with open('gdrive/My Drive/passwords/iexapi.keys') as f:
    key = f.readline().strip()
  if not key:
    raise ValueError('No key found in gdrive/My Drive/passwords/iexapi.keys')
  return key

def get_nlp_service_keys():
  """Return the Google NLP developer key stored on the mounted Google Drive.

  The key is the first line of ``gdrive/My Drive/passwords/nlp.keys`` (path
  relative to the current working directory).  Surrounding whitespace,
  including the trailing newline, is stripped before it is returned.

  Raises:
    ValueError: if the file is empty or its first line is blank.
  """
  with open('gdrive/My Drive/passwords/nlp.keys') as f:
    key = f.readline().strip()
  if not key:
    raise ValueError('No key found in gdrive/My Drive/passwords/nlp.keys')
  return key

def get_newsapi_keys():
  """Return the News API key stored on the mounted Google Drive.

  The key is the first line of ``gdrive/My Drive/passwords/newsapi.keys``
  (path relative to the current working directory).  Surrounding whitespace,
  including the trailing newline, is stripped before it is returned.

  Raises:
    ValueError: if the file is empty or its first line is blank.
  """
  with open('gdrive/My Drive/passwords/newsapi.keys') as f:
    key = f.readline().strip()
  if not key:
    raise ValueError('No key found in gdrive/My Drive/passwords/newsapi.keys')
  return key



<h3>IEX API CALLS </h3>


In [0]:
import requests

# IEX token shared by the IEX helpers defined in this cell.  Whitespace is
# stripped because the key is read as a raw line from a file and a trailing
# newline would corrupt the request URLs it is interpolated into.
token = get_iexapi_keys().strip()

def get_statistics(ticker):
  """Fetch selected IEX key stats for ``ticker`` as a DataFrame.

  The ``/stock/{symbol}/stats`` endpoint is asked for CSV output, which is
  read with ``pd.read_csv``.  A ``Symbol`` column holding ``ticker`` is then
  added to the frame before it is returned.
  """
  stats_url = 'https://cloud.iexapis.com/stable/stock/{symbol}/stats?token={token}&format=csv&filter=companyName,symbol,beta,day50MovingAvg,day200MovingAvg,month6ChangePercent,month3ChangePercent,month1ChangePercent'.format(symbol=ticker, token=token)
  stats = pd.read_csv(stats_url)
  stats['Symbol'] = ticker
  return stats

def get_historical_data(ticker, start, end):
  """Return the IEX statistics frame for ``ticker``.

  ``start`` and ``end`` are accepted but not used: the function simply
  delegates to ``get_statistics``.
  """
  return get_statistics(ticker)


def get_all_stocks():
  """Return the symbols of every enabled common stock ('cs') in the IEX symbol list."""
  symbols_url = 'https://cloud.iexapis.com/stable/ref-data/iex/symbols?token={token}'.format(token=token)
  listed = requests.get(symbols_url).json()
  common_stocks = []
  for entry in listed:
    # Keep only entries that are enabled and typed as common stock.
    if entry['isEnabled'] and entry['type'].lower() == 'cs':
      common_stocks.append(entry['symbol'])
  return common_stocks

def get_all_us_stocks(security_type='cs'):
  """Return the symbols on IEX exchange code 'nys' whose type matches ``security_type``.

  Args:
    security_type: lower-case IEX security type to keep (default 'cs').
  """
  nyse_url = 'https://cloud.iexapis.com/stable/ref-data/exchange/nys/symbols?token={token}'.format(token=token)
  nyse_listing = requests.get(nyse_url).json()
  # The 'nas' exchange is not queried; the original request for it was left disabled.
  matching = []
  for entry in nyse_listing:
    if entry['type'].lower() == security_type:
      matching.append(entry['symbol'])
  return matching

def get_all_etfs():
  """Return the symbols of all ETFs (IEX security type 'et').

  ``get_all_us_stocks`` already filters by type and returns plain symbol
  strings, so the ETF filter is requested through its ``security_type``
  argument rather than by post-filtering its default common-stock result.
  """
  return get_all_us_stocks(security_type='et')

def get_all_stocks_data():
  """Return a lazy iterator of ``(symbol, historical value)`` pairs for every ETF symbol.

  Symbols come from ``get_all_etfs``; each value comes from
  ``get_historical_value``.
  NOTE(review): ``get_historical_value`` is not defined in the visible
  notebook — confirm where it comes from.
  """
  def _with_history(symbol):
    return (symbol, get_historical_value(symbol))

  return map(_with_history, get_all_etfs())


def get_all_exchanges():
  """Return the decoded JSON of the IEX ``ref-data/market/us/exchanges`` endpoint."""
  exchanges_url = 'https://cloud.iexapis.com/stable/ref-data/market/us/exchanges?token={token}'.format(token=token)
  response = requests.get(exchanges_url)
  return response.json()

def get_latest_price(symbol):
  """Return the IEX quote (``symbol`` and ``close`` fields) for ``symbol`` as a DataFrame.

  The quote endpoint is asked for CSV output, which is read with
  ``pd.read_csv``.
  """
  base_url = "https://cloud.iexapis.com/stable/stock/{ticker}/quote?token={token}&format=csv&filter=symbol,close".format(token=token,ticker=symbol)
  return pd.read_csv(base_url)

def get_quote(symbol):
  """Return the latest IEX price for ``symbol``, or -1 when the lookup fails.

  Only ``Exception`` subclasses are treated as a failed lookup (and printed);
  ``KeyboardInterrupt``/``SystemExit`` propagate so a long-running loop over
  many symbols can still be interrupted.
  """
  try:
    historical_url = 'https://cloud.iexapis.com/stable/stock/{symbol}/quote/latestPrice?token={token}'.format(token=token,symbol=symbol)
    return requests.get(historical_url).json()
  except Exception as e:
    print('Exception for {}:{}'.format(symbol, str(e)))
    return -1

def get_news(symbol, num_of_news):
  """Return the last ``num_of_news`` IEX news items for ``symbol``.

  Args:
    symbol: ticker to look up.
    num_of_news: number of most recent items requested.

  Returns:
    The decoded JSON response, or an empty list when the request or the
    JSON decoding raises (the exception is printed).
  """
  try:
    # Single slash after 'stable': the original URL contained 'stable//stock'.
    news_url = 'https://cloud.iexapis.com/stable/stock/{symbol}/news/last/{last}?token={token}'.format(
        symbol=symbol, last=num_of_news, token=token)
    return requests.get(news_url).json()
  except Exception as e :
    print('Excepiton for {}:{}'.format(symbol, str(e)))
    return []


<h3> Yahoo API Calls </h3>

In [0]:
from scipy.stats import pearsonr
import requests

def get_latest_price_yahoo(symbol, as_of_date):
  """Return the Yahoo closing price of ``symbol`` on ``as_of_date``.

  Args:
    symbol: ticker to download.
    as_of_date: used as both the start and the end of the requested range.

  Returns:
    A DataFrame with a ``Close`` column and a ``Symbol`` column holding
    ``symbol``.  When the download fails, the error is printed and an empty
    DataFrame with a single column named ``symbol`` is returned.
  """
  try:
    print('--latest price for{}'.format(symbol))
    # Copy the slice so that adding a column does not write into a view.
    latest = dr.get_data_yahoo(symbol, as_of_date, as_of_date)[['Close']].copy()
    latest['Symbol'] = symbol
    return latest
  except Exception as e :
    print('Unable to get latest price for {}:{}'.format(symbol, str(e)))
    return pd.DataFrame(columns=[symbol])

def get_historical_data_yahoo(symbol, start_dt, end_dt):
  """Return Yahoo adjusted closing prices of ``symbol`` between two dates.

  Args:
    symbol: ticker to download.
    start_dt: first date of the requested range.
    end_dt: last date of the requested range.

  Returns:
    A single-column DataFrame in which ``Adj Close`` is renamed to
    ``symbol``.  When the download fails, the error is printed and an empty
    DataFrame with that single column is returned.
  """
  try:
    print('hist dat for:{}'.format(symbol))
    data = dr.get_data_yahoo(symbol, start_dt, end_dt)[['Adj Close']]
    return data.rename(columns={'Adj Close' : symbol})
  except Exception as e :
    print('Unable to get historical data for {}:{}'.format(symbol, str(e)))
    return pd.DataFrame(columns=[symbol])
  

<h3> Getting Sentiment Analysis from Google </h3>

In [0]:
def get_sentiment_from_google(content):
  """Analyse ``content`` with the Google Cloud Natural Language client.

  Args:
    content: plain text to analyse.

  Returns:
    The ``document_sentiment`` of the ``analyze_sentiment`` response.
  """
  from google.cloud import language
  from google.cloud.language import enums
  from google.cloud.language import types

  client = language.LanguageServiceClient()
  # Send the text passed by the caller (the original referenced an undefined name).
  document = types.Document(
      content=content,
      type=enums.Document.Type.PLAIN_TEXT)

  # Detects the sentiment of the text
  sentiment = client.analyze_sentiment(document=document).document_sentiment
  return sentiment

  



<h3> Google Language API </h3>

In [0]:
def test_language_api(content):
  """Return the document sentiment score of ``content`` from the Google Language REST API.

  The text is sent to ``documents().annotateText`` (API 'language', version
  'v1') through a ``googleapiclient`` discovery client that uses the
  developer key returned by ``get_nlp_service_keys``.

  Args:
    content: plain text to analyse.

  Returns:
    ``response['documentSentiment']['score']`` on success, or 0 when the
    request raises ``HttpError`` (the error is printed).
  """
  import httplib2
  import sys
  from googleapiclient import discovery
  from googleapiclient.errors import HttpError

  discovery_url = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'

  service = discovery.build(
      'language', 'v1',
      http=httplib2.Http(),
      discoveryServiceUrl=discovery_url,
      # The key is read as a raw file line; drop any trailing newline.
      developerKey=get_nlp_service_keys().strip(),
  )
  service_request = service.documents().annotateText(
      body={
          'document': {
              'type': 'PLAIN_TEXT',
              'content': content,
          },
          'features': {
              'extract_syntax': True,
              'extractEntities': True,
              'extractDocumentSentiment': True,
          },
          # Pick the encoding that matches this interpreter's native string width.
          'encodingType': 'UTF16' if sys.maxunicode == 65535 else 'UTF32',
      })
  try:
      response = service_request.execute()
      return  response['documentSentiment']['score']

  except HttpError as e:
      print('exception:{}'.format(str(e)))
      return 0


<h3> Getting News from News API </h3>

In [0]:
def retrieve_all_news(symbol):
  """Return an iterator over the ``content`` of every News API article matching ``symbol``.

  The query and API key are passed through ``params`` so that ``requests``
  URL-encodes them.  A response without an ``articles`` entry yields an
  empty iterator instead of raising ``KeyError``.

  Args:
    symbol: search term sent as the ``q`` parameter.
  """
  print('Retrieving all news for@{}'.format(symbol))
  # The key is read as a raw file line; drop any trailing newline.
  token = get_newsapi_keys().strip()
  payload = requests.get('https://newsapi.org/v2/everything',
                         params={'q': symbol, 'apiKey': token}).json()
  articles = payload.get('articles', [])
  return map(lambda article: article['content'], articles)

def calculate_sentiment(news_items):
  """Score every news item with ``test_language_api`` and summarise the scores.

  Args:
    news_items: iterable of texts to score.

  Returns:
    A dict with ``total`` (sum of all scores), ``positive`` (scores above 0)
    and ``negative`` (scores below 0).
  """
  all_news = [test_language_api(item) for item in news_items]
  # Plain print: pprint is never imported in this notebook.
  print('Total:{}'.format(all_news))
  return dict(total=sum(all_news),
              positive=[i for i in all_news if i > 0],
              negative=[i for i in all_news if i < 0])

def calculate_news_sentiment(ticker):
  """Fetch all news for ``ticker`` and return the sentiment summary of their content."""
  news_items = [item for item in retrieve_all_news(ticker)]
  return calculate_sentiment(news_items)



<h3>Computing various  metrics </h3>

In [0]:
# we need to sort out which function we need.
#1. calculate sharpe ratio  DONE
#2. get month change percent m30
#3. get full performance   DONE
#4. get news  DONE
#5 group together and compute

import logging
logger = logging.getLogger(__name__)
from math import sqrt

def calculate_daily_returns(prices):
  """Return the one-period percentage change of ``prices`` (first row is NaN)."""
  daily_change = prices.pct_change(1)
  return daily_change

def calculate_daily_cumulative_returns(daily_pc):
  """Return the cumulative product of ``1 + daily_pc``."""
  growth_factors = 1 + daily_pc
  return growth_factors.cumprod()

def compute_standard_deviation(daily):
  """Return the standard deviation of the first column of ``daily``."""
  first_col = daily.columns[0]
  return daily.loc[:, first_col].std()

def compute_sharpe_ratio(s_prices):
  """Return ``sqrt(252) * mean / std`` of the daily returns of the first column.

  Daily returns come from ``calculate_daily_returns`` and the standard
  deviation from ``compute_standard_deviation``; no risk-free rate is
  subtracted.
  """
  daily_returns = calculate_daily_returns(s_prices)
  first_col = daily_returns.columns[0]
  mean_return = daily_returns.loc[:, first_col].mean()
  volatility = compute_standard_deviation(daily_returns)
  annualisation = sqrt(252)
  return (annualisation * mean_return) / volatility

def compute_moving_averages(prices, day):
  """Return the rolling mean of ``prices`` over a window of ``day`` rows."""
  print('Computing moving avg for:{}'.format(day))
  rolling_window = prices.rolling(window=day)
  return rolling_window.mean()

def check_prices_vs_moving_averages(prices, day=30):
  """Return the fraction of rows whose price is above its ``day``-row moving average.

  Args:
    prices: DataFrame whose first column holds the prices.
    day: rolling window length passed to ``compute_moving_averages``
      (default 30).

  Returns:
    Rows above the moving average divided by the total number of rows, as a
    float.  Rows where the average is still NaN (window not yet full) count
    as not above.  An empty ``prices`` yields 0.0.
  """
  total_prices = prices.shape[0]
  if total_prices == 0:
    # Nothing to compare; avoid dividing by zero.
    return 0.0
  moving_avg = compute_moving_averages(prices, day)
  ticker_col = moving_avg.columns[0]
  ma_col = '{}M{}'.format(ticker_col, day)
  moving_avg = moving_avg.rename({ticker_col: ma_col}, axis=1)
  combined = pd.concat([prices, moving_avg], axis=1)
  above = combined[ticker_col] > combined[ma_col]
  return 1.0 * int(above.sum()) / total_prices

def compute_performance(historical_df):
  """Return the relative change between the first and last price.

  The first column of ``historical_df`` is used, so the function works for
  any ticker rather than only for a column named 'AMZN'.

  NOTE: a later cell of this notebook defines another ``compute_performance``
  with a different signature, which replaces this one once that cell runs.

  Args:
    historical_df: DataFrame whose first column holds the prices in
      chronological order.

  Returns:
    ``end / start - 1``.
  """
  prices = historical_df.iloc[:, 0].values
  start = prices[0]
  end = prices[-1]
  print('Start:{}, End:{}'.format(start, end))
  return end*1.0/start - 1

def compute_metrics(prices):
  """Return a dict of metrics for the ticker named by the first column of ``prices``.

  Keys: ``Performance``, ``AboveMovingAvgPcnt``, ``SharpeRatio`` and
  ``News_Sentiment``, each produced by the matching helper.
  """
  ticker = prices.columns[0]
  print('Computing Metrics for:{}'.format(ticker))
  print('Computing performance..')
  performance = compute_performance(prices)
  print('Computing days above moving average..')
  above_ma_pcnt = check_prices_vs_moving_averages(prices)
  print('comuting sharpe ratio....')
  sharpe = compute_sharpe_ratio(prices)
  print('Returning data...')
  sentiment = calculate_news_sentiment(ticker)
  return {
      'Performance': performance,
      'AboveMovingAvgPcnt': above_ma_pcnt,
      'SharpeRatio': sharpe,
      'News_Sentiment': sentiment,
  }


In [0]:
# get yahoo historical data
from datetime import date
# Download AMZN prices from 1 Oct 2019 up to today through the Yahoo helper,
# then compute the metrics dictionary for that price history.
historical_df =  get_historical_data_yahoo('AMZN', date(2019,10,1), date.today())
compute_metrics(historical_df)



In [0]:
# Sum of the daily percentage changes.  pct_change() leaves NaN in the first
# row and numpy's sum propagates NaN, so the NaN rows are dropped first.
historical_df.pct_change().dropna().values.sum()

In [0]:
# Manual check of the AMZN performance: relative change between the first and
# the last value of the 'AMZN' column.
start = historical_df['AMZN'].values[0]
end = historical_df['AMZN'].values[-1]
pcnt = end/start - 1
print('Start:{}.end:{}.Change:{}'.format(start, end, pcnt))



In [0]:
# Fetch the IEX statistics frame for AMZN (see the cost note on the call).
get_statistics('AMZN') # this cost 5 messages per share

<h3> Reading source data and computing performance </h3>

In [0]:
def get_date_ranges():
  """Return ``(start, end)`` where ``end`` is today and ``start`` is 30 business days earlier."""
  today = date.today()
  return today - BDay(30), today

def compute_performance(start_dt, end_dt, ticker):
  """Merge the Yahoo history of ``ticker`` with its latest Yahoo price.

  NOTE: this definition reuses the name of the single-argument
  ``compute_performance(historical_df)`` defined in an earlier cell and
  replaces it once this cell has run.

  Args:
    start_dt: first date of the historical range.
    end_dt: last date of the historical range; also the latest-price date.
    ticker: symbol to download; also the column the two frames are joined on.

  Returns:
    The inner merge of both frames on ``ticker``, or ``None`` when anything
    raises (the error is printed).
  """
  try:
    # Here we need to compute, in addition to performance, also sharpe ratio etc.
    history = get_historical_data_yahoo(ticker, start_dt, end_dt)
    print('hist df cols:{}'.format(history.columns))
    latest = get_latest_price_yahoo(ticker, end_dt)
    print('latest df ocls:{}'.format(latest.columns))
    return pd.merge(history, latest, how='inner', on=ticker)
  except Exception as e:
    print('Exception:{}'.format(str(e)))
    print('Unable to find data for {}:{}'.format(ticker,str(e)))
    return None
    
def find_best_performing(start_dt, end_dt):
  """Collect per-symbol performance frames and join them with ``nyse_df`` on 'Symbol'.

  The symbol list is currently hard-coded to ``['ABBV']``.  Symbols for which
  ``compute_performance`` returns ``None`` are skipped.
  NOTE(review): ``nyse_df`` is not defined in the visible notebook — confirm
  where it is loaded.

  Args:
    start_dt: start of the period passed to ``compute_performance``.
    end_dt: end of the period passed to ``compute_performance``.

  Returns:
    The merged frame restricted to the reporting columns listed below.
  """
  print('Finding Best Performing Stocks between:{}-{}'.format(start_dt, end_dt))
  symbols = ['ABBV'] #get_all_us_stocks()[0:10]
  print('Now we have to source data for:{}'.format(len(symbols)))
  report_columns = ['Symbol', 'Name', 'Sector', 'industry', 'companyName', 'close',
                    'month1ChangePercent', 'month3ChangePercent', 'month6ChangePercent',
                    'day200MovingAvg', 'day50MovingAvg']
  # Lazily compute each frame and drop the symbols that produced no data.
  usable_frames = (
      frame
      for frame in (compute_performance(start_dt, end_dt, sym) for sym in symbols)
      if frame is not None
  )
  all_data = pd.concat(usable_frames)
  merged = pd.merge(nyse_df, all_data, how='inner', on='Symbol')
  return merged[report_columns]

  

# Build the performance frame for the period returned by get_date_ranges().
start_dt, end_dt = get_date_ranges()
perf_df = find_best_performing(start_dt, end_dt)
# Sorting 
#perf_df.sort_values(by=['month1ChangePercent'], inplace=True, ascending=False)



In [0]:
# Ad-hoc check of the Yahoo helper: AMZN prices from 24 Nov 2019 up to today.
get_historical_data_yahoo('AMZN', date(2019,11,24), date.today())


<h2> TODO: Fetch news for every ticker and find out its sentiment, then build a dataframe of symbol, positive news, performance </h2>

<h3> Group by industry, to find best performers </h3>

In [0]:
# Mean 1- and 3-month change per industry, best first.  The 3-month change is
# the tie-breaker (the original listed the 1-month column twice).
res = perf_df[['industry', 'month1ChangePercent','month3ChangePercent', ]].groupby(['industry']).mean().sort_values(by=['month1ChangePercent','month3ChangePercent'], ascending=False)
res.head(20)

<p> Testing all stocks in portfolio </p>

In [0]:
# Tickers held in the portfolio.
portfolio_shares = ['ADAC', 'AMBS', 'AMZN', 'AZFL', 'ARSC', 'AAPL', 'APTY',
                    'BTCS', 'BRK-B', 'CRNT', 'CRLBF', 'XOM', 'HAON', 'AGEEF',
                    'HMNY', 'JNJ', 'LEMIF', 'NXTTF', 'NVCN', 'RNVA', 'TORC',
                    'RTRX', 'VALE', 'VZ', 'DGP', 'RUSL', 'REMX', 'TVIX' ]

# Pair every portfolio ticker with whether get_all_stocks() lists it, then
# keep the pairs that are not listed.  The original iterated over an
# undefined name `shares`.
all_shares = get_all_stocks()
res = [(symbol, symbol in all_shares) for symbol in portfolio_shares]
invalid = [tpl for tpl in res if not tpl[1]]


<h3> Performance Functions </h3>

In [0]:
def calculate_correlation_all(vix, all_stocks):
  """Return the Pearson correlation matrix of ``vix`` and every stock of matching length.

  Args:
    vix: DataFrame of VIX values.
    all_stocks: iterable of ``(symbol, DataFrame)`` pairs; frames whose row
      count differs from ``vix`` are skipped.

  Returns:
    ``DataFrame.corr('pearson')`` of the kept frames concatenated
    column-wise, with ``vix`` as the last block of columns.
  """
  # Compare against the `vix` argument, not a notebook-level global.
  frames = [vals for _, vals in all_stocks if vals.shape[0] == vix.shape[0]]
  frames.append(vix)
  all_data = pd.concat(frames, axis=1)
  return all_data.corr('pearson')

def calculate_portfolio_correlation(all_stocks):
  """Return the Pearson correlation matrix of all stock frames with more than two rows.

  ``all_stocks`` is an iterable of ``(symbol, DataFrame)`` pairs; the kept
  frames are concatenated column-wise before the correlation is computed.
  """
  long_enough = []
  for _, frame in all_stocks:
    if frame.shape[0] > 2:
      long_enough.append(frame)
  combined = pd.concat(long_enough, axis=1)
  return combined.corr('pearson')


def calculate_correlation(vix, all_stocks):
  """Return the highest positive Pearson correlation between ``vix`` and any stock.

  Args:
    vix: DataFrame containing a '^VIX' column.
    all_stocks: iterable of ``(symbol, DataFrame)`` pairs; frames whose row
      count differs from ``vix`` are skipped.

  Returns:
    The best correlation found, or 0 when no stock correlates positively.
  """
  best = 0
  for symbol, vals in all_stocks:
    if vals.shape[0] == vix.shape[0]:
      concats  = pd.concat([vix, vals], axis = 1)
      corr_matrix = concats.corr(method='pearson')
      # Positional lookup: the '^VIX' row is label-indexed, so use .iloc to
      # read its second entry (the stock column).
      corr_with_vix = corr_matrix.loc['^VIX'].iloc[1]
      # `best` starts at 0 and only grows, so this also implies a positive value.
      if corr_with_vix > best:
        print('New Corr with {}:{}'.format(symbol, corr_with_vix))
        best = corr_with_vix
  return best

def _get_most_correlated(result_df):
  df = result_df[['^VIX']]
  bad_df = df.index.isin(['^VIX'])
  return df[~bad_df]
  


In [0]:
#vix_vals = get_historical_value('^VIX')

# Build lazy (symbol, price history) pairs for every portfolio ticker and
# compute their pairwise Pearson correlation matrix.
# NOTE(review): get_historical_value is not defined anywhere in the visible
# notebook, so this cell presumably raises NameError on a fresh kernel —
# confirm which helper was intended.
all_stocks_data = map(lambda symbol: (symbol, get_historical_value(symbol)), portfolio_shares)
best = calculate_portfolio_correlation(all_stocks_data)      


In [0]:
# Keep only the '^VIX' correlation column and rank the remaining symbols by it.
# NOTE(review): `best` is built from portfolio_shares only, which does not
# contain '^VIX', so the '^VIX' column lookup presumably fails — confirm.
res = _get_most_correlated(best)
sorted_df = res.sort_values('^VIX', ascending=False)
sorted_df.head(10)


In [0]:
# Top-ranked row of sorted_df: print its first value, then its index label.
# Despite the name, `idx` holds the row itself, not an index.
idx = sorted_df.iloc[0]

print(idx.values.tolist()[0])
print(sorted_df.index[0])

<h3> Getting All US Stocks </h3>

In [0]:
# Load the symbol list from get_all_us_stocks() (default security type 'cs'),
# report how many were found and show the first one.
all_stocks = get_all_us_stocks()
print('We got to find:{}'.format(len(all_stocks)))
all_stocks[0]

<h3> Retrieving News </h3>

In [0]:
def get_iexapi_news(ticker):
  """Return, for every symbol in the global ``all_stocks``, its last 100 IEX news items.

  ``ticker`` is accepted but not used: the news are fetched for the whole
  ``all_stocks`` list, one ``get_news`` call per symbol.  The original author
  flagged this as not working for intraday news.
  """
  return [get_news(symbol, 100) for symbol in all_stocks]