<a href="https://colab.research.google.com/github/mmistroni/TensorFlowPlayground/blob/master/QSTK_Portfolio_Exercise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h2>Installing required packages </h2>

In [6]:
!pip install pandas-datareader



In [0]:
import urllib
import json
import pandas as pd
from pandas.tseries.offsets import BDay
import pandas_datareader.data as dr
import numpy as np
from datetime import datetime, date
import pandas as pd
from pprint import pprint
from collections import OrderedDict

<h3> Computing Sharpe Ratio and Returns </h3>

In [0]:
import logging
logger = logging.getLogger(__name__)
from math import sqrt

def get_prices(ls_symbols, dt_start, dt_end):
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    #c_dataobj = da.DataAccess('Yahoo')
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    return  dict(zip(ls_keys, ldf_data)) # each field is returned as a single dataframe

def compute_daily_returns(prices):
    na_rets = prices.copy()
    return tsu.returnize0(na_rets)

def compute_standard_deviations(daily_returns):
    return np.std(daily_returns)

def normalize_prices(prices):
    return prices / prices[0,:]

def compute_sharpe_ratio(daily_returns, standard_dev):
    
    avg = np.average(daily_returns)# / len(daily_returns)
    return (sqrt(252) * avg) / standard_dev

<h3> Compute Statistics </h3>

In [9]:
import pandas as pd
from pandas.tseries.offsets import BDay
import pandas_datareader.data as dr
import numpy as np
from datetime import datetime


def get_closes(start, end, symbol):
  return dr.DataReader(symbol, 'iex', start, end)['close']


def compute_statistics(symbol, prices):
  #prices = get_closes(start, end, symbol)
  daily_returns = compute_daily_returns(prices)
  std_dev  = compute_standard_deviations(daily_returns)
  sharpe = compute_sharpe_ratio(daily_returns, std_dev)
  total_returns = (prices[-1]*1.0 / prices[0]) -1
  
  
  return {'std_dev' : std_dev,
         'sharpe_ratio' : sharpe,
         'total_returns' : total_returns,
         'symbol': symbol}
  

end = pd.Timestamp(datetime.now())
start = end - BDay(250)
symbols =  ['NWVCF', 'TGIFF']
           # ['MARA','DPW','TOPS','HAON','HTBX','IMNP','OPGN','SUZBY','ARCO','TRQ','ARTX','PDLI','ENPH','PLPL',
           #'DNN','ZYNE','EYEG','FLKS','APHB','PHBI','ENDV']

all_dicts = []
for sym in symbols:
  try:
    all_dicts.append((sym, get_closes(start, end, sym)))#compute_statistics(start, end ,sym)) #compute_statistics(sym, start, end))
  except Exception as e:
    print('Unable to fetch statistics for %s:%s' % (sym, str(e)))

Unable to fetch statistics for NWVCF:Unable to read URL: https://api.iextrading.com/1.0/stock/market/batch?symbols=NWVCF&types=chart&range=1y
Response Text:
b'Forbidden'
Unable to fetch statistics for TGIFF:Unable to read URL: https://api.iextrading.com/1.0/stock/market/batch?symbols=TGIFF&types=chart&range=1y
Response Text:
b'Forbidden'


In [10]:
all_df = []
for symbol, prices in all_dicts:
  all_df.append(compute_statistics(symbol, prices))
df = pd.DataFrame(all_df)[['symbol', 'total_returns','std_dev', 'sharpe_ratio']]
df

KeyError: ignored

<h3>Computing Future Earnings </h3>

In [0]:
import urllib
import json
import pandas as pd


def get_future_earnings_df(symbol):

  base_url = 'https://us-central1-datascience-projects.cloudfunctions.net/future_earnings/{}'.format(symbol)
  res = urllib.request.urlopen(base_url)
  json_dict = res.read() #json.load(res)
  try:
    df = pd.read_json(json_dict)

    cons  = df['Consensus'].values
    fisc = df['Fiscal'].values
    new_cols = ['T+{}'.format(idx) for idx in range(1, len(fisc) + 1)]
    
    converted = pd.DataFrame([cons], columns=fisc)
    converted['symbol'] = symbol
    return converted
  except Exception as e:
    print('exception in retrieving earnings for {}/{}'.format(symbol,str(e)))

In [14]:
# Computing Earnings
symbols = """AAPL
MSFT
JNJ
XOM
JPM
AMZN
GOOGL
T
F
CVX
PG
INTC
PFE
CSCO
BRK.B
GOOG
VZ
DIS
HD
PEP
V
SPY
BAC
MRK
IBM
GE
WFC
ABBV
KO
BA
""".split('\n')

all_dfs = [get_future_earnings_df(symbol) for symbol in symbols]

exception in retrieving earnings for SPY/'Consensus'
exception in retrieving earnings for /'Consensus'


In [0]:
good_ones = filter(lambda df: df is not None, all_dfs)

min_shape = min(map(lambda df: df.shape[1], good_ones))
col_to_project = good_ones[0].columns[0:min_shape]

print ('Cols to project:{}'.format(col_to_project))


reduced_df = reduce(lambda acc, item: item if acc is None else acc.append(item), good_ones, None)

reduced_df

In [0]:
merged= pd.merge(df, reduced_df, how='left', on='symbol')

merged

# SAving to csv file
merged.to_csv('/tmp/results.csv')

In [15]:
# checking the file is there
!cat /tmp/results.csv

cat: /tmp/results.csv: No such file or directory


<h3> Computing Moving Average </h3>

In [16]:
import pandas as pd
from datetime import date, datetime
end = pd.Timestamp(datetime.now())
start = end - BDay(250)
import numpy as np


def get_data(start, end, symbol):
  return dr.DataReader(symbol, 'iex', start, end)['close']


def compute_avg(prices):
  mapped = map(lambda tpl: tpl[1], prices)
  return np.average(mapped) if prices else 0.0


def compute_moving_avg(prices, days):
  res = [(prices[i][0], prices[i][1],  compute_avg(prices[i-days:i])) for i in range(0, len(prices))]
  return res
  
  

end = pd.Timestamp(datetime.now())
start = end - BDay(400)  
res =get_data(start, end, 'AMZN')  

all_data = zip(res.index, res)

result = compute_moving_avg(all_data, 200)

dr.DataReader('AMZN', 'iex', date(2019,1,14), date(2019,1,23))

RemoteDataError: ignored

<h3> Creating Market Simulator </h3>

In [0]:
import csv
from datetime import date, datetime
from pandas.tseries.offsets import BDay
import pandas as pd
import logging
from pprint import pprint

multiline = \
"""
2008-12-03,AAPL,BUY,130
2008-12-08,AAPL,SELL,130
2008-12-05,IBM,BUY,50
"""



class Simulator:
  def __init__(self, ticker_dict, initial_cash):
    self.ticker_dict = ticker_dict
    self.initial_cash = initial_cash
    
  def simulate(self, trade):
    print('Updating with {}'.format(trade))
    as_of_dt, ticker, order_type, qty = trade
    current_status = self.ticker_dict[ticker] # a listof (asofdate, qty)
    market_qty = qty if order_type == 'BUY' else -qty
    if not current_status:
      current_status.append((as_of_dt.strftime('%Y-%m-%d'), qty ))
    else:
      prev_dt, prev_qty = current_status[-1]
      new_qty = prev_qty + market_qty
      print('Appending:{} @{}'.format(new_qty, as_of_dt))
      current_status.append((as_of_dt.strftime('%Y-%m-%d'), prev_qty + market_qty))
    
  def current_status(self):
    return self.ticker_dict

def parse_line(line):
  raw_data = line.split(',')
  return Order(raw_data)

def find_all_tickers(orders):
  return set(map(lambda t: t[1], orders))  

def find_date_range(dates):
  ts = pd.bdate_range(dates[0], dates[-1])
  return [d for d in map(lambda t: t.date().strftime('%Y-%m-%d'), ts)]

def generate_trade(row):
  asOfDate, ticker, orderType, qty = row.split(',')
    
  return [datetime.strptime(asOfDate, "%Y-%m-%d").date(),
            ticker, orderType, int(qty)]
    
def market_sim(initial_cash, orders):
  trades = sorted(orders, key=lambda t: t[0])
  tickers = find_all_tickers(trades)
  symbols_dict = OrderedDict()
  for ticker in tickers:
    symbols_dict[ticker] = []
  
  return Simulator(symbols_dict, initial_cash)



<h3> Running Simulator </h3>

In [67]:

trade_rows  = [t for t in filter(lambda row:bool(row), map(lambda row: row.strip(), multiline.split('\n')))]
all_trades =  [generate_trade(row) for row in trade_rows]
simulator = market_sim(1000000,all_trades)
pprint(all_trades)
for trade in all_trades:
  simulator.simulate(trade)
  
  
print ('------ STATUS---- ')
for key, val in simulator.current_status().items():
  print('------ {} ------'.format(key))
  pprint(val)
  



[[datetime.date(2008, 12, 3), 'AAPL', 'BUY', 130],
 [datetime.date(2008, 12, 8), 'AAPL', 'SELL', 130],
 [datetime.date(2008, 12, 5), 'IBM', 'BUY', 50]]
Updating with [datetime.date(2008, 12, 3), 'AAPL', 'BUY', 130]
Updating with [datetime.date(2008, 12, 8), 'AAPL', 'SELL', 130]
Appending:0 @2008-12-08
Updating with [datetime.date(2008, 12, 5), 'IBM', 'BUY', 50]
------ STATUS---- 
------ AAPL ------
[('2008-12-03', 130), ('2008-12-08', 0)]
------ IBM ------
[('2008-12-05', 50)]


<h3>Perfecting the DateRange</h3>

In [85]:
mapped_dates = map(lambda t: t[0], all_trades)
sorted_dates = sorted(mapped_dates, key=lambda x:x)
all_dates = find_date_range(sorted_dates)
all_dates_df = pd.DataFrame(all_dates, columns=['AsOfDate'])


def join_with_all_trades(ticker_items):
  print('Creating Df for {}'.format(ticker_items))
  symbol_df = pd.DataFrame(ticker_items, columns=['AsOfDate', 'Qty'])
  joined = pd.merge(all_dates_df, symbol_df, on='AsOfDate', how='left')
  # forward filling
  updated =  joined.fillna(method='ffill')
  # filling zeroes
  return updated.fillna(0)
  
  
all_dfs = [(k, join_with_all_trades(v)) for k,v in simulator.current_status().items()] 



for symbol, df  in all_dfs:
  print('===={}====='.format(symbol))
  print(df.head(20))
  






#cash = pd.DataFrame(columns=['Cash'], index=date_range)
#portfolio = pd.DataFrame(columns=tickers, index=date_range)


Creating Df for [('2008-12-03', 130), ('2008-12-08', 0)]
Creating Df for [('2008-12-05', 50)]
====AAPL=====
     AsOfDate    Qty
0  2008-12-03  130.0
1  2008-12-04  130.0
2  2008-12-05  130.0
3  2008-12-08    0.0
====IBM=====
     AsOfDate   Qty
0  2008-12-03   0.0
1  2008-12-04   0.0
2  2008-12-05  50.0
3  2008-12-08  50.0
