# Updating the SMD

In [None]:
# Imports
import config as cf
import utils as ut

import numpy as np
import pandas as pd
# Turn off pandas' chained-assignment warning (SettingWithCopyWarning) for this session.
pd.options.mode.chained_assignment = None  # default = 'warn'

## Connections

In [None]:
# Connect to DB
# `conn` and `cursor` are the handles passed to the database helpers in the cells below.
conn = ut.connect_db()
cursor = conn.cursor()

# Connect to Alpaca API
# `api` is used to list assets and is handed to the Alpaca price helpers further down.
api = ut.connect_api()

In [None]:
# Get currently listed assets from Alpaca
assets = api.list_assets()

# Get all assets from SMD
# Both results are compared by ut.get_new_tickers in a later cell.
tickers = ut.get_all_tickers(conn)

In [None]:
# String form of ut.get_yesterday(); passed as the final argument to every
# price download/update helper below (presumably the end date -- TODO confirm).
yesterday = str(ut.get_yesterday())
yesterday

In [None]:
# Instruments that ut.get_new_tickers reports for the Alpaca asset list
# versus the tickers already stored.
new_tickers_alpaca = ut.get_new_tickers(assets, tickers, conn)

nothing_new = new_tickers_alpaca.empty
if nothing_new:
  print('No new tickers have been identified. Instruments are up to date.')
  # Presumably set so the "Existing instruments" cells can still filter on
  # `new_tickers` when the "New instruments" cells are skipped -- TODO confirm.
  new_tickers = []

## New instruments

### Adding new instruments

In [None]:
# Display the newly identified instruments for inspection.
new_tickers_alpaca

In [None]:
# Look up, row by row, the id that ut.get_exchange_id returns for the value
# currently stored in the 'exchange_id' column.
exchanges = new_tickers_alpaca['exchange_id'].to_list()
tmp = [ut.get_exchange_id(exchange, conn) for exchange in exchanges]

In [None]:
# Swap the old 'exchange_id' column for the looked-up ids and keep it as the
# first column of the frame.
new_tickers_alpaca.drop(columns='exchange_id', inplace=True)
new_tickers_alpaca.insert(loc=0, column='exchange_id', value=tmp)
new_tickers_alpaca

In [None]:
# Tickers after conversion with ut.cast_to_yf, in sorted order.
alpaca_symbols = new_tickers_alpaca['ticker'].to_list()
new_tickers = sorted(ut.cast_to_yf(alpaca_symbols))

In [None]:
# Fetch corporate information for the new tickers batch by batch and stack
# the per-batch results into a single DataFrame.
splits = ut.determine_splits(new_tickers, cf.BATCH_SIZE)
split_arr = np.array_split(new_tickers, splits)

# Collect the batch frames and concatenate once: DataFrame.append was removed
# in pandas 2.0, and a single concat avoids re-copying the frame per batch.
info_frames = []
for count, array in enumerate(split_arr, start=1):
  print("|---------------| Split {} of {} |---------------|".format(count, splits))
  info_frames.append(ut.get_corporate_info(array, 25, cf.KOI))

# pd.concat raises on an empty list; fall back to an empty frame so the
# result matches what the append-based loop produced with no batches.
new_tickers_info = pd.concat(info_frames) if info_frames else pd.DataFrame()

In [None]:
# Display the collected corporate information for inspection.
new_tickers_info

In [None]:
# Report for how many tickers a sector value was retrieved.
# NOTE(review): the comparison is against the literal string 'NaN', not a
# missing value -- confirm that ut.get_corporate_info fills gaps with it.
has_sector = new_tickers_info['sector'] != 'NaN'
info_count = len(new_tickers_info[has_sector])
ticker_total = len(new_tickers)
coverage_pct = info_count / ticker_total * 100
print('Could retrieve additional information on {} out of {} tickers ({:.2f}%)'.format(info_count, ticker_total, coverage_pct))

In [None]:
# Cast tickers back to Alpaca format, both in the list and in the info frame.
new_tickers = ut.cast_to_alpaca(new_tickers)
tmp = ut.cast_to_alpaca(new_tickers_info['symbol'].tolist())

# Replace the 'symbol' column with a leading 'ticker' column holding the
# converted values.
new_tickers_info.drop(columns='symbol', inplace=True)
new_tickers_info.insert(loc=0, column='ticker', value=tmp)

In [None]:
# Outer-join the Alpaca rows with the corporate info on 'ticker' (rows
# present on only one side are kept), then order the result by ticker.
merged = new_tickers_alpaca.merge(new_tickers_info, on='ticker', how='outer')
new_tickers_df = merged.sort_values(by=['ticker'])

In [None]:
# Swap NaN for None before the rows are written to the database
# (presumably so missing fields are stored as NULL -- TODO confirm with the DB driver).
new_tickers_df = new_tickers_df.replace({np.nan: None})
new_tickers_df

In [None]:
# Write one row per new instrument and commit once at the end.
# Each row tuple is bound positionally, so the DataFrame's column order has
# to line up with the column list in the statement.
insert_sql = "INSERT INTO instrument (exchange_id, ticker, name, sector, industry, country, website) VALUES (%s, %s, %s, %s, %s, %s, %s)"
for record in new_tickers_df.itertuples(index=False):
  cursor.execute(insert_sql, record)
conn.commit()
print("|---------------| Data successfully written to database. |---------------|")

### Downloading price histories

In [None]:
# Rebind new_tickers_df to what ut.get_tickers_from_list returns for the new tickers;
# the merged frame from the previous section is no longer referenced after this.
new_tickers_df = ut.get_tickers_from_list(new_tickers, conn)
new_tickers_df 

In [None]:
# Download price histories for the new instruments in batches of
# cf.BATCH_SIZE rows, collecting what each batch call returns.
splits = ut.determine_splits(new_tickers, cf.BATCH_SIZE)

# Slice by position (.iloc). Label slicing with .loc only yields complete
# batches when the index is exactly 0..n-1 and silently drops rows otherwise.
new_tickers_list = [new_tickers_df.iloc[i : i + cf.BATCH_SIZE] for i in range(0, len(new_tickers_df), cf.BATCH_SIZE)]
new_excepts = []

for count, batch in enumerate(new_tickers_list, start=1):
  print("|---------------| Split {} of {} |---------------|".format(count, splits))
  # The per-batch return values are flattened in the next cell
  # (presumably the tickers that could not be downloaded -- TODO confirm).
  new_excepts.append(ut.write_historical_prices_yf(batch, conn, cursor, None, yesterday))

In [None]:
# Collapse the per-batch results into what ut.flatten_exceptions returns;
# the next cell iterates over it ticker by ticker.
new_excepts = ut.flatten_exceptions(new_excepts)
new_excepts

In [None]:
# Fetch the tickers listed in new_excepts through Alpaca instead.
# Rows are selected per ticker, in the order the tickers appear in new_excepts.
# DataFrame.append was removed in pandas 2.0, so gather the selections and
# concatenate them once.
except_frames = [new_tickers_df[new_tickers_df['ticker'] == ticker] for ticker in new_excepts]

# pd.concat raises on an empty list; pass an empty frame in that case, as the
# append-based version did.
new_alpaca = pd.concat(except_frames) if except_frames else pd.DataFrame()
ut.write_historical_prices_alpaca(new_alpaca, api, conn, cursor, None, yesterday)

## Existing instruments

In [None]:
# Unconverted return value of ut.get_yesterday(); compared against the
# 'last_date' column below (the string form lives in `yesterday`).
yesterday_dt = ut.get_yesterday()
yesterday_dt

In [None]:
# Load the update information for stored instruments; the cells below read
# its 'ticker', 'last_date' and 'vendor_id' columns.
tickers_df = ut.get_update_info_from_list(conn, cursor)
tickers_df

In [None]:
# Keep only instruments that still need an update:
#  - not among the newly added tickers (those are already up to date)
#  - last price date different from yesterday's date
just_added = tickers_df['ticker'].isin(new_tickers)
is_stale = tickers_df['last_date'] != yesterday_dt
tickers_df = tickers_df[~just_added & is_stale]
tickers_df

In [None]:
# Informational message only: the remaining cells are not skipped when the frame is empty.
if tickers_df.empty:
  print('Prices for existing tickers are already up to date.')

In [None]:
# Split the instruments to update by data vendor; the 'vendor_id' column is
# dropped from both resulting frames.
from_yahoo = tickers_df['vendor_id'] == ut.get_vendor_id('Yahoo Finance', conn)
yahoo_tickers_df = tickers_df[from_yahoo].drop(columns='vendor_id')

# yahoo-ify the tickers: the converted symbols replace the 'ticker' column,
# which stays the first column.
tmp = ut.cast_to_yf(yahoo_tickers_df['ticker'].tolist())
yahoo_tickers_df = yahoo_tickers_df.drop(columns='ticker')
yahoo_tickers_df.insert(loc=0, column='ticker', value=tmp)

from_alpaca = tickers_df['vendor_id'] == ut.get_vendor_id('Alpaca', conn)
alpaca_tickers_df = tickers_df[from_alpaca].drop(columns='vendor_id')

In [None]:
# Display the instruments that will be updated from Yahoo Finance.
yahoo_tickers_df

In [None]:
# Display the instruments that will be updated from Alpaca.
alpaca_tickers_df

In [None]:
# Vendor ids handed to the two update helpers in the cells below.
yf = ut.get_vendor_id('Yahoo Finance', conn)
alp = ut.get_vendor_id('Alpaca', conn)

In [None]:
# Download Yahoo Data
# Update prices for the Yahoo-sourced instruments in batches of cf.BATCH_SIZE rows.
splits = ut.determine_splits(yahoo_tickers_df['ticker'].to_list(), cf.BATCH_SIZE)

# Batch by position (.iloc), not by label (.loc): yahoo_tickers_df is a
# filtered subset of tickers_df and keeps that frame's index labels. Label
# windows over range(0, len(...)) would skip every row whose label lies beyond
# the last window and could produce empty batches.
yahoo_tickers_list = [yahoo_tickers_df.iloc[i : i + cf.BATCH_SIZE] for i in range(0, len(yahoo_tickers_df), cf.BATCH_SIZE)]

for count, batch in enumerate(yahoo_tickers_list, start=1):
  print("|---------------| Split {} of {} |---------------|".format(count, splits))
  ut.update_historical_prices_yf(batch, yf, conn, cursor, yesterday)

In [None]:
# Download Alpaca Data
# Unlike the Yahoo update, the whole frame is passed in a single call (no batching here).
ut.update_historical_prices_alpaca(alpaca_tickers_df, alp, api, conn, cursor, yesterday)