In [1]:
%reset -f
import os
import sys

PROJ_PATH = os.path.abspath(os.path.join(os.getcwd(), ".."))

sys.path.append(PROJ_PATH) 

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import itertools
import random

import datetime as DT
from datetime import date
from datetime import datetime, timedelta
#import yfinance as yf

import time
from multiprocessing import Pool as ProcessPool 
from multiprocessing.dummy import Pool as ThreadPool

import requests
from io import StringIO
import helper_functions as hfs

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry 

import warnings
warnings.simplefilter("ignore", UserWarning)
warnings.filterwarnings("ignore")

pd.set_option("display.max_columns", None)  # to see all columns of dataframe
pd.set_option("display.max_rows", None)  # to see all rows of dataframe
pd.set_option("display.max_colwidth", None)  # to see the full (un-truncated) dataframe field/cell value 

In [2]:
import configparser

env_config = configparser.ConfigParser()
env_config.read(f'{PROJ_PATH}/kye/env_config.ini')


#env_config.read('./kye/env_config_backtest.ini')

# <<<<<<<<<<<<<<< >>>>>>>>>>>>>>>>>>>>>>> #
SPLIT_DATE = env_config['ENV_VARS']['SPLIT_DATE']
 
config = {}
config['api_key'] = env_config['KEYS']['DATA_KEY']
ML_DATA_FOLDER_PATH = env_config['LOCAL_PATHS']['ML_DATA_FOLDER_PATH']
PROCESSING_TEMP_FOLDER_PATH = env_config['LOCAL_PATHS']['PROCESSING_TEMP_FOLDER_PATH'] 
current_portfolio = env_config['PORTFOLIO']['CURRENT_PORTFOLIO'].split(",")
recent_imp_tickers = env_config['PORTFOLIO']['recent_imp_tickers'].split(",")

market_scenarios = [float(x) for x in env_config['MARKET_SCENARIOS']['SCENARIOS_LIST'].split(",")]

#@@@@@@ **********************<<<<<<<<<<<<#

print("SPLIT_DATE: ", SPLIT_DATE)
today = str(date.today())
FROM_DATE = pd.to_datetime(date.today(), format='%Y-%m-%d') - DT.timedelta(days=((365*2) + 100)) # "2016-01-01"
to_date = str(date.today())
print("from date:", FROM_DATE)
 
res_exchanges = requests.get("https://eodhistoricaldata.com/api/exchanges-list?api_token=" + config['api_key'])
res_exchanges = res_exchanges.json() 
exchanges = pd.json_normalize(res_exchanges) 
print(exchanges.shape)
exchanges.head(2) 

SPLIT_DATE:  2023-08-09
from date: 2021-05-01 00:00:00
(77, 7)


Unnamed: 0,Name,Code,OperatingMIC,Country,Currency,CountryISO2,CountryISO3
0,USA Stocks,US,"XNAS, XNYS",USA,USD,US,USA
1,London Exchange,LSE,XLON,UK,GBP,GB,GBR


# Get Ticker list

In [3]:
def get_ticker_list(exchange_Code = "US", api_token = config['api_key'], session=None):

    if session is None:

        session = requests.Session()
        
        url = "https://eodhistoricaldata.com/api/exchange-symbol-list/"+ exchange_Code + "?api_token=" + config['api_key']
               
        r = session.get(url)

        if r.status_code == requests.codes.ok:

            df = pd.read_csv(StringIO(r.text), skipfooter=0, engine='python').reset_index(drop=True)

            return df

        else: 
            raise Exception(r.status_code, r.reason, url) 

            
# Download USA stocks data here:
# https://eodhistoricaldata.com/api/exchange-symbol-list/US?api_token=5f67cd8054e893.64203500
# !pip install git+https://github.com/femtotrader/python-eodhistoricaldata.git

USA_tickers = get_ticker_list(exchange_Code = "US", api_token = config['api_key'])
print(USA_tickers.shape)
USA_tickers.tail(2) 

(49808, 7)


Unnamed: 0,Code,Name,Country,Exchange,Currency,Type,Isin
49806,ZZLL,ZZLL Information Technology Inc,USA,PINK,USD,Common Stock,US98880P2020
49807,ZZZOF,Zinc One Resources Inc,USA,PINK,USD,Common Stock,


In [4]:
USA_tickers['Exchange'].value_counts()

NMFQS        25679
PINK          9825
NASDAQ        5435
NYSE          3022
NYSE ARCA     2073
OTCQB         1172
BATS           634
OTCQX          607
OTCGREY        509
OTCCE          478
NYSE MKT       313
OTCMKTS         46
US               6
AMEX             5
Name: Exchange, dtype: int64

In [5]:
stocks_NYSE_NASDAQ = USA_tickers[ ( USA_tickers['Exchange'].isin(['NYSE','NASDAQ','NYSE ARCA','NYSE MKT'
                                                                  ]) ) 
                           & ( USA_tickers['Type'].isin(['Common Stock']) ) 
                         ].reset_index(drop=True)
stocks_NYSE_NASDAQ.rename(columns={"Name" : "company_Name"} , inplace=True)
print(stocks_NYSE_NASDAQ.shape) 
stocks_NYSE_NASDAQ.head(2) 

(7443, 7)


Unnamed: 0,Code,company_Name,Country,Exchange,Currency,Type,Isin
0,A,Agilent Technologies Inc,USA,NYSE,USD,Common Stock,US00846U1016
1,AA,Alcoa Corp,USA,NYSE,USD,Common Stock,US0138721065


In [6]:
stocks_NYSE_NASDAQ['Exchange'].value_counts()

NASDAQ       4622
NYSE         2470
NYSE MKT      278
NYSE ARCA      73
Name: Exchange, dtype: int64

# Historical Prices

In [7]:
def get_stock_history_eod(symbol="AAPL", from_yyyy_mm_dd = "all", to_yyyy_mm_dd = "all", api_token = config['api_key']):
     
    #print(f"symbol: {symbol}")
    if from_yyyy_mm_dd != "all":
        url = "https://eodhistoricaldata.com/api/eod/" + symbol + ".US?from="+from_yyyy_mm_dd+"&to="+to_yyyy_mm_dd+"&api_token=" + config['api_key'] + "&period=d"
    else:
        url = "https://eodhistoricaldata.com/api/eod/" + symbol + ".US?&api_token=" + config['api_key'] + "&period=d"

    #print(url)
    r = hfs.requests_retry_session().get(url)
    c = 0
    while r.status_code != requests.codes.ok:

        if r.status_code == 429: 
            #print("waiting 5 sec for ", symbol)
            if c == 5:
                print("c == 5 breaking", symbol)
                break
            time.sleep(random.randint(3, 6))
            try:
                r = hfs.requests_retry_session().get(url)
                c+=1
            except:
                continue

        else:
            print(symbol, " breaking : " + str(r.status_code) + " <> status code") 
            break
            #return pd.DataFrame([{'Ticker' : symbol,  'Open' : r.reason,  'Date' : r.status_code,  'Close': url}]).reset_index(drop=True)
            print("******* raising Exception *****",type(r.status_code), r.status_code)
            raise Exception(symbol, r.status_code, r.reason, url) 

    else:

        df = pd.read_csv(StringIO(r.text), skipfooter=0, engine='python').reset_index(drop=True) # 
        df['Ticker'] = symbol
        return df.reset_index(drop=True) 

### Getting Daily OHLCV

In [9]:
print(stocks_NYSE_NASDAQ['Code'].nunique()) 
tickers = [x for x in stocks_NYSE_NASDAQ['Code'].unique() if x==x]
print(len(tickers))
print("total #tickers: ", len(tickers))

args = ((symbol,      
        str(FROM_DATE).split()[0],   # From date
         str(date.today()), # to date
        config['api_key']) for symbol in tickers )

stocks_daily_ohlcv = hfs.get_data_from_API(tickers = tickers,
                                          func_to_run = get_stock_history_eod,
                                          func_args = args ) 

print("unique #tickers: ", stocks_daily_ohlcv['Ticker'].nunique()) 
print(stocks_daily_ohlcv.shape) 
stocks_daily_ohlcv.tail(2) 

7442
7442
total #tickers:  7442
chunk  1 :  995
chunk  2 :  995
chunk  3 :  995
chunk  4 :  995
chunk  5 :  995
chunk  6 :  995
chunk  7 :  995
chunk  8 :  477
total time:  586.5408220291138
returning df
unique #tickers:  7438
(3774182, 9)


Unnamed: 0,Date,Open,High,Low,Close,Adjusted_close,Volume,Ticker,Value
3774180,2023-08-08,8.01,8.02,7.69,7.76,7.76,373000.0,ZYXI,
3774181,2023-08-09,7.78,8.09,7.63,7.69,7.69,267016.0,ZYXI,


In [10]:
stocks_daily_ohlcv['day_of_week'] = pd.to_datetime(stocks_daily_ohlcv['Date'])
stocks_daily_ohlcv['day_of_week'] = [x.weekday() for x in stocks_daily_ohlcv['day_of_week']]
stocks_daily_ohlcv = stocks_daily_ohlcv[stocks_daily_ohlcv['day_of_week']<=4].reset_index(drop=True)
del stocks_daily_ohlcv['day_of_week']
stocks_daily_ohlcv.head(2) 

Unnamed: 0,Date,Open,High,Low,Close,Adjusted_close,Volume,Ticker,Value
0,2021-05-03,134.29,134.3099,132.61,133.19,131.3439,1032021.0,A,
1,2021-05-04,132.12,132.51,130.35,130.9,129.0856,1752428.0,A,


In [11]:
active_tickers = stocks_daily_ohlcv['Ticker'][stocks_daily_ohlcv['Date']==SPLIT_DATE].unique()
stocks_daily_ohlcv = stocks_daily_ohlcv[stocks_daily_ohlcv['Ticker'].isin([x for x in active_tickers])].reset_index(drop=True)
print(stocks_daily_ohlcv.shape)
stocks_daily_ohlcv.head(2)
len(active_tickers) 

(3484215, 9)


6671

In [12]:
stocks_daily_ohlcv[stocks_daily_ohlcv['Date']==SPLIT_DATE].shape 

(6671, 9)

In [13]:
stocks_daily_ohlcv = pd.merge(stocks_daily_ohlcv, stocks_NYSE_NASDAQ[['Code', 'company_Name','Exchange']], 
                              how = "left", left_on=['Ticker'] , right_on = ['Code']) 
del stocks_daily_ohlcv['Code'] 
print(stocks_daily_ohlcv.shape) 
stocks_daily_ohlcv.tail(2) 

(3484215, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adjusted_close,Volume,Ticker,Value,company_Name,Exchange
3484213,2023-08-08,8.01,8.02,7.69,7.76,7.76,373000.0,ZYXI,,Zynex Inc,NASDAQ
3484214,2023-08-09,7.78,8.09,7.63,7.69,7.69,267016.0,ZYXI,,Zynex Inc,NASDAQ


In [14]:
stocks_daily_ohlcv = stocks_daily_ohlcv[stocks_daily_ohlcv['Ticker'].apply(lambda x: '-' not in x)].reset_index(drop=True)

In [15]:
stocks_daily_ohlcv = stocks_daily_ohlcv[-pd.isnull(stocks_daily_ohlcv['Close'])].reset_index(drop=True) 
print(stocks_daily_ohlcv.shape) 
stocks_daily_ohlcv.tail(2) 

(3388750, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adjusted_close,Volume,Ticker,Value,company_Name,Exchange
3388748,2023-08-08,8.01,8.02,7.69,7.76,7.76,373000.0,ZYXI,,Zynex Inc,NASDAQ
3388749,2023-08-09,7.78,8.09,7.63,7.69,7.69,267016.0,ZYXI,,Zynex Inc,NASDAQ


In [16]:
stocks_daily_ohlcv.head(2) 

Unnamed: 0,Date,Open,High,Low,Close,Adjusted_close,Volume,Ticker,Value,company_Name,Exchange
0,2021-05-03,134.29,134.3099,132.61,133.19,131.3439,1032021.0,A,,Agilent Technologies Inc,NYSE
1,2021-05-04,132.12,132.51,130.35,130.9,129.0856,1752428.0,A,,Agilent Technologies Inc,NYSE


In [17]:
#stocks_daily_ohlcv.rename(columns={"company_Name_x" : "company_Name"}, inplace=True)

### OHLCV Adjust for splits

In [18]:
print("SPLIT_DATE: ", SPLIT_DATE)
# Adjust for Splits 

stocks_daily_ohlcv['close_to_adj_close_ratio'] = (stocks_daily_ohlcv['Close'] / stocks_daily_ohlcv['Adjusted_close']) #.astype(int) 

stocks_daily_ohlcv['Adjusted_Open'] = np.where(stocks_daily_ohlcv['close_to_adj_close_ratio'] == 1, 
                                          stocks_daily_ohlcv['Open'],
                                          stocks_daily_ohlcv['Open'] / stocks_daily_ohlcv['close_to_adj_close_ratio'])

stocks_daily_ohlcv['Adjusted_High'] = np.where(stocks_daily_ohlcv['close_to_adj_close_ratio'] == 1, 
                                          stocks_daily_ohlcv['High'],
                                          stocks_daily_ohlcv['High'] / stocks_daily_ohlcv['close_to_adj_close_ratio'])

stocks_daily_ohlcv['Adjusted_Low'] = np.where(stocks_daily_ohlcv['close_to_adj_close_ratio'] == 1, 
                                          stocks_daily_ohlcv['Low'],
                                          stocks_daily_ohlcv['Low'] / stocks_daily_ohlcv['close_to_adj_close_ratio'])

stocks_daily_ohlcv['Adjusted_Volume'] = np.where(stocks_daily_ohlcv['close_to_adj_close_ratio'] == 1, 
                                          stocks_daily_ohlcv['Volume'],
                                          stocks_daily_ohlcv['Volume'] * stocks_daily_ohlcv['close_to_adj_close_ratio'])

stocks_daily_ohlcv = stocks_daily_ohlcv[['company_Name','Exchange','Ticker', 'Date', 'Adjusted_Open', 'Adjusted_High', 'Adjusted_Low', 
                                         'Adjusted_close', 'Adjusted_Volume', 'close_to_adj_close_ratio']] 
print(stocks_daily_ohlcv.shape)
stocks_daily_ohlcv[stocks_daily_ohlcv["Ticker"]=="AAPL"].tail(2) 

SPLIT_DATE:  2023-08-09
(3388750, 10)


Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio
8390,Apple Inc,NASDAQ,AAPL,2023-08-08,179.69,180.27,177.58,179.8,67823000.0,1.0
8391,Apple Inc,NASDAQ,AAPL,2023-08-09,180.87,180.93,177.01,178.19,56570198.0,1.0


In [19]:
stocks_daily_ohlcv = stocks_daily_ohlcv[stocks_daily_ohlcv['Ticker']!='GFN'].reset_index(drop=True)

In [20]:
stocks_daily_ohlcv['Ticker'].nunique() 

6479

In [21]:
stocks_daily_ohlcv.head(2) 

Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio
0,Agilent Technologies Inc,NYSE,A,2021-05-03,132.428653,132.448277,130.771939,131.3439,1046527.0,1.014055
1,Agilent Technologies Inc,NYSE,A,2021-05-04,130.28869,130.673284,128.543224,129.0856,1777060.0,1.014056


In [22]:
todays_price = stocks_daily_ohlcv[stocks_daily_ohlcv['Date']==SPLIT_DATE].reset_index(drop=True)
print(todays_price.shape)
todays_price.tail(2)

(6479, 10)


Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio
6477,Zynerba Pharmaceuticals Inc,NASDAQ,ZYNE,2023-08-09,0.359,0.36,0.3338,0.3338,83192.0,1.0
6478,Zynex Inc,NASDAQ,ZYXI,2023-08-09,7.78,8.09,7.63,7.69,267016.0,1.0


In [23]:
print(todays_price.shape) 
todays_price.tail(2) 

(6479, 10)


Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio
6477,Zynerba Pharmaceuticals Inc,NASDAQ,ZYNE,2023-08-09,0.359,0.36,0.3338,0.3338,83192.0,1.0
6478,Zynex Inc,NASDAQ,ZYXI,2023-08-09,7.78,8.09,7.63,7.69,267016.0,1.0


In [24]:
todays_price['Exchange'].value_counts()

NASDAQ       3941
NYSE         2227
NYSE MKT      262
NYSE ARCA      49
Name: Exchange, dtype: int64

In [31]:
N_days = 20
unique_dates_list = [x for x in stocks_daily_ohlcv['Date'].unique()]
split_date_index = unique_dates_list.index(SPLIT_DATE)
N_days_dates = unique_dates_list[split_date_index-(N_days-1):split_date_index+1]
N_days_dates 

['2023-07-13',
 '2023-07-14',
 '2023-07-17',
 '2023-07-18',
 '2023-07-19',
 '2023-07-20',
 '2023-07-21',
 '2023-07-24',
 '2023-07-25',
 '2023-07-26',
 '2023-07-27',
 '2023-07-28',
 '2023-07-31',
 '2023-08-01',
 '2023-08-02',
 '2023-08-03',
 '2023-08-04',
 '2023-08-07',
 '2023-08-08',
 '2023-08-09']

In [32]:
stocks_daily_ohlcv['Ticker'].unique()[0:5]

array(['A', 'AA', 'AAC', 'AACG', 'AACIW'], dtype=object)

In [33]:
eligible_tickers = stocks_daily_ohlcv["Ticker"][(stocks_daily_ohlcv['Date'].apply(lambda x: x in N_days_dates)) 
                                               ].unique()
len(eligible_tickers) 

6479

In [34]:
stocks_daily_ohlcv['day_price'] = ( (0.1 * stocks_daily_ohlcv['Adjusted_Open']) + 
                                    (0.1 * stocks_daily_ohlcv['Adjusted_High']) + 
                                    (0.1 * stocks_daily_ohlcv['Adjusted_Low']) + 
                                    (0.7 * stocks_daily_ohlcv['Adjusted_close']) )

stocks_daily_ohlcv.head(2) 

Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio,day_price
0,Agilent Technologies Inc,NYSE,A,2021-05-03,132.428653,132.448277,130.771939,131.3439,1046527.0,1.014055,131.505617
1,Agilent Technologies Inc,NYSE,A,2021-05-04,130.28869,130.673284,128.543224,129.0856,1777060.0,1.014056,129.31044


In [35]:
stocks_daily_ohlcv.tail(2) 

Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio,day_price
3388748,Zynex Inc,NASDAQ,ZYXI,2023-08-08,8.01,8.02,7.69,7.76,373000.0,1.0,7.804
3388749,Zynex Inc,NASDAQ,ZYXI,2023-08-09,7.78,8.09,7.63,7.69,267016.0,1.0,7.733


In [36]:
stocks_daily_ohlcv['Vol_price'] = stocks_daily_ohlcv['day_price'] * stocks_daily_ohlcv['Adjusted_Volume'] 

stocks_daily_ohlcv[f'Adjusted_Volume_1d'] = stocks_daily_ohlcv.groupby("Ticker")['Adjusted_Volume'].shift(1)
stocks_daily_ohlcv[f'Adj_Vol_EMA_prev_20d'] = stocks_daily_ohlcv['Adjusted_Volume_1d'].rolling(20).mean().round(decimals=3)

def get_last_N_days(df, N_days):
    unique_dates_list = [x for x in df['Date'].unique()]
    split_date_index = unique_dates_list.index(SPLIT_DATE)
    N_days_dates = unique_dates_list[split_date_index-(N_days-1):split_date_index+1]
    return N_days_dates 

stocks_daily_ohlcv = hfs.get_QM_ADR(stocks_daily_ohlcv, window_size = 5 ,high_col = "Adjusted_High", low_col = "Adjusted_Low", close_col = "Adjusted_close")
stocks_daily_ohlcv = hfs.get_QM_ADR(stocks_daily_ohlcv, window_size = 20 ,high_col = "Adjusted_High", low_col = "Adjusted_Low", close_col = "Adjusted_close")

ticker_with_vol_criteria = stocks_daily_ohlcv[ (stocks_daily_ohlcv['Date'].isin( get_last_N_days(stocks_daily_ohlcv,550)) ) &
                                               (stocks_daily_ohlcv['Adj_Vol_EMA_prev_20d'] > 150000 ) & 
                                               (stocks_daily_ohlcv['Vol_price'] >= 25000000 ) &
                                               (stocks_daily_ohlcv['Adjusted_Volume'] >= 15000000 ) ]['Ticker'].unique()

print(f'ticker_with_vol_criteria: {len(ticker_with_vol_criteria)}')

stocks_daily_ohlcv_with_Vol = stocks_daily_ohlcv[stocks_daily_ohlcv['Ticker'].isin(ticker_with_vol_criteria)
                                                         ].reset_index(drop=True)

ticker_with_price_criteria = stocks_daily_ohlcv_with_Vol[ 
                            (stocks_daily_ohlcv_with_Vol['Date'].isin(get_last_N_days(stocks_daily_ohlcv_with_Vol,550))) &
                            (stocks_daily_ohlcv_with_Vol['Adjusted_close'] > 0.9 ) & 
                            (stocks_daily_ohlcv_with_Vol['Adjusted_close'] <= 70) 
                          ]['Ticker'].unique()

print(f'ticker_with_price_criteria: {len(ticker_with_price_criteria)}')

stocks_daily_ohlcv_with_VolPrice = stocks_daily_ohlcv_with_Vol[stocks_daily_ohlcv_with_Vol['Ticker'].isin(ticker_with_price_criteria)
                                                         ].reset_index(drop=True)

eligible_tickers = stocks_daily_ohlcv_with_VolPrice[
                 (stocks_daily_ohlcv_with_VolPrice['Date'].isin(get_last_N_days(stocks_daily_ohlcv_with_VolPrice,550))) &
                 (stocks_daily_ohlcv_with_VolPrice['ADR_percent_20d'] > 3 ) &
                 ( stocks_daily_ohlcv_with_VolPrice['Adjusted_Volume'] >= 1000000 )  

             ]["Ticker"].unique()

len(eligible_tickers) 

ticker_with_vol_criteria: 1542
ticker_with_price_criteria: 1463


1427

In [37]:
eligible_tickers = [x for x in eligible_tickers] + current_portfolio + recent_imp_tickers
eligible_tickers = list(set(eligible_tickers))
print(len(eligible_tickers)) 
stocks_daily_ohlcv_eligible = stocks_daily_ohlcv[stocks_daily_ohlcv['Ticker'].isin(eligible_tickers)].reset_index(drop=True)
print(stocks_daily_ohlcv_eligible.shape) 
stocks_daily_ohlcv_eligible.tail(2) 

1432
(778676, 16)


Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio,day_price,Vol_price,Adjusted_Volume_1d,Adj_Vol_EMA_prev_20d,ADR_percent_5d,ADR_percent_20d
778674,Zymeworks Inc. Common Stock,NASDAQ,ZYME,2023-08-08,7.23,7.46,7.17,7.29,394500.0,1.0,7.289,2875510.5,430800.0,627680.0,4.615,4.137
778675,Zymeworks Inc. Common Stock,NASDAQ,ZYME,2023-08-09,7.29,7.725,7.29,7.7,514001.0,1.0,7.62,3916944.62,394500.0,615825.0,4.804,4.232


In [38]:
stocks_daily_ohlcv_eligible = stocks_daily_ohlcv_eligible[ -( (stocks_daily_ohlcv_eligible['Adjusted_Open']==0) & 
                                                              (stocks_daily_ohlcv_eligible['Adjusted_High']==0) & 
                                                              (stocks_daily_ohlcv_eligible['Adjusted_Low']==0) & 
                                                              (stocks_daily_ohlcv_eligible['Adjusted_close']==0)) ].reset_index(drop=True)
print(stocks_daily_ohlcv_eligible.shape)
stocks_daily_ohlcv_eligible.tail(2) 

(778676, 16)


Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio,day_price,Vol_price,Adjusted_Volume_1d,Adj_Vol_EMA_prev_20d,ADR_percent_5d,ADR_percent_20d
778674,Zymeworks Inc. Common Stock,NASDAQ,ZYME,2023-08-08,7.23,7.46,7.17,7.29,394500.0,1.0,7.289,2875510.5,430800.0,627680.0,4.615,4.137
778675,Zymeworks Inc. Common Stock,NASDAQ,ZYME,2023-08-09,7.29,7.725,7.29,7.7,514001.0,1.0,7.62,3916944.62,394500.0,615825.0,4.804,4.232


In [39]:
stocks_daily_ohlcv_eligible['Ticker'].nunique() 

1432

In [40]:
Tickers_with_eligible_lifetime = stocks_daily_ohlcv_eligible.groupby(['Ticker'])['Date'].size().reset_index(name='periods')
print(Tickers_with_eligible_lifetime.shape)
Tickers_with_eligible_lifetime = Tickers_with_eligible_lifetime[Tickers_with_eligible_lifetime['periods']>=390].reset_index(drop=True)
print(Tickers_with_eligible_lifetime[Tickers_with_eligible_lifetime['periods']>=390].shape)
Tickers_with_eligible_lifetime.tail(2) 

(1432, 2)
(1341, 2)


Unnamed: 0,Ticker,periods
1339,ZVZZT,570
1340,ZYME,572


# get all_stocks_ohlcv for ranking from here

In [41]:
print(stocks_daily_ohlcv_eligible.shape)
stocks_daily_ohlcv_eligible = stocks_daily_ohlcv_eligible[stocks_daily_ohlcv_eligible['Ticker'].\
                              isin(Tickers_with_eligible_lifetime['Ticker'].unique())].reset_index(drop=True)

eligible_tickers = stocks_daily_ohlcv_eligible['Ticker'].unique()
print(len(eligible_tickers)) 
print(stocks_daily_ohlcv_eligible.shape)
stocks_daily_ohlcv_eligible.tail(2) 

(778676, 16)
1341
(756140, 16)


Unnamed: 0,company_Name,Exchange,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,close_to_adj_close_ratio,day_price,Vol_price,Adjusted_Volume_1d,Adj_Vol_EMA_prev_20d,ADR_percent_5d,ADR_percent_20d
756138,Zymeworks Inc. Common Stock,NASDAQ,ZYME,2023-08-08,7.23,7.46,7.17,7.29,394500.0,1.0,7.289,2875510.5,430800.0,627680.0,4.615,4.137
756139,Zymeworks Inc. Common Stock,NASDAQ,ZYME,2023-08-09,7.29,7.725,7.29,7.7,514001.0,1.0,7.62,3916944.62,394500.0,615825.0,4.804,4.232


In [42]:
stocks_daily_ohlcv_eligible = stocks_daily_ohlcv_eligible[-stocks_daily_ohlcv_eligible['Ticker'].isin( \
                ['BIPH','DNAD', 'GRPH', 'SPIR', 'ZWZZT', 'ZVZZT' ])].reset_index(drop=True) 

In [44]:
stocks_daily_ohlcv_eligible1 = stocks_daily_ohlcv_eligible[['company_Name','Ticker', 'Date', 'Adjusted_Open', 'Adjusted_High','Adjusted_Low', 'Adjusted_close', 'Adjusted_Volume', 'Exchange' ,'close_to_adj_close_ratio']]
print(stocks_daily_ohlcv_eligible1.shape)
print(stocks_daily_ohlcv_eligible1['Ticker'].nunique())
stocks_daily_ohlcv_eligible1.to_pickle(f'{PROJ_PATH}/{ML_DATA_FOLDER_PATH}/all_stocks_ohlcv.pkl') 

(755570, 10)
1340


In [None]:
%reset -f 