In [1]:
import os
import sys
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from pandas.tseries.offsets import MonthEnd, BDay, DateOffset
HOME_DIR = os.path.expanduser('~/')
# Make the local StockPriceData module importable.
# os.path.join avoids the doubled separator the old f-string produced (HOME_DIR
# already ends in '/'), and the membership check stops sys.path from growing by
# one duplicate entry every time this cell is re-run.
STOCK_PRICE_DB_DIR = os.path.join(HOME_DIR, 'Documents', 'Algo', 'Stock Price DB')
if STOCK_PRICE_DB_DIR not in sys.path:
    sys.path.append(STOCK_PRICE_DB_DIR)
import StockPriceData as spd
host = 'rpi4'  # host name handed to spd.get_tickers_from_db
cached_data = './cached_data'  # directory holding the cached CSV files


In [2]:
# Read the cached NASDAQ-100 constituent file and keep the symbols as a plain list.
tickers_df = pd.read_csv(f'{cached_data}/nasdaq100_tickers.csv')
tickers = tickers_df.loc[:, 'Ticker'].tolist()

In [3]:
def get_price_data(tickers,start_date,end_date,refresh_data,cache_dir=None):
    """Load adjusted-close prices and derive daily, weekly and monthly gross returns.

    Parameters
    ----------
    tickers : iterable of str
        Symbols to fetch. Only used when ``refresh_data`` is truthy.
    start_date, end_date
        Passed through unchanged to ``spd.process_ticker``.
    refresh_data : bool
        Truthy: fetch every ticker through ``spd.process_ticker`` and overwrite
        ``all_daily_prices.csv`` in the cache directory.
        Falsy: read that CSV (it must have a ``Date`` column) instead.
    cache_dir : str, optional
        Directory that holds ``all_daily_prices.csv``. Defaults to the
        notebook-level ``cached_data`` setting.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(daily_prices_df, daily_returns, weekly_returns, monthly_returns)``.
        All three return frames hold *gross* returns (1 + percentage change),
        so subtract 1 to get the actual percentage return. Weekly and monthly
        values are the product of the daily gross returns in each period.
    """
    if cache_dir is None:
        cache_dir = cached_data
    cache_file = f'{cache_dir}/all_daily_prices.csv'

    if refresh_data:
        # One Adj_Close series per ticker; concat uses the dict keys as column names.
        tickers_and_prices = {
            ticker: spd.process_ticker(ticker, start_date, end_date)['Adj_Close']
            for ticker in tickers
        }
        all_daily_prices_df = pd.concat(tickers_and_prices, axis=1)
        # Create the cache directory first so a long fetch is not lost on the write.
        os.makedirs(cache_dir, exist_ok=True)
        all_daily_prices_df.to_csv(cache_file)
    else:
        all_daily_prices_df = pd.read_csv(cache_file, index_col=['Date'], parse_dates=True)

    # Keep only the symbols that already have a price on the first date of the period.
    has_first_price = all_daily_prices_df.iloc[0].notnull()
    daily_prices_df = all_daily_prices_df.loc[:, has_first_price].copy()

    # CASH is the fallback holding for when nothing else has a positive return.
    # Its price column stays a 0.0 placeholder, as before.
    daily_prices_df['CASH'] = 0.0

    # Gross daily returns (1 + pct change) so period returns can be taken as products.
    # Gaps are forward-filled explicitly: this is what pct_change used to do
    # implicitly, and newer pandas versions no longer pad by default.
    daily_returns = (daily_prices_df.ffill().pct_change() + 1).iloc[1:].copy()
    # pct_change on the constant 0.0 placeholder is 0/0 = NaN, so set the intended
    # 0% return (gross 1.0) explicitly instead of leaving NaN in the daily frame.
    daily_returns['CASH'] = 1.0

    weekly_returns = daily_returns.resample('W').prod()
    # An offset object is used instead of the 'M' string alias, which newer pandas
    # deprecates in favour of 'ME'; the object form works on either side of that change.
    monthly_returns = daily_returns.resample(pd.offsets.MonthEnd()).prod()
    return daily_prices_df, daily_returns, weekly_returns, monthly_returns

In [4]:
#Single Stock
#daily_prices_df = spd.process_ticker('AAPL','2010-01-01','2020-01-01')['Adj_Close']
#daily_returns = (daily_prices_df.pct_change()+1)[1:]
#weekly_returns = daily_returns.resample('W').prod() # Need to subtract 1 to get actual % returns.
#monthly_returns = daily_returns.resample('M').prod() # Need to subtract 1 to get actual % returns.

In [5]:
# Multiple stocks: build the price and return frames for the whole ticker universe.
start_date = '2010-01-01'
end_date = '2020-01-01'
refresh_data = True  # True re-fetches through spd and rewrites the cache; False reads the cached CSV
(daily_prices_df,
 daily_returns,
 weekly_returns,
 monthly_returns) = get_price_data(tickers, start_date, end_date, refresh_data)

Retrieving price data for ATVI from 2010-01-01 - 2020-01-01
Connecting to DB on rpi4.
Latest price data for ATVI (2023-02-17) >= required end date (2020-01-01).
(2516, 8)
Retrieving price data for ADBE from 2010-01-01 - 2020-01-01
Connecting to DB on rpi4.
Latest price data for ADBE (2023-02-17) >= required end date (2020-01-01).
(2516, 8)
Retrieving price data for ADP from 2010-01-01 - 2020-01-01
Connecting to DB on rpi4.
Latest price data for ADP (2023-02-17) >= required end date (2020-01-01).
(2516, 8)
Retrieving price data for ABNB from 2010-01-01 - 2020-01-01
Connecting to DB on rpi4.
Latest price data for ABNB (2023-02-17) >= required end date (2020-01-01).
(0, 8)
Retrieving price data for ALGN from 2010-01-01 - 2020-01-01
Connecting to DB on rpi4.
Latest price data for ALGN (2023-02-17) >= required end date (2020-01-01).
(2516, 8)
Retrieving price data for GOOGL from 2010-01-01 - 2020-01-01
Connecting to DB on rpi4.
Latest price data for GOOGL (2023-02-17) >= required end date (

In [6]:
# Show the first row of the cleaned price frame (every kept symbol plus the CASH column).
daily_prices_df.head(1)

Unnamed: 0_level_0,ATVI,ADBE,ADP,ALGN,GOOGL,GOOG,AMZN,AMD,AEP,AMGN,...,SBUX,SNPS,TMUS,TXN,VRSK,VRTX,WBA,WBD,XEL,CASH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,10.02,37.09,27.49,18.5,15.68,15.61,6.7,9.7,21.14,43.27,...,9.26,22.44,13.21,18.77,29.07,44.24,26.33,15.84,13.6,0.0


In [11]:
def _following_period_bounds(last_period_end, resample_period):
    """Return ``(period_start, period_end)`` for the period after ``last_period_end``.

    ``resample_period`` must be ``'M'`` (monthly) or ``'W'`` (weekly); anything
    else raises ``ValueError`` instead of leaving ``period_end`` undefined.
    """
    # Start one day after the previous period closed so the two never overlap.
    period_start = last_period_end + DateOffset(days=1)
    if resample_period == 'M':
        # End of the month that follows the previous period end.
        period_end = last_period_end + MonthEnd(1)
    elif resample_period == 'W':
        # Four business days after the start of the new week.
        period_end = period_start + BDay(4)
    else:
        raise ValueError(
            f"Unsupported resample_period {resample_period!r}; expected 'M' or 'W'"
        )
    return period_start, period_end


#resample_period = 'M'
resample_period = 'W'
# Choose the returns frame from the period code so the two settings cannot drift apart.
period_returns = monthly_returns if resample_period == 'M' else weekly_returns
period_count = 0

for date in period_returns.index:
    print(f'\nPeriod end:\t  {date}')
    last_period_end = date  # label of the period that has just finished
    period_start, period_end = _following_period_bounds(last_period_end, resample_period)
    print(f'last_period_end:  {last_period_end}\tperiod_start:  {period_start}\tperiod_end:  {period_end}',flush=True)
    period_count += 1

# Sanity check: one iteration per row of the returns frame.
relation = '!=' if period_count != period_returns.shape[0] else '=='
print(f'Periods: {period_count} {relation} period returns: {period_returns.shape[0]}')



Period end:	  2010-01-10 00:00:00
last_period_end:  2010-01-10 00:00:00	period_start:  2010-01-11 00:00:00	period_end:  2010-01-15 00:00:00

Period end:	  2010-01-17 00:00:00
last_period_end:  2010-01-17 00:00:00	period_start:  2010-01-18 00:00:00	period_end:  2010-01-22 00:00:00

Period end:	  2010-01-24 00:00:00
last_period_end:  2010-01-24 00:00:00	period_start:  2010-01-25 00:00:00	period_end:  2010-01-29 00:00:00

Period end:	  2010-01-31 00:00:00
last_period_end:  2010-01-31 00:00:00	period_start:  2010-02-01 00:00:00	period_end:  2010-02-05 00:00:00

Period end:	  2010-02-07 00:00:00
last_period_end:  2010-02-07 00:00:00	period_start:  2010-02-08 00:00:00	period_end:  2010-02-12 00:00:00

Period end:	  2010-02-14 00:00:00
last_period_end:  2010-02-14 00:00:00	period_start:  2010-02-15 00:00:00	period_end:  2010-02-19 00:00:00

Period end:	  2010-02-21 00:00:00
last_period_end:  2010-02-21 00:00:00	period_start:  2010-02-22 00:00:00	period_end:  2010-02-26 00:00:00

Period end:	

In [None]:
# Take the first element returned by the DB ticker query and snapshot it to CSV.
# Note: this rebinds `tickers` to a pandas Series.
db_ticker_list = spd.get_tickers_from_db(host)[0]
tickers = pd.Series(db_ticker_list, name='tickers')
tickers.to_csv('./ticker_list_orig.csv')

In [None]:

  
# Parse a day/month/two-digit-year timestamp string into a datetime object.
input_str = '21/01/24 11:04:19'
dt_object = datetime.strptime(input_str, '%d/%m/%y %H:%M:%S')

print("The type of the input date string now is: ", type(dt_object))
print("The date is", dt_object)

In [None]:
# Shift the parsed timestamp forward by fixed day counts (730 days stands in for two years).
future_date_after_2yrs = dt_object + timedelta(days=730)
future_date_after_2days = dt_object + timedelta(days=2)

# Show both computed dates.
print('future_date_after_2yrs:', str(future_date_after_2yrs))
print('future_date_after_2days:', str(future_date_after_2days))

In [None]:
# Drop the time-of-day component, keeping only the calendar date.
just_date = future_date_after_2days.date()
print(just_date)

In [None]:
# Folder holding the Yahoo price CSVs. It is resolved relative to the current
# user's home directory (as the first cell does for the Stock Price DB path)
# rather than being pinned to one account's absolute path.
data_folder = os.path.expanduser('~/Documents/Algo Trading/LEAN/data/yahoo/')
ticker = 'spy' #'spy' 'eurusd=x' 'BTC-USD'
warmup_period = 200 # Set this to the length of your longest indicator, i.e. if you calculate a 200 day ma, set it to 200
#df = yf.download(ticker, start='2022-03-17', interval='30m')
#df.to_csv(data_folder+ticker+'.csv')
df = pd.read_csv(f"{data_folder}{ticker}.csv", index_col=[0], parse_dates=True, skipinitialspace=True)
# Non-inplace rename, so re-running the cell always starts from the freshly read frame.
df = df.rename(columns={'Close': 'close'})
start_date = '2015-01-01' #Start date for data
end_date = '2022-05-01'
#df = df.loc[start_date-warmup_period:end_date] #Change this so that we use the whole thing and use the start date to specify when we want to start trading so indicators have time to "warm up"

    #To be fair we should note the date when our signals have enough data so we don't exclude dates for B&H return
long_only = False


In [None]:
# Move start_date back by warmup_period calendar days.
parsed_start_date = datetime.strptime(start_date, '%Y-%m-%d').date()
start_dt_object = parsed_start_date - timedelta(days=warmup_period)
print(start_dt_object)

In [None]:
# One-row toy data for the top-N experiments: two values above 1, a zero and a NaN.
my_dict = {
    'BIG': [1.2],
    'MED': [1.05],
    'BAD': [0],
    'AWFL': [np.nan],
}
my_dict

In [None]:
# Build the one-row frame, keeping the columns in the dict's key order.
my_df = pd.DataFrame(my_dict, columns=list(my_dict))

In [None]:
# Display the toy frame.
my_df

In [None]:
# Boolean mask over the (up to) ten largest values in row 0: True where the value is >= 1.
first_row = my_df.loc[0, :]
top_10 = first_row.nlargest(10) >= 1

In [None]:
# top_10 already holds booleans (it was built with .ge(1)), so .ge(1) here is True
# exactly where the mask is True; the result is the labels whose row-0 value was >= 1.
top_10[top_10.ge(1)].index

In [7]:
def create_new_param_vals(current_val, pct):
    """Return an integer ``(min, max)`` pair spanning ``current_val`` -/+ ``pct``.

    Parameters
    ----------
    current_val : number
        The existing parameter value.
    pct : float
        Fractional spread, e.g. ``.5`` for plus or minus 50%.

    Returns
    -------
    tuple of int
        ``(current_val * (1 - pct), current_val * (1 + pct))``, each truncated
        to an integer. The pair is also printed.
    """
    def _to_int(value):
        # Binary floats make e.g. 100 * (1 + .15) come out as 114.99999999999999.
        # Rounding to 9 decimals before truncating stops that noise from costing
        # a whole unit, while real fractions (7.5 -> 7) still truncate as before.
        return int(round(value, 9))

    min_max = (_to_int(current_val * (1 - pct)), _to_int(current_val * (1 + pct)))
    print(f'min_max: {min_max}')
    return min_max
create_new_param_vals(100, .5)

min_max: (50, 150)


(50, 150)