In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

from pylab import rcParams
rcParams['figure.figsize'] = 12,8

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

from datetime import datetime, timedelta

import re

import os
from glob import glob
from tqdm import tqdm

import yaml
from yaml import dump
import uuid
import itertools

In [3]:
import xgboost
from xgboost import XGBClassifier, XGBRegressor

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier

from lightgbm import LGBMClassifier, LGBMRegressor

In [4]:
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [5]:
import warnings
warnings.simplefilter(action="ignore")

## Initialize global variables and functions

__here we will set all the variables and functions used in both classification and regression problems__

In [6]:
date_parse = lambda dates: pd.to_datetime(dates)
    
companies = ["AMAZON", "APPLE", "GOOGLE", "META", "NETFLIX"]
time_period = ["daily"]

full_sets_orig = {}

for company, period in itertools.product(companies, time_period):
    path = f"/diploma_info/datalake/processed_data/{company}_{period}.csv"
#     path = f"/diploma_info/datalake/processed_data/{company}_{period}_2024-04-30.csv"

    full_sets_orig[company.lower()] = pd.read_csv(
        path,
        parse_dates=["date"],
        date_parser=date_parse,
        index_col=["date"],
    )

    
set_1 = full_sets_orig[companies[0].lower()]
    
train_end = set_1.iloc[-3].name
test_start = train_end + timedelta(days=1)

test_start = datetime(set_1.loc[test_start:].index[0].year, set_1.loc[test_start:].index[0].month, set_1.loc[test_start:].index[0].day)
train_end = datetime(set_1.loc[:train_end].index[-1].year, set_1.loc[:train_end].index[-1].month, set_1.loc[:train_end].index[-1].day)

test_end = test_start + timedelta(days=1)


forecast_steps = 9

full_sets = full_sets_orig.copy()

for company in companies:
    full_sets[company.lower()] = pd.concat([full_sets[company.lower()].loc[:test_end], pd.DataFrame(None, index=pd.date_range(test_start, test_end+timedelta(days=8), freq='B'))]).ffill()
    full_sets[company.lower()] = full_sets[company.lower()][~full_sets[company.lower()].index.duplicated(keep='first')]
    full_sets[company.lower()]['year'] = full_sets[company.lower()].index.year
    full_sets[company.lower()]['month'] = full_sets[company.lower()].index.month
    full_sets[company.lower()]['day'] = full_sets[company.lower()].index.day
    full_sets[company.lower()]['day_of_week'] = full_sets[company.lower()].index.weekday
    full_sets[company.lower()]['week_of_year'] = (full_sets[company.lower()].index.isocalendar()['week']).astype('int')

In [7]:
test_start, test_end

(datetime.datetime(2024, 5, 13, 0, 0), datetime.datetime(2024, 5, 14, 0, 0))

In [8]:
def data(X_full_set, y_full_set, train_start, config, forecast_steps):
    
    X_train = X_full_set.loc[train_start:config["train_end"]]
    X_test = X_full_set.loc[config["test_start"]:
                    config["test_start"]+timedelta(days=forecast_steps)]
    y_train = y_full_set.loc[train_start:config["train_end"]]
    y_test = y_full_set.loc[config["test_start"]:
                    config["test_start"]+timedelta(days=forecast_steps)]
    
    return X_train, X_test, y_train, y_test

In [9]:
def standardize_mean_values(df_test, df_train, full_set, config, problem):
    
    agg_cols = [col for col in config['train_features'] if col.endswith('_lag_1')] + \
               ['diff_open_value', 'open-prev_close', 'diff_close_value', 'growth_open'] + \
               [col for col in config['train_features'] if "close_m" in col]
    
    for agg in agg_cols:
        if agg in df_test.columns:
            try:
                num = df_test.loc[config["test_start"], agg]

            except KeyError as e:
                num = df_train[agg].iloc[-1]

            finally:

                _df = df_test.loc[config["test_start"]:, agg]
                _df = _df.replace(_df.values, num)

#                 print(df_test.loc[config["test_start"]+timedelta(days=day):, agg], _df.values.ravel())

                df_test.loc[config["test_start"]:, agg] = _df.values.ravel()
    
    if 'open' in config['train_features']:
        
        idx_1 = df_test.iloc[1].name
        df_test.loc[idx_1, 'open'] = full_set.loc[idx_1, 'new_open']
            
        _  = df_test.loc[idx_1+timedelta(days=1):]
        _coef = full_set.loc[idx_1-timedelta(days=7):idx_1, 'diff_open_value_mean_3_days'].mean()
        
        for n in range(_.shape[0]):
            
            if problem == 'regression':
                if _.iloc[n]['growth'] == 0:
                    if _coef > 0:
                        _coef = - _coef
                    else:
                        pass
                else:
                    pass
            
            _date = _.iloc[n].name
            df_test.loc[_date, 'open'] = df_test.loc[df_test.iloc[1+n].name, 'open'] + _coef
        
    print(df_test)
    
    return df_test

In [10]:
def add_predictions(problem, company, df_preds, y_pred_df, config, n):
    
    if problem == 'regression':
        
        dates = y_pred_df.index

        for date in dates:
            step_day = int((date-config["test_start"]).days)
            df_preds.loc[date.strftime("%Y-%m-%d"), f'{company}'] = y_pred_df.loc[date.strftime("%Y-%m-%d"), 0]
    
    elif problem == 'classification':
        
        dates = y_pred_df.index[n:]

        for date in dates:
            step_day = int((date-config["test_start"]).days)
            df_preds.loc[date.strftime("%Y-%m-%d"), f'{company}'] = y_pred_df.loc[date.strftime("%Y-%m-%d"), 0]

    return df_preds

In [11]:
def run_wfv(full_set: pd.DataFrame, config: dict, forecast_steps: int, company: str, models_list: dict, problem: str, df_preds_c: pd.Series):
    
    if problem == 'regression':
        X_full_set = full_set.loc[:, config['train_features']+['growth']].sort_index()
        X_full_set.loc[df_preds_c.index, 'growth'] = df_preds_c.values.ravel()
    else:
        X_full_set = full_set.loc[:, config['train_features']].sort_index()
    y_full_set = full_set.loc[:, [config['target_column']]].sort_index()
    
    if X_full_set.shape[0] != y_full_set.shape[0]:
        common_index = list(set(X_full_set.index) & set(y_full_set.index))
        common_index.sort()
        X_full_set = X_full_set.loc[common_index, :]
        y_full_set = y_full_set.loc[common_index, :]
    print(X_full_set.shape, y_full_set.shape)
    
    print(X_full_set)
    
    count_days = (test_end - test_start).days + 1
    print('count_days = ', count_days)
    
    
    model_name = config['model_name']
    print(model_name)
    
    model = config['model']

    unique_uuid = config['unique_uuid']        
        
        
    train_start = config.get('train_start', None)
    if train_start is None:
        if config.get('duration_training_history', None) is None:
            train_start = X_full_set.index[0]
        else:
            train_start = config['train_end'] - timedelta(days=config['duration_training_history'])
            
#     try:

    X_train, X_test, y_train, y_test = data(X_full_set, y_full_set, train_start, config, forecast_steps)
    X_test = standardize_mean_values(X_test.copy(), X_train, full_set, config, problem)

    y_pred = model.fit(X_train, y_train).predict(X_test)

    y_pred_df = pd.DataFrame(y_pred, index=y_test.index)

    return y_pred_df

        
#     except KeyError as e:
#         print(e)
#         pass

#     except ValueError as e:
#         print(e)
#         pass

## CLASSIFICATION

__forecast whether the price will rise or fall in next several days__

In [12]:
problem = 'classification'
models_list = ['xgboost', 'lightgbm', 'random_forest', 'knear_neighbors']

paths_to_configs = []
configs = {}

models = {
    'amazon': {
       'd-0': 'b121bfdb-0c51-11ef-9424-c0e434d84b22',
       'd-1': 'cae1a9b6-0e08-11ef-a086-c0e434d84b22',
       'd-2': '181a189f-07ea-11ef-854b-c0e434d84b22',
       'd-3': 'a559b477-089f-11ef-9dc4-c0e434d84b22',
       'd-4': 'a559db8f-089f-11ef-9d27-c0e434d84b22'
    },
    'apple': {
        'd-0': '0b4ed81f-0c57-11ef-abf1-c0e434d84b22',
        'd-1': 'bc1bbe19-07f1-11ef-be70-c0e434d84b22',
        'd-2': 'bc1bbded-07f1-11ef-a902-c0e434d84b22',
        'd-3': 'bc1bbde7-07f1-11ef-b1dc-c0e434d84b22',
        'd-4': 'bc1bbdd8-07f1-11ef-97f8-c0e434d84b22'
    },
    'meta': {
        'd-0': '94460d9a-0c59-11ef-8cce-c0e434d84b22',
        'd-1': '80337013-08a9-11ef-b1b3-c0e434d84b22',
        'd-2': 'faf01422-0e11-11ef-8908-c0e434d84b22',
        'd-3': '17f713e3-1116-11ef-8410-c0e434d84b22',
        'd-4': '8033701f-08a9-11ef-a212-c0e434d84b22'
    },
    'google': {
        'd-0': 'fad9937e-0c58-11ef-887f-c0e434d84b22',
        'd-1': 'be8ae732-08a8-11ef-82dc-c0e434d84b22', 
        'd-2': 'be8ae732-08a8-11ef-82dc-c0e434d84b22', 
        'd-3': 'be8ae732-08a8-11ef-82dc-c0e434d84b22', 
        'd-4': '0cf7d457-07f3-11ef-8091-c0e434d84b22'
    },
    'netflix': {
        'd-0': 'd7c080af-0c5b-11ef-8957-c0e434d84b22',
        'd-1': 'd120abe1-07f5-11ef-be3e-c0e434d84b22',
        'd-2': 'c023ad05-08ac-11ef-bb81-c0e434d84b22',
        'd-3': '98a48d60-0e14-11ef-9a51-c0e434d84b22',
        'd-4': 'c023acff-08ac-11ef-9de2-c0e434d84b22'
    }
}

for i in range(len(models.items())):  
    comp = list(models.keys())[i]
    for d, m in list(models[comp].items()):
        paths_to_configs = glob(f'/diploma_info/datalake/wf_result/{problem}/{comp}/daily/*/research_task_*/*_{m}.yaml')[0]
    
        with open(paths_to_configs, 'r') as f:
            configs[f'{comp}_{d}'] = yaml.safe_load(f)

for config in list(configs.values()):
    config['train_end'] = train_end
    config['test_start'] = test_start
    config['test_end'] = test_end
    if config['model_name'] == 'knear_neighbors':
        config['model'] = KNeighborsClassifier(**config['model_hyperparameters'])
    elif config['model_name'] == 'xgboost':
        config['model'] = XGBClassifier(**config['model_hyperparameters'])
    elif config['model_name'] == 'random_forest':
        config['model'] = RandomForestClassifier(**config['model_hyperparameters'])
    elif config['model_name'] == 'lightgbm':
        config['model'] = LGBMClassifier(**config['model_hyperparameters'])

In [13]:
df_preds_c = pd.DataFrame()

for company in companies:
    company = company.lower()
    
    full_set = full_sets[company]
    print(f'company: {company}')
    
    _ = [(e.split('-')[-1], configs[e]) for e in [i for i in list(configs.keys()) if i.startswith(company)]]
    print(f'count_configs {len(_)} \n')
    
    for n, config in _:
        print("day = ", n, '\n', config['model_name'], '==', config, '\n')
        
        y_pred_df = run_wfv(full_set, config, forecast_steps, company, models_list, problem, df_preds_c = None)
        
        df_preds_c = add_predictions(problem, company, df_preds_c, y_pred_df, config, int(n))

company: amazon
count_configs 5 

day =  0 
 random_forest == {'company': 'amazon', 'forecast_frequency': 'daily', 'forecast_periods': 4, 'model_hyperparameters': {'max_depth': 7, 'n_estimators': 5000, 'n_jobs': -1, 'random_state': 2}, 'model_name': 'random_forest', 'path_to_result': '/diploma_info/datalake/', 'problem': 'classification', 'target_column': 'growth', 'test_end': datetime.datetime(2024, 5, 14, 0, 0), 'test_start': datetime.datetime(2024, 5, 13, 0, 0), 'train_end': datetime.datetime(2024, 5, 10, 0, 0), 'train_features': ['open', 'diff_open_value', 'open-prev_close', 'year', 'month', 'day', 'day_of_week', 'week_of_year'], 'train_start': datetime.datetime(2015, 1, 1, 0, 0), 'unique_uuid': 'b121bfdb-0c51-11ef-9424-c0e434d84b22', 'model': RandomForestClassifier(max_depth=7, n_estimators=5000, n_jobs=-1,
                       random_state=2)} 

(2363, 8) (2363, 1)
                open  diff_open_value  open-prev_close  year  month  day  \
2015-01-02   15.6290         0.051499 

day =  3 
 xgboost == {'company': 'amazon', 'forecast_frequency': 'daily', 'forecast_periods': 4, 'model_hyperparameters': {'booster': 'gbtree', 'colsample_bytree': 1, 'eta': 0.3, 'eval_metric': 'rmse', 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': 10, 'n_jobs': -1, 'objective': 'reg:squarederror', 'random_state': 2, 'subsample': 1}, 'model_name': 'xgboost', 'path_to_result': '/diploma_info/datalake/', 'problem': 'classification', 'target_column': 'growth', 'test_end': datetime.datetime(2024, 5, 14, 0, 0), 'test_start': datetime.datetime(2024, 5, 13, 0, 0), 'train_end': datetime.datetime(2024, 5, 10, 0, 0), 'train_features': ['open', 'close_max_3_days', 'close_min_3_days', 'close_mean_3_days'], 'train_start': datetime.datetime(2015, 1, 1, 0, 0), 'unique_uuid': 'a559b477-089f-11ef-9dc4-c0e434d84b22', 'model': XGBClassifier(base_score=None, booster='gbtree', callbacks=None,
              colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
              device=None

day =  3 
 xgboost == {'company': 'apple', 'duration_training_history': 120, 'forecast_frequency': 'daily', 'forecast_periods': 4, 'model_hyperparameters': {'booster': 'gbtree', 'colsample_bytree': 1, 'eta': 0.3, 'eval_metric': 'rmse', 'max_depth': 9, 'min_child_weight': 1, 'n_estimators': 10, 'n_jobs': -1, 'objective': 'reg:squarederror', 'random_state': 2, 'subsample': 1}, 'model_name': 'xgboost', 'path_to_result': '/diploma_info/datalake/', 'problem': 'classification', 'target_column': 'growth', 'test_end': datetime.datetime(2024, 5, 14, 0, 0), 'test_start': datetime.datetime(2024, 5, 13, 0, 0), 'train_end': datetime.datetime(2024, 5, 10, 0, 0), 'train_features': ['open', 'volume_lag_1'], 'unique_uuid': 'bc1bbde7-07f1-11ef-b1dc-c0e434d84b22', 'model': XGBClassifier(base_score=None, booster='gbtree', callbacks=None,
              colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
              device=None, early_stopping_rounds=None, enable_categorical=False,
        

day =  3 
 xgboost == {'company': 'google', 'forecast_frequency': 'daily', 'forecast_periods': 4, 'model_hyperparameters': {'booster': 'gbtree', 'colsample_bytree': 1, 'eta': 0.3, 'eval_metric': 'rmse', 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': 10, 'n_jobs': -1, 'objective': 'reg:squarederror', 'random_state': 2, 'subsample': 1}, 'model_name': 'xgboost', 'path_to_result': '/diploma_info/datalake/', 'problem': 'classification', 'target_column': 'growth', 'test_end': datetime.datetime(2024, 5, 14, 0, 0), 'test_start': datetime.datetime(2024, 5, 13, 0, 0), 'train_end': datetime.datetime(2024, 5, 10, 0, 0), 'train_features': ['open', 'year', 'month', 'day', 'day_of_week', 'week_of_year'], 'train_start': datetime.datetime(2020, 1, 1, 0, 0), 'unique_uuid': 'be8ae732-08a8-11ef-82dc-c0e434d84b22', 'model': XGBClassifier(base_score=None, booster='gbtree', callbacks=None,
              colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
              device=None, earl

day =  1 
 xgboost == {'company': 'meta', 'forecast_frequency': 'daily', 'forecast_periods': 4, 'model_hyperparameters': {'booster': 'gbtree', 'colsample_bytree': 1, 'eta': 0.3, 'eval_metric': 'rmse', 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': 5000, 'n_jobs': -1, 'objective': 'reg:squarederror', 'random_state': 2, 'subsample': 1}, 'model_name': 'xgboost', 'path_to_result': '/diploma_info/datalake/', 'problem': 'classification', 'target_column': 'growth', 'test_end': datetime.datetime(2024, 5, 14, 0, 0), 'test_start': datetime.datetime(2024, 5, 13, 0, 0), 'train_end': datetime.datetime(2024, 5, 10, 0, 0), 'train_features': ['open', 'volume_lag_1'], 'train_start': datetime.datetime(2015, 1, 1, 0, 0), 'unique_uuid': '80337013-08a9-11ef-b1b3-c0e434d84b22', 'model': XGBClassifier(base_score=None, booster='gbtree', callbacks=None,
              colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
              device=None, early_stopping_rounds=None, enable_categori

day =  3 
 xgboost == {'company': 'netflix', 'forecast_frequency': 'daily', 'forecast_periods': 4, 'model_hyperparameters': {'booster': 'gbtree', 'colsample_bytree': 1, 'eta': 0.3, 'eval_metric': 'rmse', 'max_depth': 9, 'min_child_weight': 1, 'n_estimators': 200, 'n_jobs': -1, 'objective': 'reg:squarederror', 'random_state': 2, 'subsample': 1}, 'model_name': 'xgboost', 'path_to_result': '/diploma_info/datalake/', 'problem': 'classification', 'target_column': 'growth', 'test_end': datetime.datetime(2024, 5, 14, 0, 0), 'test_start': datetime.datetime(2024, 5, 13, 0, 0), 'train_end': datetime.datetime(2024, 5, 10, 0, 0), 'train_features': ['open', 'volume_lag_1', 'diff_close_value'], 'train_start': datetime.datetime(2023, 1, 1, 0, 0), 'unique_uuid': '98a48d60-0e14-11ef-9a51-c0e434d84b22', 'model': XGBClassifier(base_score=None, booster='gbtree', callbacks=None,
              colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
              device=None, early_stopping_rounds

## REGRESSION

__forecast the price itself for next several days__

In [14]:
problem = 'regression'
models_list = ['xgboost', 'lightgbm', 'random_forest', 'linear_regression']


paths_to_configs = []
configs = []

models = {
    'amazon': '1e38d412-0dc3-11ef-8dc7-c0e434d84b22',
    'apple': '29753b7f-0dc3-11ef-b287-c0e434d84b22',
    'meta': '3d63daab-0dc3-11ef-966d-c0e434d84b22',
    'google': '343a246e-0dc3-11ef-9323-c0e434d84b22',
    'netflix': '462af9c7-0dc3-11ef-ab57-c0e434d84b22'
}

for i in range(len(models.items())):  
    comp = list(models.keys())[i]
    paths_to_configs += glob(f'/diploma_info/datalake/wf_result/{problem}/{comp}/daily/*/research_task_*/*_{models[comp]}.yaml')
    
for file in paths_to_configs:
    with open(file, 'r') as f:
        configs.append(yaml.safe_load(f))

for config in configs:
    config['train_end'] = train_end
    config['test_start'] = test_start
    config['test_end'] = test_end
    if config['model_name'] == 'linear_regression':
        config['model'] = LinearRegression(**config['model_hyperparameters'])
    elif config['model_name'] == 'xgboost':
        config['model'] = XGBRegressor(**config['model_hyperparameters'])
    elif config['model_name'] == 'random_forest':
        config['model'] = RandomForestRegressor(**config['model_hyperparameters'])
    elif config['model_name'] == 'lightgbm':
        config['model'] = LGBMRegressor(**config['model_hyperparameters'])

In [15]:
df_preds_r = pd.DataFrame()

for company in companies:
    company = company.lower()
    
    full_set = full_sets[company]
    print(f'company: {company}')
    
    _ = [i for i in configs if i['company'] == company]
    print(f'count_configs {len(_)} \n')
    
    for config in _:
        print(config['model_name'], '==', config, '\n')
        
        y_pred = run_wfv(full_set, config, forecast_steps, company, models_list, problem, df_preds_c[company])
        
        df_preds_r = add_predictions(problem, company, df_preds_r, y_pred, config, n=None)

company: amazon
count_configs 1 

linear_regression == {'company': 'amazon', 'forecast_frequency': 'daily', 'forecast_periods': 4, 'model_hyperparameters': {'fit_intercept': False, 'n_jobs': -1, 'positive': False}, 'model_name': 'linear_regression', 'path_to_result': '/diploma_info/datalake/', 'problem': 'regression', 'target_column': 'close', 'test_end': datetime.datetime(2024, 5, 14, 0, 0), 'test_start': datetime.datetime(2024, 5, 13, 0, 0), 'train_end': datetime.datetime(2024, 5, 10, 0, 0), 'train_features': ['open', 'volume_lag_1', 'usa_inflation_%', '1_EUR_to_UAH_lag_1', '1_UAH_to_EUR_lag_1'], 'train_start': datetime.datetime(2023, 1, 1, 0, 0), 'unique_uuid': '1e38d412-0dc3-11ef-8dc7-c0e434d84b22', 'model': LinearRegression(fit_intercept=False, n_jobs=-1)} 

(2363, 6) (2363, 1)
                open  volume_lag_1  usa_inflation_%  1_EUR_to_UAH_lag_1  \
2015-01-02   15.6290    40960000.0             0.76              19.138   
2015-01-05   15.3505    55664000.0             0.76     

In [16]:
# Example of results 

'''
# classification
{
    'amazon': {
        'exp_1': [y_pred],
        'exp_2': [y_pred]
    }
}

# regression

{
    'amazon': [y_pred]
}
'''

"\n# classification\n{\n    'amazon': {\n        'exp_1': [y_pred],\n        'exp_2': [y_pred]\n    }\n}\n\n# regression\n\n{\n    'amazon': [y_pred]\n}\n"

In [17]:
df_preds_c

Unnamed: 0,amazon,apple,google,meta,netflix
2024-05-13,0.0,1.0,1.0,1.0,1.0
2024-05-14,1.0,1.0,0.0,0.0,0.0
2024-05-15,0.0,0.0,0.0,0.0,0.0
2024-05-16,0.0,1.0,0.0,1.0,0.0
2024-05-17,0.0,0.0,1.0,0.0,0.0
2024-05-20,0.0,0.0,1.0,0.0,0.0
2024-05-21,0.0,0.0,1.0,0.0,0.0
2024-05-22,0.0,0.0,1.0,0.0,0.0


In [18]:
df_preds_r

Unnamed: 0,amazon,apple,google,meta,netflix
2024-05-13,186.03155,183.271258,171.679079,479.821617,615.681277
2024-05-14,188.504142,183.271258,169.541665,477.001067,606.921298
2024-05-15,185.154331,182.275392,169.023829,471.507238,597.289728
2024-05-16,184.263789,183.544792,168.505992,472.642457,587.658159
2024-05-17,183.373248,181.144323,170.261429,460.51958,578.026589
2024-05-20,182.482706,175.737295,169.743592,455.025751,568.39502
2024-05-21,181.592164,174.415598,169.225756,449.531922,558.76345
2024-05-22,180.701623,171.75685,168.707919,444.038093,549.131881


In [20]:
full_sets_orig = {}

for company, period in itertools.product(companies, time_period):
    path = f"/diploma_info/datalake/processed_data/{company}_{period}.csv"

    full_sets_orig[company.lower()] = pd.read_csv(
        path,
        parse_dates=["date"],
        date_parser=date_parse,
        index_col=["date"],
    )

df_res = pd.DataFrame()

for company in companies:
    
    company = company.lower()
    
    fact = full_sets_orig[company].loc[df_preds_c.index, ['close']].values.ravel()
    pred = df_preds_r[company].values
    
    df_res[f'{company}_close'] = (pred/fact - 1) * 100
    
    fact = full_sets_orig[company].loc[df_preds_c.index, ['growth']].values.ravel()
    pred = df_preds_c[company].values
    
    df_res[f'{company}_growth'] = [1 if er==0 else 0 for er in (fact-pred)]
    
df_res.index = df_preds_c.index

KeyError: "['2024-05-15', '2024-05-16', '2024-05-17', '2024-05-20', '2024-05-21', '2024-05-22'] not in index"

In [22]:
full_sets_orig['netflix'].loc[df_preds_c.index, ['close', 'growth']]

KeyError: "['2024-05-15', '2024-05-16', '2024-05-17', '2024-05-20', '2024-05-21', '2024-05-22'] not in index"

In [None]:
df_res