In [19]:
import pandas as pd
import os
import numpy as np

In [20]:
start_date = '2014-03-01'
end_date = '2025-03-10'
data_path = os.path.join('..', 'data', f'nvidia_data_{start_date}_{end_date}')
nvda_df = pd.read_csv(data_path)

In [37]:
class DataProcessor(object):
    def __init__(self):
        
        self.keep_cols = ['Date', 'Close_NVDA', 'Close_QQQ', 'Close_^VIX','Volume_NVDA',
       'Close_SMH', 'Close_SOXX', 'Close_XSD', 'Close_^DJI', 'Close_^GSPC', 'Close_^IXIC',
       'Volume_QQQ', 'Volume_SMH', 'Volume_SOXX', 'Volume_XSD', 'Volume_^DJI',
       'Volume_^GSPC', 'Volume_^IXIC', 'rsi', 'macd', 'macd_diff', 'stoch_k',
       'stoch_d', 'Month', 'Dayofweek']
        self.skewed_cols = ['Volume_NVDA', 'Volume_QQQ', 'Volume_SMH', 'Volume_SOXX', 'Volume_XSD', 'Volume_^DJI',
       'Volume_^GSPC', 'Volume_^IXIC', 'Close_SMH', 'Close_NVDA', 'Close_^VIX']
        
    def preprocess_data_for_training(self, df):
        '''
        Aplica transformaciónes matematicas a los datos y selecciona algunas columnas definidas en el notebook 03
        '''
        df = df.copy()
    
        df = df[self.keep_cols]
        df.dropna() #Botar los datos 
        
        #Transformar las columnas
        for col in self.skewed_cols:
            df[col] = np.log1p(df[col])
        #Aplicar logaritmica con signo
        df['macd'] = np.sign(df['macd']) * np.log1p(np.abs(df['macd']))


        return df

    def create_target(self, df):
        '''
        Función que crea el target que es precio de nvidia al dia siguente de los datos de cierre que tenemos
        '''
        df = df.copy() 
        df['target'] = df['Close_NVDA'].shift(-1)
        df['target'] = np.expm1(df['target'])

        df.dropna(inplace=True) #Elimina el ultimo dato para el training pues no tiene target disponible

        return df
        
        
        

In [38]:
data_processor = DataProcessor()
data_process = data_processor.preprocess_data_for_training(nvda_df)

In [39]:
data_process.skew(numeric_only= True) #Para nuevos procesos de entrenamiento revisar esto

Close_NVDA      0.389807
Close_QQQ       0.629095
Close_^VIX      0.884100
Volume_NVDA     0.140079
Close_SMH       0.163365
Close_SOXX      0.744385
Close_XSD       0.436048
Close_^DJI      0.281168
Close_^GSPC     0.630853
Close_^IXIC     0.541979
Volume_QQQ      0.130370
Volume_SMH     -0.170950
Volume_SOXX    -0.564514
Volume_XSD      0.358153
Volume_^DJI    -0.744349
Volume_^GSPC    0.383649
Volume_^IXIC    0.236521
rsi            -0.088880
macd            0.971241
macd_diff      -0.324383
stoch_k        -0.449128
stoch_d        -0.448524
Month          -0.011926
Dayofweek      -0.011044
dtype: float64

In [40]:
data_w_target = data_processor.create_target(data_process)

In [43]:
data_w_target.head()

Unnamed: 0,Date,Close_NVDA,Close_QQQ,Close_^VIX,Volume_NVDA,Close_SMH,Close_SOXX,Close_XSD,Close_^DJI,Close_^GSPC,...,Volume_^GSPC,Volume_^IXIC,rsi,macd,macd_diff,stoch_k,stoch_d,Month,Dayofweek,target
33,2014-04-17,0.36437,79.075996,2.664447,18.84942,3.041551,23.174227,31.846125,16408.539062,1864.849976,...,21.929665,21.393513,53.730401,0.00132,0.000213,54.744368,53.964396,4,3,0.443159
34,2014-04-21,0.366834,79.663055,2.656757,18.577719,3.046816,23.383673,32.191715,16449.25,1871.890015,...,21.694991,21.157927,56.305772,0.001754,0.000517,63.846261,57.217143,4,0,0.446949
35,2014-04-22,0.369457,80.296028,2.652537,19.357245,3.053514,23.593121,32.759811,16514.369141,1879.550049,...,21.89123,21.35237,58.931403,0.002375,0.000912,76.153434,64.914687,4,1,0.452159
36,2014-04-23,0.373052,79.589661,2.658159,19.382251,3.05038,23.572466,32.684071,16501.650391,1875.390015,...,21.850051,21.308137,62.287244,0.003248,0.00143,93.076427,77.69204,4,2,0.456186
37,2014-04-24,0.375821,80.351089,2.661657,19.785476,3.057471,23.70521,32.99651,16501.650391,1878.609985,...,21.88386,21.479796,64.688526,0.004215,0.001921,87.341788,85.523883,4,3,0.443633


In [44]:
ABT_path = os.path.join('..', 'data', 'ABTs', 'principal_ABT.csv')
data_w_target.drop(columns= ['Date']).to_csv(ABT_path)