# **Target Processing**

In [10]:
# Necessary libraries

import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path

In [11]:
# Adjust directories (sources and outputs)

# Load variables from a local .env file (if present) into the environment.
load_dotenv()


def _require_dir(env_var: str) -> Path:
    """Return the directory configured in environment variable ``env_var``.

    Parameters
    ----------
    env_var : str
        Name of the environment variable holding the directory path.

    Returns
    -------
    pathlib.Path
        The configured value wrapped in a ``Path``.

    Raises
    ------
    RuntimeError
        If the variable is not set. Without this check ``Path(None)`` fails
        with a ``TypeError`` that does not say which variable is missing.
    """
    configured = os.getenv(env_var)
    if configured is None:
        raise RuntimeError(
            f"Environment variable {env_var} is not set; "
            "define it in the environment or in the .env file."
        )
    return Path(configured)


raw_data_path = _require_dir("RAW_DATA_PATH")
processed_data_path = _require_dir("PROCESSED_DATA_PATH")
prepared_data_path = _require_dir("PREPARED_DATA_PATH")

# SPY Load

In [12]:
# Main feature - S&P 500 ETF (SPY)

spy_csv = pd.read_csv(raw_data_path / 'SPY_raw_data.csv', header = 0)

# The first two rows after the header are discarded before parsing.
# NOTE(review): presumably these are extra header rows written by the
# downloader -- confirm against the raw CSV.
spy = (
    spy_csv
    .iloc[2:]
    .reset_index(drop = True)
    # The first column holds the dates; give it an explicit name.
    .rename(columns = {spy_csv.columns[0]: 'Date'})
    .assign(Date = lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    # Remaining columns are converted to numbers; unparseable cells become NaN.
    .apply(pd.to_numeric, errors = 'coerce')
)

# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit a stray "None" line.
spy.info()
print("--" * 30)

spy.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5256 entries, 2005-01-03 to 2025-11-20
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5256 non-null   float64
 1   High    5256 non-null   float64
 2   Low     5256 non-null   float64
 3   Open    5256 non-null   float64
 4   Volume  5256 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 246.4 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-03,81.847115,82.840437,81.57497,82.704362,55748000
2005-01-04,80.847,82.010413,80.581661,81.955983,69167600
2005-01-05,80.289101,81.132744,80.282296,80.785759,65667300
2005-01-06,80.697327,81.064721,80.459202,80.581667,47814700
2005-01-07,80.58168,81.119164,80.370766,80.94227,55847700


In the following two sections, the daily returns and the daily log-returns are calculated, and each series is exported to its own CSV file.

# SPY Returns

In [13]:
# SPY Daily Returns

# Work on a copy so the loaded `spy` frame stays untouched.
spy2 = spy.copy()

# Simple day-over-day return of the close price; the first row has no
# previous value and is therefore NaN.
spy2['SPY_Returns'] = spy2['Close'].pct_change()

spy_daily_returns = spy2[['SPY_Returns']].copy()

# Save the processed data
# (`os.join` does not exist; `processed_data_path` is a Path, so join with `/`.)

data_processed_path = processed_data_path / 'spy_daily_returns.csv'
spy_daily_returns.to_csv(data_processed_path)

# Preview goes last: only the final expression of a cell is rendered.
spy2.head(10)

Unnamed: 0_level_0,Close,High,Low,Open,Volume,SPY_Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-01-03,81.847115,82.840437,81.57497,82.704362,55748000,
2005-01-04,80.847,82.010413,80.581661,81.955983,69167600,-0.012219
2005-01-05,80.289101,81.132744,80.282296,80.785759,65667300,-0.006901
2005-01-06,80.697327,81.064721,80.459202,80.581667,47814700,0.005084
2005-01-07,80.58168,81.119164,80.370766,80.94227,55847700,-0.001433
2005-01-10,80.962662,81.275626,80.513623,80.513623,56563300,0.004728
2005-01-11,80.404732,80.785731,80.275462,80.717696,63099700,-0.006891
2005-01-12,80.670135,80.85383,79.955757,80.554476,72720500,0.003301
2005-01-13,80.023773,80.778972,79.942128,80.717737,55537500,-0.008012
2005-01-14,80.445618,80.642922,80.119048,80.261923,42032500,0.005271


# SPY Log-Returns

In [14]:
# SPY Daily Log-Returns

# Log-return: ln(Close_t / Close_{t-1}); the first row is NaN because it has
# no previous close.
spy2['SPY_Log_Returns'] = np.log(spy2['Close'] / spy2['Close'].shift(1))

spy_daily_log_returns = spy2[['SPY_Log_Returns']].copy()

# Save the processed data
# (`os.join` does not exist; `processed_data_path` is a Path, so join with `/`.)

data_processed_path = processed_data_path / 'spy_daily_log_returns.csv'
# Write the log-returns frame itself, not `spy_daily_returns`, so the file
# content matches its name.
spy_daily_log_returns.to_csv(data_processed_path)

# Preview goes last: only the final expression of a cell is rendered.
spy2.head(10)

Unnamed: 0_level_0,Close,High,Low,Open,Volume,SPY_Returns,SPY_Log_Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005-01-03,81.847115,82.840437,81.57497,82.704362,55748000,,
2005-01-04,80.847,82.010413,80.581661,81.955983,69167600,-0.012219,-0.012295
2005-01-05,80.289101,81.132744,80.282296,80.785759,65667300,-0.006901,-0.006925
2005-01-06,80.697327,81.064721,80.459202,80.581667,47814700,0.005084,0.005072
2005-01-07,80.58168,81.119164,80.370766,80.94227,55847700,-0.001433,-0.001434
2005-01-10,80.962662,81.275626,80.513623,80.513623,56563300,0.004728,0.004717
2005-01-11,80.404732,80.785731,80.275462,80.717696,63099700,-0.006891,-0.006915
2005-01-12,80.670135,80.85383,79.955757,80.554476,72720500,0.003301,0.003295
2005-01-13,80.023773,80.778972,79.942128,80.717737,55537500,-0.008012,-0.008045
2005-01-14,80.445618,80.642922,80.119048,80.261923,42032500,0.005271,0.005258
