In [22]:
import os
from pathlib import Path

# Switch to the project root so that `src.stockpredictor...` imports and the
# relative paths in the config resolve. The root is found by walking up from
# the current directory until a folder containing `src/stockpredictor` is
# reached, which avoids hard-coding a machine-specific absolute path.
_start_dir = Path.cwd()
for _candidate in (_start_dir, *_start_dir.parents):
    if (_candidate / "src" / "stockpredictor").is_dir():
        os.chdir(_candidate)
        break
else:
    raise FileNotFoundError(
        f"Could not find a project root containing 'src/stockpredictor' at or above {_start_dir}"
    )
print(os.getcwd())  # Verify the working directory

D:\repositories\StockPredictorApp


In [25]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataTransformationConfig:
    """Filesystem locations used by the data-transformation stage.

    Every field is coerced to ``pathlib.Path`` on construction, so the declared
    field types hold even when plain strings (for example values read from a
    config file) are passed in.
    """

    # Directory that holds this stage's artifacts.
    root_dir: Path
    # CSV file the stage reads.
    transformation_input: Path
    # CSV file the stage writes.
    transformation_output: Path

    def __post_init__(self) -> None:
        """Normalise all fields to ``Path`` instances."""
        self.root_dir = Path(self.root_dir)
        self.transformation_input = Path(self.transformation_input)
        self.transformation_output = Path(self.transformation_output)

In [26]:
from src.stockpredictor.utils.common import create_directories
from src.stockpredictor.constants.__init import *
from src.stockpredictor.entity import (DataIngestionConfig,
                                       DataValidationConfig)
from src.stockpredictor.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Reads the project configuration and builds per-stage config objects."""

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH) -> None:
        """Load the main config file and create the artifacts root directory.

        Args:
            config_filepath: Location of the main YAML configuration file.
            params_filepath: Accepted for interface compatibility; it is not
                read here (loading of the params file is currently disabled).
        """
        loaded_config = read_yaml(config_filepath)
        self.config = loaded_config
        create_directories([loaded_config.artifacts_root])

    def get_data_transformation_config(self):
        """Create the stage directory and return a ``DataTransformationConfig``.

        The values are taken from the ``data_transformation`` section of the
        loaded configuration.
        """
        stage = self.config.data_transformation
        create_directories([stage.root_dir])
        return DataTransformationConfig(
            root_dir=stage.root_dir,
            transformation_input=stage.transformation_input,
            transformation_output=stage.transformation_output,
        )

# Build the configuration manager (this reads the YAML config and creates the
# artifacts root directory), then fetch the data-transformation settings.
manager = ConfigurationManager()
data_transformation_config = manager.get_data_transformation_config()


[2024-11-23 11:46:36,196: INFO: common: yaml file: D:\repositories\StockPredictorApp\config\config.yaml loaded successfully]
[2024-11-23 11:46:36,198: INFO: common: Directory artifacts created successfully]
[2024-11-23 11:46:36,201: INFO: common: Directory artifacts/data_transformation created successfully]


In [27]:
import pandas as pd
class DataTransformation():
    """Adds engineered features to validated daily price data and saves the result."""

    def __init__(self, config: "DataTransformationConfig") -> None:
        """Store the stage configuration.

        Args:
            config: Object exposing ``transformation_input`` and
                ``transformation_output`` (paths of the CSV to read and write).
        """
        self.config = config

    def calculate_rsi(self, df, window=14):
        """Compute a Relative Strength Index from the ``Close`` column.

        Gains and losses are averaged with a simple rolling mean over
        ``window`` rows.

        Args:
            df: DataFrame containing a numeric ``Close`` column.
            window: Number of rows in the rolling window.

        Returns:
            A Series aligned with ``df``. The first ``window`` rows are NaN.
            A window with gains but no losses yields 100; a window with
            neither gains nor losses yields NaN (0 / 0).
        """
        delta = df['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    def add_features(self, validated_dataframe: pd.DataFrame) -> pd.DataFrame:
        """Return a new DataFrame with return, lag, rolling and calendar features.

        The input frame is not modified. It must contain numeric ``Open``,
        ``High``, ``Low`` and ``Close`` columns and a datetime ``Date`` column.

        Args:
            validated_dataframe: Validated price data, one row per period.

        Returns:
            A copy of the input with the feature columns appended. Rows that
            contain any NaN are dropped; the longest look-back used here is 30
            rows, so at least the first 30 rows are removed.
        """
        # Work on a copy so the caller's frame is left untouched.
        features = validated_dataframe.copy()
        close = features['Close']

        # Return and ratio features.
        features['Daily Returns'] = close.pct_change()
        features['High/Low Ratio'] = features['High'] / features['Low']
        features['Close/Open Ratio'] = close / features['Open']
        features['Cumulative Returns'] = (1 + features['Daily Returns']).cumprod() - 1

        # Lagged closing prices.
        for lag in (1, 2, 7, 30):
            features[f'lag_{lag}'] = close.shift(lag)

        # Rolling statistics of the closing price.
        for span in (5, 10, 30):
            features[f'moving_avg_{span}'] = close.rolling(window=span).mean()
        for span in (5, 10):
            features[f'volatility_{span}'] = close.rolling(window=span).std()

        # Relative Strength Index.
        features['rsi'] = self.calculate_rsi(features)

        # Percentage price change and calendar features.
        features['price_change_pct'] = close.pct_change() * 100
        features['day_of_week'] = features['Date'].dt.dayofweek
        features['month'] = features['Date'].dt.month

        # Price volatility indicators. NOTE: 'true_range' here is the plain
        # High - Low range; it does not take the previous close into account.
        features['true_range'] = features['High'] - features['Low']
        features['atr'] = features['true_range'].rolling(window=14).mean()

        # Remove rows with missing values, e.g. those created by the lag and
        # rolling-window features.
        return features.dropna()

    def transform_data(self) -> pd.DataFrame:
        """Read the input CSV, add features and write the result to the output CSV.

        Returns:
            The transformed DataFrame that was written to
            ``config.transformation_output``.
        """
        validated_dataframe = pd.read_csv(self.config.transformation_input, parse_dates=['Date'])
        transformed_dataframe = self.add_features(validated_dataframe)
        transformed_dataframe.to_csv(self.config.transformation_output, index=False)
        return transformed_dataframe

# Run the stage: read the input CSV, add the engineered features and write
# the result to the configured output CSV.
data_transformation = DataTransformation(data_transformation_config)
data_transformation.transform_data()

                         Date       Open       High        Low      Close  \
30  2022-02-15 00:00:00+00:00  24.948999  26.545000  24.784000  26.495001   
31  2022-02-16 00:00:00+00:00  26.259001  26.582001  25.552000  26.511000   
32  2022-02-17 00:00:00+00:00  25.629999  25.785000  24.165001  24.507000   
33  2022-02-18 00:00:00+00:00  24.667999  24.986000  23.100000  23.642000   
34  2022-02-22 00:00:00+00:00  23.035000  24.063999  23.000000  23.389999   
..                        ...        ...        ...        ...        ...   
496 2023-12-22 00:00:00+00:00  49.195000  49.382999  48.466999  48.830002   
497 2023-12-26 00:00:00+00:00  48.967999  49.599998  48.959999  49.278999   
498 2023-12-27 00:00:00+00:00  49.511002  49.680000  49.084999  49.417000   
499 2023-12-28 00:00:00+00:00  49.643002  49.883999  49.411999  49.521999   
500 2023-12-29 00:00:00+00:00  49.813000  49.997002  48.750999  49.521999   

     Adj Close     Volume  Daily Returns  High/Low Ratio  Close/Open Ratio 