In [2]:
import os

# Move the working directory one level up from where the kernel started.
# A bare os.chdir("../") is relative to the *current* directory, so every
# re-run of this cell would climb one more level. Remember the starting
# directory the first time the cell runs and always resolve the target
# against it, which makes the cell safe to execute repeatedly.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, "../"))

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataPreparationConfig:
    """Immutable settings consumed by the data-preparation stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Directory that receives the stage's output files.
    root_dir: Path
    # Location of the input CSV that the stage reads.
    data_path: Path

In [4]:
from orangePlatform.constants import *
from orangePlatform.utils.common import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects.

    The three files (config, params, schema) are read with ``read_yaml`` and
    kept on the instance; the artifacts root directory named in the config
    file is created on construction.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_preparation_config(self) -> DataPreparationConfig:
        """Build the data-preparation settings from the loaded config.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A ``DataPreparationConfig`` whose ``root_dir`` and ``data_path``
            are ``Path`` objects, as the dataclass annotations promise.
        """
        section = self.config.data_preparation

        create_directories([section.root_dir])

        # The YAML values are handed over as loaded (presumably plain strings);
        # coerce them so the dataclass really holds Path instances.
        return DataPreparationConfig(
            root_dir=Path(section.root_dir),
            data_path=Path(section.data_path),
        )

In [6]:
import os
from orangePlatform import logger
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [7]:
from sklearn.preprocessing import MinMaxScaler

In [8]:
# Inclusive calendar window, written as MM/DD/YYYY.
start_date = '03/23/2023'
end_date = '03/21/2024'

# One timestamp per day from start_date through end_date (default daily frequency).
date_range = pd.date_range(start_date, end_date)

# Column-less frame indexed by those daily timestamps.
df_LTE = pd.DataFrame(index=date_range)

In [9]:
class DataPreparation:
    """Splits the ``LTE`` column positionally and min-max scales both parts.

    Input is read from ``config.data_path``; the scaled parts are written to
    ``train_LTE.csv`` and ``test_LTE.csv`` inside ``config.root_dir``.
    """

    def __init__(self, config: "DataPreparationConfig"):
        """Store the stage configuration (``data_path`` and ``root_dir`` are used)."""
        self.config = config

    @staticmethod
    def _split_frame(frame, train_size):
        """Return ``(first train_size rows, remaining rows)`` of ``frame``.

        Raises:
            ValueError: if either part would be empty.
        """
        total = len(frame)
        if not 0 < train_size < total:
            raise ValueError(
                f"train_size must be between 1 and {total - 1} "
                f"for {total} rows, got {train_size}"
            )
        # .iloc makes the cut positional regardless of the index type.
        return frame.iloc[:train_size], frame.iloc[train_size:]

    def train_test_spliting(self, train_size: int = 310):
        """Split the LTE series in order, scale it, and save both parts as CSV.

        The first ``train_size`` rows form the training set and the rest the
        test set; rows are not shuffled. With the default of 310 the recorded
        run produced 310 training and 55 test rows (roughly 85/15).

        The scaler is fitted on the training rows only and then applied to
        both parts, so test values can fall outside the [0, 1] range.

        Args:
            train_size: Number of leading rows used for training.

        Raises:
            ValueError: if ``train_size`` leaves the training or test set empty.
        """
        data = pd.read_csv(self.config.data_path, index_col='Period')
        lte = data[['LTE']]

        train_lstm, test_lstm = self._split_frame(lte, train_size)

        scaler = MinMaxScaler()
        scaler.fit(train_lstm)
        scaled_train = scaler.transform(train_lstm)
        scaled_test = scaler.transform(test_lstm)

        # The scaled arrays are written without an index; the single column
        # keeps pandas' default label (0), not the original column name.
        pd.DataFrame(scaled_train).to_csv(
            os.path.join(self.config.root_dir, "train_LTE.csv"), index=False
        )
        pd.DataFrame(scaled_test).to_csv(
            os.path.join(self.config.root_dir, "test_LTE.csv"), index=False
        )

        logger.info("Splited data into training and test sets")
        logger.info(scaled_train.shape)
        logger.info(scaled_test.shape)

        print(scaled_train.shape)
        print(scaled_test.shape)
        

In [10]:
# Run the data-preparation stage: load the configuration, build the stage
# settings, and write the scaled train/test CSVs. Errors propagate as-is;
# a wrapper that only re-raises the caught exception adds nothing.
config = ConfigurationManager()
data_preparation_config = config.get_data_preparation_config()
data_preparation = DataPreparation(config=data_preparation_config)
data_preparation.train_test_spliting()

[2024-05-11 00:06:40,536: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-11 00:06:40,538: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-11 00:06:40,541: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-05-11 00:06:40,544: INFO: common: created directory at: artifacts]
[2024-05-11 00:06:40,545: INFO: common: created directory at: artifacts/data_preparation]
[2024-05-11 00:06:40,564: INFO: 3377883501: Splited data into training and test sets]
[2024-05-11 00:06:40,564: INFO: 3377883501: (310, 1)]
[2024-05-11 00:06:40,564: INFO: 3377883501: (55, 1)]
(310, 1)
(55, 1)
