In [1]:
import os

In [2]:
%pwd

'd:\\Angus_Issues\\NorthCarolina_CameroonChapter_AngusIssues\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\Angus_Issues\\NorthCarolina_CameroonChapter_AngusIssues'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    root_dir: Path
    data_path: Path

In [6]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    
    def get_data_transformation_config(self) -> DataTransformationConfig:
        config = self.config.data_transformation

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir=config.root_dir,
            data_path=config.data_path,
        )

        return data_transformation_config


In [8]:
import os
from mlProject import logger
import pandas as pd
from statsmodels.tsa.exponential_smoothing.ets import ETSModel
import joblib

In [9]:
class DataTransformation:
    def __init__(self, config: DataTransformationConfig):
        self.config = config


    
    ## Note: You can add different data transformation techniques such as Scaler, PCA and all
    #You can perform all kinds of EDA in ML cycle here before passing this data to the model

    # I am only adding train_test_spliting cz this data is already cleaned up


    def train_test_spliting(self):
        data = pd.read_csv(self.config.data_path)
        data.drop('Unnamed: 0', axis=1, inplace=True)
        data['Date'] = pd.to_datetime(data['Date']) # Convert 'Date' column to datetime format
        data = data.set_index('Date')

        # Initialize forecast results DataFrame
        forecast_results = pd.DataFrame(columns=['Date', 'Cow', 'Predicted Body Weight (kg)'])

        # Define the date to split the data (adjust the date as needed)
        split_date = pd.to_datetime('2022-02-15')

        # Extract unique cow IDs from the DataFrame
        unique_cows = data['Cow'].unique()

        # Continue with the forecasting logic
        for cow in unique_cows:
            cow_data = data[data['Cow'] == cow]

            # Ensure that the index is a DatetimeIndex
            cow_data.index = pd.to_datetime(cow_data.index)

            # Split the data into train and test based on the split_date
            cow_train_data = cow_data[cow_data.index < split_date]
            cow_test_data = cow_data[cow_data.index >= split_date]

            # Fit the model and make predictions
            #model_ETS = ETSModel(cow_train_data['Body Weight (kg)'], error='add', trend=None, seasonal=None)
            model_ETS = ETSModel(cow_train_data['Body Weight (kg)'], order=self.config.order)
            model_fit = model_ETS.fit()

            # Make predictions for the test set
            cow_predictions = model_fit.predict(start=len(cow_train_data), end=len(cow_train_data) + len(cow_test_data) - 1)

            # Store the predictions
            cow_forecast_df = pd.DataFrame({
                'Date': cow_test_data.index,
                'Cow': cow,
                'Predicted Body Weight (kg)': cow_predictions
            })

            forecast_results = pd.concat([forecast_results, cow_forecast_df])

        forecast_results.reset_index(drop=True, inplace=True)
        print(forecast_results)

        # Ensure the directory exists
        model_dir = os.path.join(self.config.root_dir, 'model_trainer')
        os.makedirs(model_dir, exist_ok=True)


        joblib.dump(model_fit, os.path.join(self.config.root_dir, self.config.model_name))
    
        


In [10]:
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.train_test_spliting()
except Exception as e:
    raise e

[2024-08-17 19:58:36,742: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-17 19:58:36,747: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-17 19:58:36,750: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-08-17 19:58:36,752: INFO: common: created directory at: artifacts]
[2024-08-17 19:58:36,754: INFO: common: created directory at: artifacts/data_transformation]


AttributeError: 'DataTransformationConfig' object has no attribute 'order'