In [1]:
import os

In [2]:
%pwd

'c:\\code\\ML\\time_series_forecast\\time-series-forecast\\research'

In [3]:
# Step up to the project root only while the kernel is still inside the
# notebook folder, so re-running this cell does not climb one more level
# each time (which would break the relative config paths used below).
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
%pwd

'c:\\code\\ML\\time_series_forecast\\time-series-forecast'

In [5]:
#config.yaml


In [6]:
#config entity

In [7]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class EDA:
    """Filesystem locations consumed by the EDA stage."""

    # CSV file that the EDA component loads.
    data_path: Path
    # Directory that receives the text report and the PNG figures.
    report_path: Path
    # Output directory created together with the report directory.
    data_output: Path

In [8]:
#configuration


In [9]:
from src.time_series.constants import *
from src.time_series.utils.common import create_directories , read_yaml
from src.time_series.entity.config_entity import DataIngestionconfig



class ConfigurationManager:
    """Turns the project's YAML settings into per-stage config objects."""

    def __init__(self, config_file_path=CONFIG_FILE_PATH, params_file_path=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_file_path: location of the main config YAML.
            params_file_path: location of the params YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionconfig:
        """Return the data-ingestion settings from the ``data_ingestion`` section."""
        section = self.config.data_ingestion
        return DataIngestionconfig(
            root_dir=section.root_dir,
            source_url=section.source_url,
            local_data_file=section.local_data_file,
        )

    def get_eda_config(self) -> EDA:
        """Return the EDA settings, creating the report and output directories first."""
        section = self.config.EDA
        # One call per directory, report directory first.
        for directory in (section.report_path, section.data_output):
            create_directories([directory])
        return EDA(
            data_path=section.data_path,
            report_path=section.report_path,
            data_output=section.data_output,
        )
    

In [11]:
#component
import pandas as pd

In [18]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
class EDAComponent:
    """Exploratory analysis of a time series stored as a CSV with a ``Month`` column.

    Reads the CSV at ``config.data_path`` and writes a text summary plus two
    PNG figures into ``config.report_path``.
    """

    def __init__(self, config: "EDA"):
        # Only stored here; nothing touches the disk until a method is called.
        self.config = config

    def load_data(self):
        """Load the input CSV indexed by its parsed ``Month`` column.

        Returns:
            pandas.DataFrame: contents of ``config.data_path`` with ``Month``
            parsed as dates and used as the index.

        Raises:
            FileNotFoundError: if ``config.data_path`` does not exist.
        """
        if not os.path.exists(self.config.data_path):
            raise FileNotFoundError(f"Input file not found: {self.config.data_path}")
        return pd.read_csv(self.config.data_path, parse_dates=['Month'], index_col='Month')

    def get_eda(self):
        """Generate the EDA artifacts in ``config.report_path``.

        Writes ``data_report.txt``, ``plot.png`` and ``seasonal_decompose.png``.
        The loaded frame must have exactly one value column, because it is
        renamed to ``passengers`` by assigning a one-element column list.

        Raises:
            FileNotFoundError: propagated from ``load_data``.
        """
        df = self.load_data()
        df.columns = ['passengers']
        self._write_report(df)
        self._save_series_plot(df)
        self._save_decomposition_plot(df)

    def _write_report(self, df):
        """Write the head, per-column counts and row total to ``data_report.txt``."""
        report_file = os.path.join(self.config.report_path, 'data_report.txt')
        # Explicit encoding keeps the report identical across platforms.
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("Data Head:\n")
            f.write(df.head().to_string())
            f.write("\n\n Column-wise Count:\n")
            f.write(df.count().to_string())
            f.write("\n\n Total Rows:\n")
            f.write(str(len(df)))

    def _save_series_plot(self, df):
        """Save a line plot of the series to ``plot.png``."""
        fig, ax = plt.subplots(figsize=(10, 5))
        try:
            ax.plot(df, label='monthly passengers')
            ax.set_xlabel('Date')
            ax.set_ylabel('Number of Passengers')
            ax.legend()
            ax.grid(True)
            fig.savefig(os.path.join(self.config.report_path, 'plot.png'))
        finally:
            # Close instead of clear: a cleared figure stays registered with
            # pyplot, keeps its memory, and is rendered empty by the notebook.
            plt.close(fig)

    def _save_decomposition_plot(self, df):
        """Save the multiplicative seasonal decomposition to ``seasonal_decompose.png``."""
        result = seasonal_decompose(df, model='multiplicative')
        fig = result.plot()
        try:
            fig.set_size_inches(10, 8)
            fig.tight_layout()
            fig.savefig(os.path.join(self.config.report_path, 'seasonal_decompose.png'))
        finally:
            plt.close(fig)



In [19]:
#pipeline

In [20]:
# Run the EDA stage end to end: build the stage config, then generate the
# report and figures. No try/except here: a handler that only re-raises adds
# nothing but an extra traceback frame, so errors are left to propagate.
config = ConfigurationManager()
eda_config = config.get_eda_config()
eda_comp = EDAComponent(config=eda_config)
eda_comp.get_eda()

[2025-07-02 22:07:27,074: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-07-02 22:07:27,077: INFO: common: yaml file: params.yaml loaded successfully]
[2025-07-02 22:07:27,078: INFO: common: created directory at: artifacts]
[2025-07-02 22:07:27,079: INFO: common: created directory at: artifacts/EDA/report]
[2025-07-02 22:07:27,081: INFO: common: created directory at: artifacts/EDA]


<Figure size 1000x500 with 0 Axes>

<Figure size 1000x800 with 0 Axes>