In [None]:
import pandas as pd
import numpy as np

class Utils:
    """Empty placeholder class; no attributes or methods are defined yet."""

class ConfigManager:
    """Skeleton for configuration management; no behaviour is implemented yet.

    Both methods are stubs. They take ``self`` so the class can actually be
    instantiated and its methods called on an instance.
    """

    def __init__(self) -> None:
        """Create a manager; no state is stored at present."""

    def update_config(self) -> None:
        """Stub for updating the configuration; currently does nothing."""

class IOHandler:
    """Skeleton I/O handler that records its settings as instance attributes.

    The read/write methods are stubs that accept arbitrary keyword arguments
    and currently return ``None``.
    """

    def __init__(self, data_directory="data", **kwargs):
        """Store ``data_directory`` and every extra keyword as an attribute.

        Args:
            data_directory: Value saved on ``self.data_directory``.
            **kwargs: Each ``name=value`` pair is set as ``self.name = value``.
        """
        self.data_directory = data_directory
        for name, value in kwargs.items():
            setattr(self, name, value)

    def read_data(self, **kwargs):
        """Stub for reading data; not implemented yet."""

    def read_config(self, **kwargs):
        """Stub for reading configuration; not implemented yet."""

    def write_data(self, **kwargs):
        """Stub for writing data; not implemented yet."""

    def write_config(self, **kwargs):
        """Stub for writing configuration; not implemented yet."""

    
class Validator:
    """Skeleton validator; the validation logic is not implemented yet."""

    def __init__(self) -> None:
        """Create a validator; no state is stored at present."""

    def validate(self, data, config):
        """Stub taking ``data`` and ``config``; currently returns ``None``."""

class Extractor:
    """Skeleton extractor; the extraction logic is not implemented yet."""

    def __init__(self) -> None:
        """Create an extractor; no state is stored at present."""

    def extract(self, data, config):
        """Stub taking ``data`` and ``config``; currently returns ``None``."""

class Parser:
    """Skeleton parser for data and configs; nothing is implemented yet."""

    def __init__(self) -> None:
        """Create a parser; no state is stored at present."""

    def parse_data(self, data, config):
        """Stub taking ``data`` and ``config``; currently returns ``None``."""

    def parse_config(self, config):
        """Stub taking ``config``; currently returns ``None``."""

class Processor:
    """Skeleton processor; the processing logic is not implemented yet."""

    def __init__(self) -> None:
        """Create a processor; no state is stored at present."""

    def process(self, data, config):
        """Stub taking ``data`` and ``config``; currently returns ``None``."""

class ModelTransformer:
    """Empty placeholder class; no attributes or methods are defined yet."""

class Model:
    """Empty placeholder class; no attributes or methods are defined yet."""

__DataLoader:__

1. Read configs (ConfigManager)
2. Validate configs (Parser)
3. Read data  (IOHandler)
4. Extract data info  (MetadataExtractor)
5. Validate data (Validator)
6. Log info related to data (Logger)
7. Parse data in right format (Parser)
8. Return data and configs (DataLoader)


__ModelTransformer:__

1. Initialize using the configs (ModelTransformer)

__Model:__

1. Initialize using the configs (Model)

__Pipeline:__

1. Pipeline = ModelTransformer + Model
2. Pipeline.fit(data)
3. Pipeline.predict(data)

### __`GC V1 - APIs`__

```python
from lola_generator_components.so.models import HoltWinters
model_input_data = pd.read_csv("model_input_data.csv")

model = HoltWinters(country = "tz", model_config = model_config, data_config = data_config)
prepared_data = model.prepare(model_input_data)
X_train, y_train, X_test, y_test = model.split(prepared_data)
model.fit(X_train, y_train)
model.recommend(X_test)
model.score(X_test, y_test)
```

### __`GC V2 - APIs`__

```python
from lola_generator_components.so.models import HoltWinters
from lola_generator_components.so.processors import HoltWintersTransformer
from lola_generator_components.utils import pipeline, time_series_split, DataLoader, ConfigManager

model_config, data_config = ConfigManager(**data_config).load_configs()
data = DataLoader(**data_config["io_config"]).load_data()
pipeline = pipeline([
                        ("ModelTransformer", HoltWintersTransformer(**data_config["processor_config"])),
                        ("Model", HoltWinters(**model_config))
                    ])
X_train, y_train, X_test, y_test = time_series_split(data, **data_config["split_config"])
pipeline.fit(X_train, y_train)
pipeline.recommend(X_test)
pipeline.score(X_test, y_test)
```

### __This is a one-time or infrequent task. Once you have the directory structure, `data`, and `configs`, you don't need to run `get_data` or `get_config` again unless you want to update them.__

```python
from lola_generator_components.utils import project_init
from lola_generator_components.utils import get_data
from lola_generator_components.utils import get_config
from lola_generator_components.utils import describe_models

project_init()
get_data()
get_config()
describe_models()
```

```bash
lola_generator_components init_project  # folder structure
lola_generator_components get_data      # get data
lola_generator_components get_config    # get config
```

In [None]:
# Sample data configuration with four sections. Entries set to None carry no
# value here (presumably optional or filled in later -- confirm).
sample_data_config_initial = {
    # Directories, file format, and column selection / typing / mapping.
    "io_config": {
        "data_directory": "data",
        "config_directory": "configs",
        "model_directory": "models",
        "log_directory": "logs",
        "compression": None,
        "data_format": "csv",
        "select_columns": ["poc_id", "sku_id", "quantity", "date"],
        "data_type_dict": {
            "number": ["quantity"],
            "category": ["poc_id", "sku_id"],
            "date": ["date"],
            "boolean": None,
        },
        "column_mapper": {
            "poc_id": "poc_id",
            "sku_id": "sku_id",
            "quantity": "quantity",
            "date": "date",
            "region_id": None,
            "country_id": None,
            "city_id": None,
            "order_id": None,
            "channel_id": None,
            "brand_id": None,
            "subsegment_id": None,
            "state_id": None,
            "segment_id": None,
            "route_id": None,
            "delivery_center_id": None,
            "deliver_region_id": None,
            "sales_route_id": None,
        },
    },
    # Date formats, numeric precision, and the schema file name.
    "parsing_config": {
        "date_format_configs": {"date": "%Y-%m-%d"},
        "input_data_precision": None,
        "output_data_precision": None,
        "optimized_data_schema_file_name": "optimized_data_schema.json",
    },
    # Checks to run; the None entries specify no columns, ranges, or values.
    "validator_config": {
        "column_check": None,
        "range_check": {"date": None, "quantity": None},
        "unique_check": {"poc_id": None, "sku_id": None},
        "null_check": ["poc_id", "sku_id", "date"],
        "duplicate_check": ["poc_id", "sku_id", "date"],
    },
    # Model-id construction and aggregation settings.
    "processor_config": {
        "model_id": "model_id",
        "model_id_constructor": ["poc_id", "sku_id"],
        "model_split_character": "|||",
        "aggregation_date_freq_configs": {"date": "MS"},
        # NOTE(review): "brand_id" is listed here but is not in
        # select_columns and maps to None in column_mapper -- confirm intended.
        "aggregation_category_level": ["poc_id", "sku_id", "brand_id"],
        "aggregation_function": {
            "quantity": ["sum", "mean"],
        },
    },
}

In [None]:
# Sample data configuration with concrete values for the validator checks.
sample_data_config_user_input = {
    # Column selection, typing, and mapping.
    "io_config": {
        "select_columns": ["poc_id", "sku_id", "quantity", "date"],
        "data_type_dict": {
            "number": ["quantity"],
            "category": ["poc_id", "sku_id"],
            "date": ["date"],
            "boolean": None,
        },
        "column_mapper": {
            "poc_id": "poc_id",
            "sku_id": "sku_id",
            "quantity": "quantity",
            "date": "date",
        },
    },
    # Date format per date column.
    "parsing_config": {
        "date_format_configs": {"date": "%Y-%m-%d"},
    },
    # Checks to run, with explicit columns, ranges, and values.
    "validator_config": {
        "column_check": ["poc_id", "sku_id", "quantity", "date"],
        "range_check": {
            "date": ["2020-01-01", "2020-01-05"],
            "quantity": [0, 2],
        },
        "unique_check": {
            "poc_id": [45, 66, 48],
            "sku_id": [48, 40, 65, 75],
        },
        "null_check": ["poc_id", "sku_id", "date"],
        "duplicate_check": ["poc_id", "sku_id", "date"],
    },
    # Model-id construction and aggregation settings.
    "processor_config": {
        "model_id": "model_id",
        "model_id_constructor": ["poc_id", "sku_id"],
        "aggregation_date_freq_configs": {"date": "MS"},
        "aggregation_category_level": ["poc_id", "sku_id"],
        "aggregation_function": {
            "quantity": ["sum"],
        },
    },
}