In [4]:
import pandas as pd
import yaml

# Load the insurance dataset into a DataFrame
df = pd.read_csv(r'D:\Data Science\iNeuron\Internship\Insurance Premium Prediction\data\insurance.csv')

# Build the schema: each column name mapped to the name of its pandas dtype
schema = {
    'COLUMNS': {column: dtype.name for column, dtype in df.dtypes.items()}
}

# Persist the schema as block-style YAML in the current working directory
with open('schema.yaml', 'w') as yaml_file:
    yaml.dump(schema, stream=yaml_file, default_flow_style=False)

print("Schema created and saved to schema.yaml.")


Schema created and saved to schema.yaml.


In [9]:
import yaml
import importlib

# Function to create an instance of a class
def create_instance(class_path, init_args=None):
    """Import a class from its dotted path and return a new instance of it.

    Args:
        class_path: Fully qualified dotted path of the class, e.g.
            ``'package.module.ClassName'``. Everything before the last dot is
            imported as the module; the final component is looked up on it.
        init_args: Optional mapping of keyword arguments forwarded to the
            constructor. ``None`` or an empty mapping calls the constructor
            with no arguments.

    Returns:
        A new instance of the resolved class.

    Raises:
        ValueError: If ``class_path`` has no module part (no dot before the
            class name).
        ImportError: If the module cannot be imported.
        AttributeError: If the module does not define the requested name.
    """
    # Split once on the last dot: left side is the module, right side the class.
    module_name, _, class_name = class_path.rpartition('.')
    if not module_name:
        # importlib would raise ValueError("Empty module name") here anyway;
        # raise the same exception type with a message that names the input.
        raise ValueError(
            f"class_path must be a dotted 'module.ClassName' path, got {class_path!r}"
        )

    class_ = getattr(importlib.import_module(module_name), class_name)

    # A falsy init_args (None or {}) means "no constructor arguments".
    return class_(**(init_args or {}))

# Load the YAML file
# Raw string: the Windows path contains backslashes (\D, \i, \I, \p) that are
# invalid escape sequences in a normal string literal and trigger warnings.
with open(r'D:\Data Science\iNeuron\Internship\Insurance Premium Prediction\params\params.yaml', 'r') as yaml_file:
    models_config = yaml.load(yaml_file, Loader=yaml.FullLoader)

# Create a dictionary to store the model instances, keyed by model name
models = {}

# Iterate through the model configurations
for model_info in models_config['models']:
    # Shallow-copy the constructor arguments so that swapping in the estimator
    # instance below does not mutate the loaded configuration. `or {}` also
    # covers a missing key and an explicit `init_args: null` entry.
    init_args = dict(model_info.get('init_args') or {})

    if 'estimator' in init_args:
        # A nested estimator is described the same way as a model
        # ('class' plus optional 'init_args'); build it first and pass the
        # instance to the outer model's constructor.
        estimator_info = init_args['estimator']
        init_args['estimator'] = create_instance(
            estimator_info['class'],
            estimator_info.get('init_args', {}),
        )

    # An empty init_args makes create_instance call the constructor bare
    models[model_info['name']] = create_instance(model_info['class'], init_args)

# Now, you have a dictionary of model instances
# You can access them like this:
# models['LinearRegression']
# models['Lasso']
# models['Ridge']
# ... and so on


In [10]:
# Bare expression: displays the name -> model-instance dictionary as cell output
models

{'LinearRegression': LinearRegression(),
 'Lasso': Lasso(),
 'Ridge': Ridge(),
 'Elasticnet': ElasticNet(),
 'DecisionTree': DecisionTreeRegressor(random_state=42),
 'SVR linear': SVR(kernel='linear'),
 'SVR rbf': SVR(),
 'KNNR': KNeighborsRegressor(n_neighbors=3),
 'RandomForest': RandomForestRegressor(random_state=42),
 'AdaBoost': AdaBoostRegressor(estimator=DecisionTreeRegressor(random_state=42)),
 'Gradient Boosting': GradientBoostingRegressor(),
 'XGB': XGBRegressor(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, ma

In [2]:
import yaml
import importlib
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Function to create an instance of a class
def create_instance(class_path, init_args=None):
    """Import a class from its dotted path and return a new instance of it.

    Args:
        class_path: Fully qualified dotted path of the class, e.g.
            ``'package.module.ClassName'``. Everything before the last dot is
            imported as the module; the final component is looked up on it.
        init_args: Optional mapping of keyword arguments forwarded to the
            constructor. ``None`` or an empty mapping calls the constructor
            with no arguments.

    Returns:
        A new instance of the resolved class.

    Raises:
        ValueError: If ``class_path`` has no module part (no dot before the
            class name).
        ImportError: If the module cannot be imported.
        AttributeError: If the module does not define the requested name.
    """
    # Split once on the last dot: left side is the module, right side the class.
    module_name, _, class_name = class_path.rpartition('.')
    if not module_name:
        # importlib would raise ValueError("Empty module name") here anyway;
        # raise the same exception type with a message that names the input.
        raise ValueError(
            f"class_path must be a dotted 'module.ClassName' path, got {class_path!r}"
        )

    class_ = getattr(importlib.import_module(module_name), class_name)

    # A falsy init_args (None or {}) means "no constructor arguments".
    return class_(**(init_args or {}))

# Load the YAML file
# Raw string: the Windows path contains backslashes (\D, \i, \I, \p) that are
# invalid escape sequences in a normal string literal and trigger warnings.
with open(r'D:\Data Science\iNeuron\Internship\Insurance Premium Prediction\params\params.yaml', 'r') as yaml_file:
    preprocessor_config = yaml.load(yaml_file, Loader=yaml.FullLoader)

preprocessor_section = preprocessor_config['preprocessor']

# Build one Pipeline per column group, keyed by the group name
column_group_pipelines = {}
for column_group, preprocessing_steps in preprocessor_section['preprocessing_steps'].items():
    # Each step becomes a (name, transformer) pair in the configured order;
    # a step without 'init_args' is constructed with no arguments.
    steps = [
        (step_info['name'], create_instance(step_info['class'], step_info.get('init_args')))
        for step_info in preprocessing_steps
    ]
    column_group_pipelines[column_group] = Pipeline(steps)

# Column names belonging to each group, keyed by the same group names
column_groups = preprocessor_section['column_groups']

# Create a ColumnTransformer that applies each group's pipeline to its columns
preprocessor = ColumnTransformer(
    transformers=[
        (group, pipeline, column_groups[group])
        for group, pipeline in column_group_pipelines.items()
    ]
)

# Now, you have pipelines for each column group and a ColumnTransformer combining them


In [3]:
# Bare expression: displays the assembled ColumnTransformer as cell output
preprocessor

In [4]:
# Print each column group's name alongside the columns assigned to it
for group in column_group_pipelines:
    print(group, column_groups[group])

numerical_cols ['age', 'bmi', 'children']
categorical_cols ['region', 'sex', 'smoker']
