In [None]:
from sklearn.model_selection import train_test_split

# Assumed target (per the previous tasks): 'total_claim_amount'.
# Every other column of customer_df is kept as a feature.
y = customer_df['total_claim_amount']
X = customer_df.drop(columns=['total_claim_amount'])

# Hold out 30% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=31,
)

# Cell output: shapes of the four pieces of the split.
X_train.shape, X_test.shape, y_train.shape, y_test.shape


In [None]:
from sklearn.preprocessing import OneHotEncoder

# Split each feature frame by dtype: numeric columns vs. everything else.
X_train_num = X_train.select_dtypes(include=[np.number])
X_train_cat = X_train.select_dtypes(exclude=[np.number])
X_test_num = X_test.select_dtypes(include=[np.number])
X_test_cat = X_test.select_dtypes(exclude=[np.number])

# One-hot encode the non-numeric columns. The encoder is fitted on the
# training split only, so the categories it knows come from the training rows.
encoder = OneHotEncoder(drop='first')
encoder.fit(X_train_cat)
X_train_cat_encoded = encoder.transform(X_train_cat).toarray()
X_test_cat_encoded = encoder.transform(X_test_cat).toarray()

# Wrap the dense arrays in dataframes that carry the encoder's feature names
# and each split's original row index (needed to line up with the numeric part).
X_train_cat_encoded_df = pd.DataFrame(
    X_train_cat_encoded,
    index=X_train_cat.index,
    columns=encoder.get_feature_names_out(X_train_cat.columns),
)
X_test_cat_encoded_df = pd.DataFrame(
    X_test_cat_encoded,
    index=X_test_cat.index,
    columns=encoder.get_feature_names_out(X_test_cat.columns),
)

# Cell output: first rows of both encoded frames.
X_train_cat_encoded_df.head(), X_test_cat_encoded_df.head()


In [None]:
from sklearn.preprocessing import PowerTransformer, MinMaxScaler

# 1. Learn the power transform from the numeric training columns only
power_transformer = PowerTransformer()
power_transformer.fit(X_train_num)

# 2. Apply the fitted transformer to both splits
X_train_num_transformed = power_transformer.transform(X_train_num)
X_test_num_transformed = power_transformer.transform(X_test_num)

# 3. Rebuild dataframes so column names and row indexes survive the transform
X_train_num_transformed_df = pd.DataFrame(
    X_train_num_transformed,
    index=X_train_num.index,
    columns=X_train_num.columns,
)
X_test_num_transformed_df = pd.DataFrame(
    X_test_num_transformed,
    index=X_test_num.index,
    columns=X_test_num.columns,
)

# 4. Place the transformed numeric columns next to the encoded categorical ones
X_train_new = pd.concat(
    [X_train_num_transformed_df, X_train_cat_encoded_df],
    axis='columns',
)
X_test_new = pd.concat(
    [X_test_num_transformed_df, X_test_cat_encoded_df],
    axis='columns',
)

# 5. Fit the MinMax scaler on the training features, then scale both splits with it
scaler = MinMaxScaler()
scaler.fit(X_train_new)
X_train_new_scaled = scaler.transform(X_train_new)
X_test_new_scaled = scaler.transform(X_test_new)

# Dataframe versions of the scaled arrays, keeping the same labels
X_train_new_scaled_df = pd.DataFrame(
    X_train_new_scaled,
    index=X_train_new.index,
    columns=X_train_new.columns,
)
X_test_new_scaled_df = pd.DataFrame(
    X_test_new_scaled,
    index=X_test_new.index,
    columns=X_test_new.columns,
)

# Cell output: first rows of both scaled frames.
X_train_new_scaled_df.head(), X_test_new_scaled_df.head()


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1. Fit a plain linear regression on the scaled training features,
#    then predict for both the training and the test rows
lin_reg = LinearRegression()
lin_reg.fit(X_train_new_scaled_df, y_train)
y_train_pred = lin_reg.predict(X_train_new_scaled_df)
y_test_pred = lin_reg.predict(X_test_new_scaled_df)

# 2. Create a function to evaluate model's predictions
def evaluate_model(y_true, y_pred):
    """
    Summarise the error of a set of regression predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, in the same row order as ``y_true``.

    Returns
    -------
    pandas.DataFrame
        Two columns, 'Error_metric' and 'Value', with one row each for
        MAE, MSE, RMSE, MAPE (in percent) and R2.

    Notes
    -----
    MAPE divides by ``y_true``, so targets equal to zero yield inf/nan.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)

    # Compute MAPE on plain float arrays. Subtracting pandas objects directly
    # would align rows by index label, whereas the sklearn metrics above compare
    # values positionally; converting first keeps all metrics consistent and
    # also lets plain lists be passed in.
    true_values = np.asarray(y_true, dtype=float)
    pred_values = np.asarray(y_pred, dtype=float)
    mape = np.mean(np.abs((true_values - pred_values) / true_values)) * 100

    r2 = r2_score(y_true, y_pred)

    metrics = {
        'Error_metric': ['MAE', 'MSE', 'RMSE', 'MAPE', 'R2'],
        'Value': [mae, mse, rmse, mape, r2]
    }

    return pd.DataFrame(metrics)

# 3. Score the linear model's predictions on both splits (TRAIN first, then TEST)
train_evaluation = evaluate_model(y_true=y_train, y_pred=y_train_pred)
test_evaluation = evaluate_model(y_true=y_test, y_pred=y_test_pred)

# Cell output: the two metric tables.
(train_evaluation, test_evaluation)


In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

def train_models(models_list, X_train, y_train):
    """
    Instantiate each model class with no arguments and fit it on the training data.

    models_list holds classes (not instances); X_train and y_train are passed
    unchanged to every fit call. Returns a dict mapping each class's
    __name__ to its fitted instance, in the order the classes were given.
    """
    def _fit_fresh(estimator_cls):
        # Build a default-configured estimator, fit it, and pair it with its class name.
        estimator = estimator_cls()
        estimator.fit(X_train, y_train)
        return estimator_cls.__name__, estimator

    return dict(_fit_fresh(estimator_cls) for estimator_cls in models_list)

# Fit one default-configured instance of each model class
models_to_train = [
    LinearRegression,
    KNeighborsRegressor,
    MLPRegressor,
]
trained_models = train_models(models_to_train, X_train_new_scaled_df, y_train)

# Score every fitted model on the TRAIN and TEST sets, keyed by model name
train_evaluations, test_evaluations = {}, {}
for model_name, model_instance in trained_models.items():
    train_evaluations[model_name] = evaluate_model(
        y_train,
        model_instance.predict(X_train_new_scaled_df),
    )
    test_evaluations[model_name] = evaluate_model(
        y_test,
        model_instance.predict(X_test_new_scaled_df),
    )

# Cell output: both dictionaries of metric tables.
train_evaluations, test_evaluations


In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

# NOTE(review): this redefines train_models from the earlier cell; that version
# expected model classes, this one expects already-constructed instances.
def train_models(models, X_train, y_train):
    """
    Fit every model in `models` on the given training data, in place.

    The same `models` object that was passed in is returned, so the caller's
    instances are the fitted ones.
    """
    for estimator in models:
        estimator.fit(X_train, y_train)
    return models

# List of models to be trained (already-constructed instances)
models = [
    LinearRegression(),
    KNeighborsRegressor(),
    # max_iter raised to help the network converge; random_state (same seed as
    # the train/test split) makes the fitted weights repeatable across runs.
    MLPRegressor(max_iter=1000, random_state=31),
]

# Training the models on the scaled training features
trained_models = train_models(models, X_train_new_scaled, y_train)

# Evaluating the trained models on TRAIN set.
# evaluate_model is the metrics helper defined earlier in this notebook; the
# previous call to `evaluate_predictions` referred to a name that is never defined.
train_evaluations = [
    evaluate_model(y_train, model.predict(X_train_new_scaled))
    for model in trained_models
]

# Evaluating the trained models on TEST set
test_evaluations = [
    evaluate_model(y_test, model.predict(X_test_new_scaled))
    for model in trained_models
]

# Cell output: metric tables in the same order as `models`.
train_evaluations, test_evaluations


In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

# NOTE(review): same definition as in the preceding cell; it also shadows the
# earlier class-based train_models.
def train_models(models, X_train, y_train):
    """
    Fit each already-constructed model on the training data.

    Models are fitted in place and the `models` object that was passed in is
    returned unchanged as a container.
    """
    for fitted_candidate in models:
        fitted_candidate.fit(X_train, y_train)
    return models

# NOTE(review): this cell repeats the same train/evaluate steps as the
# preceding cell; consider keeping only one of them.

# List of models to be trained (already-constructed instances)
models = [
    LinearRegression(),
    KNeighborsRegressor(),
    # max_iter raised to help the network converge; random_state (same seed as
    # the train/test split) makes the fitted weights repeatable across runs.
    MLPRegressor(max_iter=1000, random_state=31),
]

# Training the models on the scaled training features
trained_models = train_models(models, X_train_new_scaled, y_train)

# Evaluating the trained models on TRAIN set.
# evaluate_model is the metrics helper defined earlier in this notebook; the
# previous call to `evaluate_predictions` referred to a name that is never defined.
train_evaluations = [
    evaluate_model(y_train, model.predict(X_train_new_scaled))
    for model in trained_models
]

# Evaluating the trained models on TEST set
test_evaluations = [
    evaluate_model(y_test, model.predict(X_test_new_scaled))
    for model in trained_models
]

# Cell output: metric tables in the same order as `models`.
train_evaluations, test_evaluations
