In [3]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

# Load the dataset
housing = pd.read_csv('Ames_HousePrice.csv', index_col=0)

# Define the target variable
y = housing['SalePrice']

# Select all features excluding the target variable
X = housing.drop('SalePrice', axis=1)

# Identify numeric and categorical features
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

# Preprocessing pipelines for numeric and categorical features
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])



In [4]:
from sklearn.linear_model import ElasticNetCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

# Define the ElasticNetCV model
elastic_net_cv_model = ElasticNetCV(cv=5, max_iter=10000)

# Create the final pipeline
elastic_net_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),  # Ensure 'preprocessor' is defined elsewhere in your code
    ('regressor', elastic_net_cv_model)
])

# Perform cross-validation
elastic_net_scores = cross_val_score(elastic_net_pipeline, X, y, cv=5, scoring='r2')
elastic_net_mean_score = elastic_net_scores.mean()

# Fit the pipeline to the entire dataset
elastic_net_pipeline.fit(X, y)

# Extract the best alpha and l1_ratio values
best_alpha = elastic_net_cv_model.alpha_
best_l1_ratio = elastic_net_cv_model.l1_ratio_

print("Mean R^2 cross-validation score for ElasticNetCV:", elastic_net_mean_score)
print("Best alpha value selected by ElasticNetCV:", best_alpha)
print("Best l1_ratio value selected by ElasticNetCV:", best_l1_ratio)




Mean R^2 cross-validation score for ElasticNetCV: 0.180447924175893
Best alpha value selected by ElasticNetCV: 118.601377957355
Best l1_ratio value selected by ElasticNetCV: 0.5


In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNet
from sklearn.metrics import make_scorer, mean_squared_error, r2_score

# Load the dataset
data =  pd.read_csv('Ames_HousePrice.csv', index_col=0)

# Handle missing values, encoding, etc.
# Assuming 'SalePrice' is the target variable and 'features' is a list of predictor variables
X = data.drop('SalePrice', axis=1)
y = data['SalePrice']

# Preprocessing pipeline for numeric and categorical features
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))])


preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

# Combine preprocessor and Elastic Net model in a pipeline
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('elasticnet', ElasticNet())])

# Define the parameter grid
param_grid = {
    'elasticnet__alpha': [0.1, 1.0, 10.0],
    'elasticnet__l1_ratio': [0.1, 0.5, 0.9,0.95]
}

# GridSearch for hyperparameter tuning with cross-validation
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X, y)

# Best model
best_model = grid_search.best_estimator_

# Get best parameters
best_alpha = grid_search.best_params_['elasticnet__alpha']
best_l1_ratio = grid_search.best_params_['elasticnet__l1_ratio']


# Cross-validated performance metrics
cv_results = cross_val_score(best_model, X, y, cv=5, scoring='neg_mean_squared_error')
r2_scores = cross_val_score(best_model, X, y, cv=5, scoring='r2')


print(f'R-squared Scores: {r2_scores}')
print(f'Mean R-squared: {np.mean(r2_scores)}')
print(f'Best ElasticNet alpha: {best_alpha}')
print(f'Best ElasticNet l1_ratio: {best_l1_ratio}')

R-squared Scores: [0.86708683 0.91892363 0.92911858 0.92368613 0.92620805]
Mean R-squared: 0.9130046431279748
Best ElasticNet alpha: 0.1
Best ElasticNet l1_ratio: 0.95
