In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
import warnings

warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_Gram=True, verbose=0,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes

In [2]:
def process_weighted_X_train(X_train, adaptive_weights):
    # Manually apply sample weights to the input features
    weighted_X_train = X_train * adaptive_weights

    # Replace infinite values with the mean of non-infinite values
    non_inf_values = weighted_X_train[~np.isinf(weighted_X_train)]
    mean_value = np.mean(non_inf_values)
    weighted_X_train[np.isinf(weighted_X_train)] = mean_value

    return weighted_X_train

In [3]:
data = pd.read_csv('out1.csv')

In [4]:
X = data.drop(['price'], axis=1)
y = data['price']


In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
alphas = np.logspace(-5, 1, 100)

# Define the hyperparameter grid for grid search
param_grid = {'alpha': alphas}

In [8]:
lasso_model = Lasso()
grid_search = GridSearchCV(lasso_model, param_grid, cv=20)

In [9]:
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters
best_alpha = grid_search.best_params_['alpha']

In [10]:
feature_importance = np.abs(grid_search.best_estimator_.coef_)

In [22]:
# Adapt the Penalty (calculate adaptive weights)
adaptive_weights = 1 / (5 * feature_importance)

In [23]:
# Use the function to process weighted_X_train
weighted_X_train = process_weighted_X_train(X_train_scaled, adaptive_weights)

In [24]:
# Fit the Adaptive Lasso Model with the best alpha
adaptive_lasso_model = Lasso(alpha=best_alpha, positive=True)
adaptive_lasso_model.fit(weighted_X_train, y_train)

Lasso(alpha=0.00016297508346206434, copy_X=True, fit_intercept=True,
      max_iter=1000, normalize=False, positive=True, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [25]:
y_train_pred = adaptive_lasso_model.predict(X_train_scaled)
y_test_pred = adaptive_lasso_model.predict(X_test_scaled)


In [26]:
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

In [27]:
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)

In [28]:
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)


In [29]:
print(f'Best Alpha: {best_alpha}')

Best Alpha: 0.00016297508346206434


In [30]:
print(f'Training MSE: {train_mse:.2f}, MAE: {train_mae:.2f}, RMSE: {train_rmse:.2f}')

Training MSE: 0.13, MAE: 0.29, RMSE: 0.37


In [31]:
print(f'Testing MSE: {test_mse:.2f}, MAE: {test_mae:.2f}, RMSE: {test_rmse:.2f}')

Testing MSE: 0.14, MAE: 0.29, RMSE: 0.38


In [21]:
# Extract Selected Features
selected_features = X.columns[adaptive_lasso_model.coef_ != 0]
print('Selected Features:', selected_features)

Selected Features: Index(['bathrooms', 'sqft_living', 'floors', 'waterfront', 'view', 'condition',
       'sqft_above', 'yr_renovated', 'city_pca1'],
      dtype='object')


In [24]:
selected_features_df = pd.DataFrame(selected_features, columns=['Selected Features'])



In [25]:
# Save selected features to a CSV file
selected_features_df.to_csv('selected_features.csv', index=False)