In [8]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.ensemble import BaggingRegressor
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.feature_selection import mutual_info_regression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import Ridge
from itertools import combinations
import numpy as np

# Read the combined credit dataset from its local CSV export.
df = pd.read_csv('/Users/parmikenia/Desktop/internship codes/combined_credit2.csv')

# Target is Credit_Score; the features are every remaining column except
# Customer_ID.
y = df['Credit_Score']
X = df.drop(columns=['Customer_ID', 'Credit_Score'])

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 1: recursive feature elimination driven by a seeded random forest,
# keeping 10 features.
random_forest = RandomForestRegressor(random_state=42)
rfe = RFE(estimator=random_forest, n_features_to_select=10).fit(X_train_scaled, y_train)

# The scaled data is a plain array, so map the kept column positions back to
# the DataFrame's column names for reporting.
selected_feature_indices = rfe.get_support(indices=True)
selected_feature_names = X.columns[selected_feature_indices]
print(f'Selected features by RFE: {selected_feature_names}')

# Step 2: reduce both scaled splits to the RFE-selected columns; these arrays
# are what the candidate models below are trained and scored on.
X_train_selected = rfe.transform(X_train_scaled)
X_test_selected = rfe.transform(X_test_scaled)

# Candidate base regressors, keyed by the display name used when reporting
# model pairs.
#
# Every estimator that accepts a seed gets random_state=42, matching the seed
# already used for the train/test split and the RFE forest, so the pair
# ranking is reproducible from run to run. KNN, Linear Regression and SVR
# take no seed and are left at their defaults.
models = {
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'KNN': KNeighborsRegressor(),
    'GBM': GradientBoostingRegressor(random_state=42),
    'XGBoost': XGBRegressor(random_state=42),
    'Linear Regression': LinearRegression(),
    'SVM': SVR(kernel='rbf'),
    'Bagging': BaggingRegressor(random_state=42),
}

def evaluate_model_pair(model_1_name, model_2_name):
    """Fit a two-model VotingRegressor and score it on the test split.

    Both names are looked up in the module-level ``models`` dict. The
    ensemble is trained on ``X_train_selected`` / ``y_train`` and evaluated
    against ``X_test_selected`` / ``y_test``, all of which are read from
    module scope.

    Args:
        model_1_name: Key in ``models`` for the first estimator.
        model_2_name: Key in ``models`` for the second estimator.

    Returns:
        A ``(mse, rmse)`` tuple, where ``rmse`` is the square root of ``mse``.

    Raises:
        KeyError: If either name is not a key of ``models``.
    """
    # VotingRegressor expects (name, estimator) tuples.
    named_estimators = [
        (name, models[name]) for name in (model_1_name, model_2_name)
    ]

    ensemble = VotingRegressor(estimators=named_estimators)
    ensemble.fit(X_train_selected, y_train)

    mse = mean_squared_error(y_test, ensemble.predict(X_test_selected))
    return mse, np.sqrt(mse)

# Exhaustively score every unordered pair of candidate models and keep the
# one with the smallest MSE. The comparison is strict, so on a tie the pair
# seen first stays the winner.
# NOTE(review): evaluate_model_pair scores against the test split, so the
# winning pair is chosen on the same data its error is reported on — the
# printed figures are likely optimistic; consider a validation split or
# cross-validation.
best_pair = None
best_mse = best_rmse = float('inf')

for candidate in combinations(models, 2):
    candidate_mse, candidate_rmse = evaluate_model_pair(*candidate)

    # Skip anything that does not strictly improve on the current best.
    if not candidate_mse < best_mse:
        continue

    best_pair = candidate
    best_mse, best_rmse = candidate_mse, candidate_rmse

# Report the winning pair and its errors.
for report_line in (
    f"Best Model Pair: {best_pair}",
    f"Lowest MSE: {best_mse}",
    f"Lowest RMSE: {best_rmse}",
):
    print(report_line)

Selected features by RFE: Index(['Age', 'Income', 'Loan_Amount', 'Loan_Term_Months', 'Rent_Payments',
       'Social_Media_Activity_Score', 'Transaction_Behavior_Score',
       'gender_male', 'demographics_Sub_Urban', 'profession_Marketer Manager'],
      dtype='object')
Best Model Pair: ('Linear Regression', 'Bagging')
Lowest MSE: 0.08825032465055178
Lowest RMSE: 0.2970695619725316
