In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

def rfeFeature(indep_X, dep_Y, n):
    rfelist = []

    # Use regression models instead of classification models
    lin_reg = LinearRegression()
    RF = RandomForestRegressor(n_estimators=10, random_state=0)
    DT = DecisionTreeRegressor(random_state=0)
    svr_model = SVR(kernel='linear')
    rfemodellist = [lin_reg, svr_model, RF, DT]

    for model in rfemodellist:
        print(model)
        rfe = RFE(estimator=model, n_features_to_select=n)
        rfe.fit(indep_X, dep_Y)
        rfe_features = rfe.transform(indep_X)
        rfelist.append(rfe_features)

    return rfelist

def split_scalar(indep_X, dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)

    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    return X_train, X_test, y_train, y_test

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    return mse, r2, mae

def linear_regression(X_train, y_train, X_test, y_test):
    model = LinearRegression()
    model.fit(X_train, y_train)
    return evaluate_model(model, X_test, y_test)

def svr_linear(X_train, y_train, X_test, y_test):
    model = SVR(kernel='linear')
    model.fit(X_train, y_train)
    return evaluate_model(model, X_test, y_test)

def svr_nonlinear(X_train, y_train, X_test, y_test):
    model = SVR(kernel='rbf')
    model.fit(X_train, y_train)
    return evaluate_model(model, X_test, y_test)

def knn_regressor(X_train, y_train, X_test, y_test):
    model = KNeighborsRegressor(n_neighbors=5)
    model.fit(X_train, y_train)
    return evaluate_model(model, X_test, y_test)

def decision_tree(X_train, y_train, X_test, y_test):
    model = DecisionTreeRegressor(random_state=0)
    model.fit(X_train, y_train)
    return evaluate_model(model, X_test, y_test)

def random_forest(X_train, y_train, X_test, y_test):
    model = RandomForestRegressor(n_estimators=10, random_state=0)
    model.fit(X_train, y_train)
    return evaluate_model(model, X_test, y_test)

def rfe_classification(metrics):
    rfedataframe = pd.DataFrame(metrics, index=['LinearRegression', 'SVR Linear', 'RandomForest', 'DecisionTree'])
    return rfedataframe

# Load and preprocess the dataset
dataset1 = pd.read_csv("Automobilecodedata.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

indep_X = df2.drop(['displacement', 'weight', 'model_year', 'cylinders'], axis=1)
dep_Y = df2['mpg']

# Perform RFE to select top 3 features
rfelist = rfeFeature(indep_X, dep_Y, 3)
print("Selected Features by RFE for each model:", rfelist)

# Initialize list to store metrics for each model
metrics = {
    "MSE": [],
    "R2": [],
    "MAE": []
}

# Evaluate each model using the selected features
for i in rfelist:
    X_train, X_test, y_train, y_test = split_scalar(i, dep_Y)

    mse, r2, mae = linear_regression(X_train, y_train, X_test, y_test)
    metrics["MSE"].append(mse)
    metrics["R2"].append(r2)
    metrics["MAE"].append(mae)

    mse, r2, mae = svr_linear(X_train, y_train, X_test, y_test)
    metrics["MSE"].append(mse)
    metrics["R2"].append(r2)
    metrics["MAE"].append(mae)

    mse, r2, mae = svr_nonlinear(X_train, y_train, X_test, y_test)
    metrics["MSE"].append(mse)
    metrics["R2"].append(r2)
    metrics["MAE"].append(mae)

    mse, r2, mae = knn_regressor(X_train, y_train, X_test, y_test)
    metrics["MSE"].append(mse)
    metrics["R2"].append(r2)
    metrics["MAE"].append(mae)

    mse, r2, mae = decision_tree(X_train, y_train, X_test, y_test)
    metrics["MSE"].append(mse)
    metrics["R2"].append(r2)
    metrics["MAE"].append(mae)

    mse, r2, mae = random_forest(X_train, y_train, X_test, y_test)
    metrics["MSE"].append(mse)
    metrics["R2"].append(r2)
    metrics["MAE"].append(mae)

result = rfe_classification(metrics)
print(result)


LinearRegression()
SVR(kernel='linear')
RandomForestRegressor(n_estimators=10, random_state=0)
DecisionTreeRegressor(random_state=0)
Selected Features by RFE for each model: [array([[18.,  1.,  0.],
       [15.,  0.,  0.],
       [18.,  0.,  0.],
       ...,
       [32.,  0.,  0.],
       [28.,  0.,  0.],
       [31.,  0.,  0.]]), array([[18.,  0.,  1.],
       [15.,  0.,  1.],
       [18.,  0.,  1.],
       ...,
       [32.,  0.,  1.],
       [28.,  0.,  1.],
       [31.,  0.,  1.]]), array([[ 18. , 130. ,  12. ],
       [ 15. , 165. ,  11.5],
       [ 18. , 150. ,  11. ],
       ...,
       [ 32. ,  84. ,  11.6],
       [ 28. ,  79. ,  18.6],
       [ 31. ,  82. ,  19.4]]), array([[18. , 12. ,  1. ],
       [15. , 11.5,  1. ],
       [18. , 11. ,  1. ],
       ...,
       [32. , 11.6,  1. ],
       [28. , 18.6,  1. ],
       [31. , 19.4,  1. ]])]


ValueError: Shape of passed values is (24, 3), indices imply (4, 3)