In [33]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [4]:
pip install --upgrade scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [5]:
# Load the California housing dataset
diabetes_data = load_diabetes()

# Get the feature matrix and target vector
X = diabetes_data.data
y = diabetes_data.target

In [8]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [10]:
# Linear Regression (OLS)
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# Calculate metrics
mse_lr = mean_squared_error(y_test, y_pred_lr)
rmse_lr = np.sqrt(mse_lr)
mae_lr = mean_absolute_error(y_test, y_pred_lr)
mape_lr = np.mean(np.abs((y_test - y_pred_lr) / y_test)) * 100
r2_lr = r2_score(y_test, y_pred_lr)

# Print results
print("Linear Regression (OLS)")
print(f'MSE: {mse_lr}')
print(f'RMSE: {rmse_lr}')
print(f'MAE: {mae_lr}')
print(f'MAPE (%): {mape_lr}')
print(f'R squared: {r2_lr}\n')

Linear Regression (OLS)
MSE: 2821.7509810013103
RMSE: 53.1201560709427
MAE: 41.91937845679274
MAPE (%): 36.67196318312674
R squared: 0.4772897164322618



In [12]:

# SGD Regression
sgd = SGDRegressor(max_iter=1000, tol=1e-3)
sgd.fit(X_train, y_train)
y_pred_sgd = sgd.predict(X_test)

# Calculate metrics
mse_sgd = mean_squared_error(y_test, y_pred_sgd)
rmse_sgd = np.sqrt(mse_sgd)
mae_sgd = mean_absolute_error(y_test, y_pred_sgd)
mape_sgd = np.mean(np.abs((y_test - y_pred_sgd) / y_test)) * 100
r2_sgd = r2_score(y_test, y_pred_sgd)

# Print results
print("SGD Regression")
print(f'MSE: {mse_sgd}')
print(f'RMSE: {rmse_sgd}')
print(f'MAE: {mae_sgd}')
print(f'MAPE (%): {mape_sgd}')
print(f'R squared: {r2_sgd}\n')


SGD Regression
MSE: 3149.3572813516266
RMSE: 56.11913471670449
MAE: 45.68527017148962
MAPE (%): 41.2760415770079
R squared: 0.41660286514465283





In [29]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Ridge Regression
try:
    ridge = Ridge()
    ridge.fit(X_train, y_train)
    y_pred_ridge = ridge.predict(X_test)

    # Calculate metrics
    mse_ridge = mean_squared_error(y_test, y_pred_ridge)
    rmse_ridge = np.sqrt(mse_ridge)
    mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
    mape_ridge = np.mean(np.abs((y_test - y_pred_ridge) / y_test)) * 100
    r2_ridge = r2_score(y_test, y_pred_ridge)

    # Print results
    print("Ridge Regression")
    print(f'MSE: {mse_ridge}')
    print(f'RMSE: {rmse_ridge}')
    print(f'MAE: {mae_ridge}')
    print(f'MAPE (%): {mape_ridge}')
    print(f'R squared: {r2_ridge}\n')

except Exception as e:
    print(f"An error occurred: {e}")


Ridge Regression
MSE: 3112.9664151980814
RMSE: 55.79396396742287
MAE: 45.46352769933713
MAPE (%): 41.3132261326937
R squared: 0.4233440269603015



In [16]:

# Lasso Regression
lasso = Lasso()
lasso.fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)

# Calculate metrics
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
rmse_lasso = np.sqrt(mse_lasso)
mae_lasso = mean_absolute_error(y_test, y_pred_lasso)
mape_lasso = np.mean(np.abs((y_test - y_pred_lasso) / y_test)) * 100
r2_lasso = r2_score(y_test, y_pred_lasso)

# Print results
print("Lasso Regression")
print(f'MSE: {mse_lasso}')
print(f'RMSE: {rmse_lasso}')
print(f'MAE: {mae_lasso}')
print(f'MAPE (%): {mape_lasso}')
print(f'R squared: {r2_lasso}\n')


Lasso Regression
MSE: 3444.670824977634
RMSE: 58.69131813971836
MAE: 49.20790052364458
MAPE (%): 45.343978184215736
R squared: 0.36189802861957876



In [18]:
from sklearn.linear_model import ElasticNet

# Elastic Net
elastic_net = ElasticNet()
elastic_net.fit(X_train, y_train)
y_pred_en = elastic_net.predict(X_test)

# Calculate metrics
mse_en = mean_squared_error(y_test, y_pred_en)
rmse_en = np.sqrt(mse_en)
mae_en = mean_absolute_error(y_test, y_pred_en)
mape_en = np.mean(np.abs((y_test - y_pred_en) / y_test)) * 100
r2_en = r2_score(y_test, y_pred_en)

# Print results
print("Elastic Net")
print(f'MSE: {mse_en}')
print(f'RMSE: {rmse_en}')
print(f'MAE: {mae_en}')
print(f'MAPE (%): {mape_en}')
print(f'R squared: {r2_en}\n')


Elastic Net
MSE: 5381.535093816118
RMSE: 73.3589469241218
MAE: 63.935903555202415
MAPE (%): 60.83828084682587
R squared: 0.0031070233135319647



In [20]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

# Polynomial Regression (Degree 2)
poly_reg = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
poly_reg.fit(X_train, y_train)
y_pred_poly = poly_reg.predict(X_test)

# Calculate metrics
mse_poly = mean_squared_error(y_test, y_pred_poly)
rmse_poly = np.sqrt(mse_poly)
mae_poly = mean_absolute_error(y_test, y_pred_poly)
mape_poly = np.mean(np.abs((y_test - y_pred_poly) / y_test)) * 100
r2_poly = r2_score(y_test, y_pred_poly)

# Print results
print("Polynomial Regression (Degree 2)")
print(f'MSE: {mse_poly}')
print(f'RMSE: {rmse_poly}')
print(f'MAE: {mae_poly}')
print(f'MAPE (%): {mape_poly}')
print(f'R squared: {r2_poly}\n')


Polynomial Regression (Degree 2)
MSE: 3168.9304439474345
RMSE: 56.29325398258156
MAE: 44.22139588944812
MAPE (%): 38.71227049292864
R squared: 0.4129770691620669



In [22]:
from sklearn.neighbors import KNeighborsRegressor

# k-Nearest Neighbors
knn = KNeighborsRegressor()
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Calculate metrics
mse_knn = mean_squared_error(y_test, y_pred_knn)
rmse_knn = np.sqrt(mse_knn)
mae_knn = mean_absolute_error(y_test, y_pred_knn)
mape_knn = np.mean(np.abs((y_test - y_pred_knn) / y_test)) * 100
r2_knn = r2_score(y_test, y_pred_knn)

# Print results
print("k-Nearest Neighbors")
print(f'MSE: {mse_knn}')
print(f'RMSE: {rmse_knn}')
print(f'MAE: {mae_knn}')
print(f'MAPE (%): {mape_knn}')
print(f'R squared: {r2_knn}\n')


k-Nearest Neighbors
MSE: 3222.117894736842
RMSE: 56.763702264183244
MAE: 44.00300751879699
MAPE (%): 37.99729841061306
R squared: 0.4031244536507893



In [24]:
from sklearn.tree import DecisionTreeRegressor

# Decision Tree
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Calculate metrics
mse_dt = mean_squared_error(y_test, y_pred_dt)
rmse_dt = np.sqrt(mse_dt)
mae_dt = mean_absolute_error(y_test, y_pred_dt)
mape_dt = np.mean(np.abs((y_test - y_pred_dt) / y_test)) * 100
r2_dt = r2_score(y_test, y_pred_dt)

# Print results
print("Decision Tree")
print(f'MSE: {mse_dt}')
print(f'RMSE: {rmse_dt}')
print(f'MAE: {mae_dt}')
print(f'MAPE (%): {mape_dt}')
print(f'R squared: {r2_dt}\n')


Decision Tree
MSE: 5943.315789473684
RMSE: 77.09290362590895
MAE: 60.11278195488722
MAPE (%): 56.5686488894694
R squared: -0.10095905080398104



In [26]:

# Random Forest
rf = RandomForestRegressor(n_estimators=100)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Calculate metrics
mse_rf = mean_squared_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mse_rf)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
mape_rf = np.mean(np.abs((y_test - y_pred_rf) / y_test)) * 100
r2_rf = r2_score(y_test, y_pred_rf)

# Print results
print("Random Forest")
print(f'MSE: {mse_rf}')
print(f'RMSE: {rmse_rf}')
print(f'MAE: {mae_rf}')
print(f'MAPE (%): {mape_rf}')
print(f'R squared: {r2_rf}\n')


Random Forest
MSE: 3010.3552398496245
RMSE: 54.866704291852855
MAE: 44.182932330827065
MAPE (%): 39.32780192742337
R squared: 0.44235205315564563



In [39]:
# Define regression models
models = {
    'Linear Regression (OLS)': LinearRegression(),
    'SGD Regression': SGDRegressor(max_iter=1000, tol=1e-3),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Elastic Net': ElasticNet(),
    'Polynomial Regression (Degree 2)': make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
    'k-Nearest Neighbors': KNeighborsRegressor(),
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(n_estimators=100),
    'Support Vector Machine': SVR(),

}

# Initialize list to store results
results = []

# Evaluation function
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
    r2 = r2_score(y_test, y_pred)
    
    return mse, rmse, mae, mape, r2

# Evaluate each model
for name, model in models.items():
    mse, rmse, mae, mape, r2 = evaluate_model(model, X_train, X_test, y_train, y_test)
    
    results.append({
        'Model': name,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE (%)': mape,
        'R squared': r2
    })

# Create a DataFrame with the results
results_df = pd.DataFrame(results)

# Display the results
print(results_df)



                              Model          MSE       RMSE        MAE  \
0           Linear Regression (OLS)  2821.750981  53.120156  41.919378   
1                    SGD Regression  3149.200441  56.117737  45.683157   
2                  Ridge Regression  3112.966415  55.793964  45.463528   
3                  Lasso Regression  3444.670825  58.691318  49.207901   
4                       Elastic Net  5381.535094  73.358947  63.935904   
5  Polynomial Regression (Degree 2)  3168.930444  56.293254  44.221396   
6               k-Nearest Neighbors  3222.117895  56.763702  44.003008   
7                     Decision Tree  5455.481203  73.861229  57.721805   
8                     Random Forest  2912.564384  53.968179  43.440075   
9            Support Vector Machine  4525.810200  67.274142  56.414721   

    MAPE (%)  R squared  
0  36.671963   0.477290  
1  41.277265   0.416632  
2  41.313226   0.423344  
3  45.343978   0.361898  
4  60.838281   0.003107  
5  38.712270   0.412977  
6  