In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score

In [21]:
# Read the table used by every later cell; the file name is kept in one place
# so it is easy to point the notebook at another export.
csv_path = "corr_data_45_pca.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,is_baseline_formation,swelling_severity,PC1,PC2,PC3,PC4,cycles_to_50_pct,cycles_to_60_pct,cycles_to_70_pct,cycles_to_80_pct
0,1,0,-116.89778,6.409425,11.353424,2.59503,400,391,373,328
1,1,0,-149.129752,0.0282,-2.075101,-2.558736,401,368,338,314
2,1,1,34.564947,20.924337,-7.950795,-5.635809,520,498,455,386
3,1,0,-145.819107,12.285564,0.139448,9.003172,389,381,363,315
4,1,0,-99.622958,-2.523835,13.416934,-1.064819,405,400,381,333
5,1,1,-72.37137,23.071388,1.793307,8.049396,430,422,405,355
6,1,0,-98.875979,-3.003603,-5.53885,-8.465275,419,408,362,336
7,1,0,-98.874231,-3.283033,1.060883,-8.183976,412,402,362,340
8,1,1,-81.86052,0.507349,-9.406288,-6.178022,432,421,378,339
9,0,3,27.905064,-0.139614,-11.341511,-4.460382,496,478,461,385


In [22]:
# All four "cycles_to_*" columns are candidate targets, so none of them may
# remain among the features.
target_columns = ['cycles_to_50_pct', 'cycles_to_60_pct', 'cycles_to_70_pct', 'cycles_to_80_pct']

# In the table displayed above, 'Unnamed: 0' only counts the rows (0, 1, 2, ...).
# Presumably it is the index written by an earlier `to_csv` -- TODO confirm.
# A row counter is not a property of the sample, so it is removed from the
# features when present instead of being fed to the regressors.
non_feature_columns = [col for col in ['Unnamed: 0'] if col in df.columns]

X = df.drop(columns=target_columns + non_feature_columns).values
# Only the 70 % threshold is modelled in this notebook section.
y = df['cycles_to_70_pct'].values

In [23]:
import numpy as np

def calculate_mpe(y_true, y_pred):
    """Return the mean of |(y_pred - y_true) / y_true|, expressed in percent.

    Note that the absolute value is taken before averaging, so despite the
    "mpe" name this is an *absolute* percentage error: over- and
    under-predictions do not cancel out, and the result is a MAPE fraction
    multiplied by 100.

    Parameters
    ----------
    y_true : array-like
        Observed values. Entries equal to zero lead to a division by zero
        (inf/nan in the result), exactly as in a plain NumPy division.
    y_pred : array-like
        Predicted values, one per entry of ``y_true``.

    Returns
    -------
    float
        The percentage error, or 0.0 when there are no data points.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not hold the same number of values.
    """
    # Flatten both inputs: a (n, 1) column of predictions against a (n,)
    # target would otherwise broadcast to an (n, n) matrix and silently
    # produce a wrong average.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_true.size != y_pred.size:
        raise ValueError(
            "y_true and y_pred must contain the same number of values "
            f"(got {y_true.size} and {y_pred.size})"
        )
    if y_true.size == 0:
        return 0.0  # Avoid averaging over an empty array

    mpe = np.mean(np.abs((y_pred - y_true) / y_true)) * 100
    return mpe


In [24]:
def ridge_regression(X, y, alpha=1.0, test_size=0.4, random_state=None):
    """Fit a Ridge model on a random train/test split and score it on the test part.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.
    alpha : float, default 1.0
        Passed to ``Ridge(alpha=...)``.
    test_size : float or int, default 0.4
        Passed to ``train_test_split``.
    random_state : int, RandomState instance or None, default None
        Passed to ``train_test_split``. ``None`` keeps the previous behaviour
        (a different split, and therefore different metrics, on every call);
        pass an integer to make the reported numbers reproducible.

    Returns
    -------
    dict
        ``"model"`` (the fitted estimator), ``"predictions"`` (for the test
        rows), ``"mse"``, ``"mape"`` (as returned by
        ``mean_absolute_percentage_error``), ``"r2"`` and ``"mpe"`` (as
        returned by ``calculate_mpe``), all computed on the test rows only.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train, y_train)

    # Every metric below is evaluated on the held-out rows.
    y_pred = ridge_model.predict(X_test)

    return {
        "model": ridge_model,
        "predictions": y_pred,
        "mse": mean_squared_error(y_test, y_pred),
        "mape": mean_absolute_percentage_error(y_test, y_pred),
        "r2": r2_score(y_test, y_pred),
        "mpe": calculate_mpe(y_test, y_pred),
    }

In [25]:
# Fit the ridge model with alpha=75000 and print its test-set scores.
results = ridge_regression(X, y, alpha=75000)
ridge_fit = results["model"]

print("R2 Score (Pandas dataframe):", results['r2'])
print("Ridge Regression Results:")
for label, value in (
    ("Mean Squared Error:", results["mse"]),
    ("Mean Absolute Percentage Error:", results["mape"]),
    ("Mean Percentage Error:", results["mpe"]),
    ("Coefficients:", ridge_fit.coef_),
    ("Intercept:", ridge_fit.intercept_),
):
    print(label, value)


R2 Score (Pandas dataframe): 0.6296046240749088
Ridge Regression Results:
Mean Squared Error: 1958.1356668222365
Mean Absolute Percentage Error: 0.0779238490253323
Mean Percentage Error: 7.79238490253323
Coefficients: [-0.00173784  0.004601    0.32208506 -0.01116762 -0.00182886  0.01032512]
Intercept: 427.83358500346213


In [23]:
def lasso_regression(X, y, alpha=1.0, test_size=0.4, random_state=None):
    """Fit a Lasso model on a random train/test split and score it on the test part.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.
    alpha : float, default 1.0
        Passed to ``Lasso(alpha=...)``.
    test_size : float or int, default 0.4
        Passed to ``train_test_split``.
    random_state : int, RandomState instance or None, default None
        Passed to ``train_test_split``. ``None`` keeps the previous behaviour
        (a different split, and therefore different metrics, on every call);
        pass an integer to make the reported numbers reproducible.

    Returns
    -------
    dict
        ``"model"`` (the fitted estimator), ``"predictions"`` (for the test
        rows), ``"mse"``, ``"mape"`` (as returned by
        ``mean_absolute_percentage_error``), ``"mpe"`` (as returned by
        ``calculate_mpe``) and ``"r2"``, all computed on the test rows only.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(X_train, y_train)

    # Every metric below is evaluated on the held-out rows.
    y_pred = lasso_model.predict(X_test)

    return {
        "model": lasso_model,
        "predictions": y_pred,
        "mse": mean_squared_error(y_test, y_pred),
        "mape": mean_absolute_percentage_error(y_test, y_pred),
        "mpe": calculate_mpe(y_test, y_pred),
        "r2": r2_score(y_test, y_pred),
    }


In [278]:
# Fit the lasso model with alpha=2000 and print its test-set scores.
results = lasso_regression(X, y, alpha=2000)
lasso_fit = results["model"]

print("Lasso Regression Results:")
for label, value in (
    ("R2 Score (Pandas dataframe):", results['r2']),
    ("Mean Squared Error:", results["mse"]),
    ("Mean Absolute Percentage Error:", results["mape"]),
    ("Mean Percentage Error:", results["mpe"]),
    ("Coefficients:", lasso_fit.coef_),
    ("Intercept:", lasso_fit.intercept_),
):
    print(label, value)


Lasso Regression Results:
R2 Score (Pandas dataframe): 0.8497220000467429
Mean Squared Error: 1179.5672430393538
Mean Absolute Percentage Error: 0.057004265552919486
Mean Percentage Error: 5.700426555291949
Coefficients: [-0.          0.43799205 -0.          0.         -0.        ]
Intercept: 487.7056352317752


In [146]:
def elastic_regression(X, y, alpha=1.0, l1_ratio=0.5, test_size=0.4, random_state=None):
    """Fit an ElasticNet model on a random train/test split and score it on the test part.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.
    alpha : float, default 1.0
        Passed to ``ElasticNet(alpha=...)``.
    l1_ratio : float, default 0.5
        Passed to ``ElasticNet(l1_ratio=...)``.
    test_size : float or int, default 0.4
        Passed to ``train_test_split``.
    random_state : int, RandomState instance or None, default None
        Passed to ``train_test_split``. ``None`` keeps the previous behaviour
        (a different split, and therefore different metrics, on every call);
        pass an integer to make the reported numbers reproducible.

    Returns
    -------
    dict
        ``"model"`` (the fitted estimator), ``"predictions"`` (for the test
        rows), ``"mse"``, ``"mape"`` (as returned by
        ``mean_absolute_percentage_error``), ``"mpe"`` (as returned by
        ``calculate_mpe``) and ``"r2"``, all computed on the test rows only.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    elastic_model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    elastic_model.fit(X_train, y_train)

    # Every metric below is evaluated on the held-out rows.
    y_pred = elastic_model.predict(X_test)

    return {
        "model": elastic_model,
        "predictions": y_pred,
        "mse": mean_squared_error(y_test, y_pred),
        "mape": mean_absolute_percentage_error(y_test, y_pred),
        "mpe": calculate_mpe(y_test, y_pred),
        "r2": r2_score(y_test, y_pred),
    }


In [341]:
# Fit the elastic-net model with alpha=2000 and print its test-set scores.
# NOTE(review): l1_ratio=1 makes the ElasticNet penalty pure L1, i.e. the same
# penalty as the Lasso run with alpha=2000 -- confirm this repetition is intended.
results = elastic_regression(X, y, alpha=2000, l1_ratio=1)
elastic_fit = results["model"]

print("Elastic Regression Results:")
for label, value in (
    ("R2 Score (Pandas dataframe):", results['r2']),
    ("Mean Squared Error:", results["mse"]),
    ("Mean Absolute Percentage Error:", results["mape"]),
    ("Mean Percentage Error:", results["mpe"]),
    ("Coefficients:", elastic_fit.coef_),
    ("Intercept:", elastic_fit.intercept_),
):
    print(label, value)


Elastic Regression Results:
R2 Score (Pandas dataframe): 0.8192059583336976
Mean Squared Error: 1360.110750548692
Mean Absolute Percentage Error: 0.06142787040336935
Mean Percentage Error: 6.142787040336935
Coefficients: [-0.         0.4325344  0.        -0.         0.       ]
Intercept: 487.1448067076522
