In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score

In [2]:
df=pd.read_csv("more_corr_data_25_pca.csv")
df

Unnamed: 0,is_baseline_formation,PC1,PC2,PC3,PC4,cycles_to_50_pct,cycles_to_60_pct,cycles_to_70_pct,cycles_to_80_pct
0,0,192.613031,9.348077,4.855122,3.691802,624,590,517,458
1,0,75.530657,8.529429,-7.571432,2.186970,560,512,550,412
2,0,36.307151,8.203337,-2.842000,2.152025,412,468,402,279
3,1,-132.622789,-2.076941,-4.040552,0.902875,446,378,393,312
4,1,-86.141742,1.217216,8.338003,-4.413646,449,402,362,325
...,...,...,...,...,...,...,...,...,...
75,0,86.760588,8.437928,-8.094932,2.775581,577,609,458,426
76,1,-55.782143,3.573888,-8.040304,5.436546,413,372,412,332
77,0,128.122063,14.778066,-4.520398,-0.029693,576,546,487,433
78,0,186.096014,-2.401902,-10.191068,-4.040991,632,522,550,382


In [15]:
X=df.drop(columns=['cycles_to_50_pct', 'cycles_to_60_pct', 'cycles_to_70_pct', 'cycles_to_80_pct']).values
y=df['cycles_to_70_pct'].values

In [4]:
import numpy as np

def calculate_mpe(y_true, y_pred):
    y_pred = np.array(y_pred)
    y_true = np.array(y_true)
    n = len(y_true)
    if n == 0:
        return 0  # Avoid division by zero if there are no data points
    mpe = np.mean(np.abs((y_pred - y_true) / y_true)) * 100
    return mpe


In [5]:
def ridge_regression(X, y, alpha=1.0, test_size=0.4):

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size
    )

    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train, y_train)

    y_pred = ridge_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    mpe = calculate_mpe(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results = {
        "model": ridge_model,
        "predictions": y_pred,
        "mse": mse,
        "mape": mape,
        "r2": r2,
        "mpe": mpe,
    }

    return results

In [22]:
results = ridge_regression(X, y, alpha=1000, test_size=0.1)

print("R2 Score (Pandas dataframe):",results['r2'])
print("Ridge Regression Results:")
print("Mean Squared Error:", results["mse"])
print("Mean Absolute Percentage Error:", results["mape"])
print("Mean Percentage Error:", results["mpe"])
print("Coefficients:", results["model"].coef_)
print("Intercept:", results["model"].intercept_)


R2 Score (Pandas dataframe): 0.7716512943883819
Ridge Regression Results:
Mean Squared Error: 1056.569460864957
Mean Absolute Percentage Error: 0.06923078781965648
Mean Percentage Error: 6.923078781965648
Coefficients: [-0.3554864   0.39914417 -0.72109752 -0.64088625  0.63096421]
Intercept: 429.23090732021967


In [177]:
def lasso_regression(X, y, alpha=1.0, test_size=0.4):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size
    )

    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(X_train, y_train)

    y_pred = lasso_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    mpe = calculate_mpe(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results = {
        "model": lasso_model,
        "predictions": y_pred,
        "mse": mse,
        "mape": mape,
        "mpe": mpe,
        "r2": r2,
    }

    return results


In [236]:
results = lasso_regression(X, y, alpha=1000, test_size=0.1)

print("Lasso Regression Results:")
print("R2 Score (Pandas dataframe):",results['r2'])
print("Mean Squared Error:", results["mse"])
print("Mean Absolute Percentage Error:", results["mape"])
print("Mean Percentage Error:", results["mpe"])
print("Coefficients:", results["model"].coef_)
print("Intercept:", results["model"].intercept_)


Lasso Regression Results:
R2 Score (Pandas dataframe): 0.6260315464791302
Mean Squared Error: 891.260316853613
Mean Absolute Percentage Error: 0.054744227116062895
Mean Percentage Error: 5.474422711606289
Coefficients: [-0.          0.36581079 -0.         -0.         -0.        ]
Intercept: 471.7266918855304


In [237]:
def elastic_regression(X, y, alpha=1.0, l1_ratio=0.5, test_size=0.4):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size
    )

    elastic_model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    elastic_model.fit(X_train, y_train)

    y_pred = elastic_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    mpe = calculate_mpe(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results = {
        "model": elastic_model,
        "predictions": y_pred,
        "mse": mse,
        "mape": mape,
        "mpe": mpe,
        "r2": r2,
    }

    return results


In [335]:
results = elastic_regression(X, y, alpha=2000, l1_ratio=0.1, test_size=0.1)

print("Elastic Regression Results:")
print("R2 Score (Pandas dataframe):",results['r2'])
print("Mean Squared Error:", results["mse"])
print("Mean Absolute Percentage Error:", results["mape"])
print("Mean Percentage Error:", results["mpe"])
print("Coefficients:", results["model"].coef_)
print("Intercept:", results["model"].intercept_)


Elastic Regression Results:
R2 Score (Pandas dataframe): 0.6648357826029518
Mean Squared Error: 1555.031045199883
Mean Absolute Percentage Error: 0.05534985202647265
Mean Percentage Error: 5.534985202647265
Coefficients: [-0.          0.37348847 -0.         -0.         -0.        ]
Intercept: 469.83591806023753
