<a href="https://colab.research.google.com/github/mccoymb/AAE-590-DSMM/blob/main/590DSMM_HW6_2_small.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from google.colab import files
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import KFold, cross_val_score
# Upload the CSV file
uploaded = files.upload()


df = pd.read_excel("GeneratedExcel.xlsx")

# Extract independent and dependent variables
X = df.iloc[:, 0].values.reshape(-1, 1)  # First column as independent variable
dependent_vars = df.columns[1:13]  # Columns B-M as dependent variables

Saving GeneratedExcel.xlsx to GeneratedExcel.xlsx


In [2]:
# Print X and dependent variables
print("Independent Variable (X):", X)
print("Dependent Variables:", dependent_vars.tolist())

Independent Variable (X): [[0.1 ]
 [0.15]
 [0.2 ]
 [0.25]
 [0.3 ]
 [0.35]
 [0.4 ]
 [0.45]
 [0.5 ]
 [0.55]
 [0.6 ]
 [0.65]
 [0.7 ]]
Dependent Variables: ['Holes 1mm', 'Mat A 1mm', 'Mat B 1mm', 'Holes 5mm', 'Mat A 5mm', 'Mat B 5mm', 'Holes 100N', 'Mat A 100N', 'Mat B 100N', 'Holes 500N', 'Mat A 500N', 'Mat B 500N']


In [7]:
# --- Evaluation Function ---
def evaluate_model(name, y_true, y_pred):
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    print(f"\n {name} Evaluation")
    print(f"  R-squared:           {r2:.4f}")
    print(f"  Mean Squared Error:  {mse:.4f}")
    return r2, mse

# --- Cross-Validation Function ---
def cross_validate_model(model, X, y, k=5, name="Model"):
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    scores = cross_val_score(model, X, y, scoring='r2', cv=kf)
    print(f" {name} {k}-Fold CV R²: {np.mean(scores):.4f}")
    return scores

# --- Store results for tabular summary ---
results = []

# --- Loop Through All Columns ---
for col_index in range(df.shape[1]):
    y = df.iloc[:, col_index].values
    y_col = df.columns[col_index]

    # Train-test split and scaling
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    X_scaled = scaler.transform(X)

    # --- Ridge Regression ---
    ridge = Ridge(alpha=1.0)
    ridge.fit(X_train_scaled, y_train)
    y_pred_ridge = ridge.predict(X_scaled)
    r2_ridge = r2_score(y, y_pred_ridge)
    mse_ridge = mean_squared_error(y, y_pred_ridge)
    cv_r2_ridge = np.mean(cross_val_score(ridge, X_scaled, y, scoring='r2', cv=5))

    # --- Lasso Regression ---
    lasso = Lasso(alpha=0.1)
    lasso.fit(X_train_scaled, y_train)
    y_pred_lasso = lasso.predict(X_scaled)
    r2_lasso = r2_score(y, y_pred_lasso)
    mse_lasso = mean_squared_error(y, y_pred_lasso)
    cv_r2_lasso = np.mean(cross_val_score(lasso, X_scaled, y, scoring='r2', cv=5))

    results.append({
        'Target': y_col,
        'Ridge R²': f"{r2_ridge:.4f}",
        'Ridge MSE': f"{mse_ridge:.4f}",
        'Ridge CV R²': f"{cv_r2_ridge:.4f}",
        'Lasso R²': f"{r2_lasso:.4f}",
        'Lasso MSE': f"{mse_lasso:.4f}",
        'Lasso CV R²': f"{cv_r2_lasso:.4f}"
    })

# --- Convert to DataFrame and display ---
results_df = pd.DataFrame(results)

print(results_df.to_csv(sep='\t', index=False))
results_df.to_csv("ridge_lasso_results.csv", index=False)

Target	Ridge R²	Ridge MSE	Ridge CV R²	Lasso R²	Lasso MSE	Lasso CV R²
Volume Fraction	0.9917	0.0003	0.4527	0.6406	0.0126	-26.3152
Holes 1mm	-0.0525	38307.0371	-807.3984	-0.0585	38526.4446	-870.5345
Mat A 1mm	0.4176	4105.0047	-3.7341	0.3728	4420.9108	-4.0005
Mat B 1mm	0.9240	24.2273	-104.0634	0.9234	24.3959	-61.9568
Holes 5mm	-0.0523	953996.2222	-800.9522	-0.0588	959919.6996	-865.0784
Mat A 5mm	0.4167	102119.1171	-3.7666	0.3713	110065.6809	-4.0313
Mat B 5mm	0.9234	609.0221	-185.9262	0.9227	615.0713	-104.4039
Holes 100N	0.4876	22.7660	-4421.3390	0.4875	22.7669	-6253.3923
Mat A 100N	0.9302	0.0056	-9.6273	0.8670	0.0107	-24.2117
Mat B 100N	0.9678	0.0001	-2.0574	-0.0078	0.0029	-131.5906
Holes 500N	0.4876	569.2396	-4422.5973	0.4867	570.2287	-6606.7206
Mat A 500N	0.9302	0.1405	-9.6265	0.9280	0.1449	-12.4170
Mat B 500N	0.9677	0.0024	-2.0901	0.8480	0.0111	-25.0977

