In [15]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [16]:
# Load and preprocess data
df = pd.read_csv('../22_HP/outputs/simulation_results_not.csv')
df = df.drop(columns=['delay_LH_NodeA', 'delay_HL_NodeA'], axis=1)

In [17]:
x = df.drop(columns=['leakage'], axis=1).values
y = df['leakage'].values

In [18]:
scaler = StandardScaler()
x_scaled = scaler.fit_transform(df.drop('leakage', axis=1).values)
y_scaled = scaler.fit_transform(df['leakage'].values.reshape(-1, 1))

### Scaling

In [19]:

def train_and_test(x, y):
    # Split data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # Initialize and train the linear regression model
    regression_model = LinearRegression()
    regression_model.fit(x_train, y_train)

    # Make predictions on the test set
    y_pred = regression_model.predict(x_test)
    # y_pred = scaler.fit_transform(y_pred).reshape(-1, 1)
    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return mse, r2

In [20]:
mse_unscaled, r2_unscaled = train_and_test(x, y)

In [21]:
print("Unscaled Data - Baseline Model Performance:")
print("Mean Squared Error:", mse_unscaled)
print("R2 Score:", r2_unscaled)

Unscaled Data - Baseline Model Performance:
Mean Squared Error: 3.489941919742469e-17
R2 Score: 0.20045982187241806


In [22]:
mse_scaled, r2_scaled = train_and_test(x_scaled, y_scaled)
print("\nScaled Data - Baseline Model Performance:")
print("Mean Squared Error:", mse_scaled)
print("R2 Score:", r2_scaled)


Scaled Data - Baseline Model Performance:
Mean Squared Error: 0.42395058290271154
R2 Score: 0.5743556712338156


In [23]:
def train_and_test_2(x, y):
    # Split data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # Initialize and train the linear regression model
    regression_model = LinearRegression()
    regression_model.fit(x_train, y_train)

    # Make predictions on the test set
    y_pred = regression_model.predict(x_test)
    y_pred = scaler.fit_transform(y_pred).reshape(-1, 1)
    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return mse, r2

In [24]:
mse_scaled, r2_scaled = train_and_test_2(x_scaled, y_scaled)

In [25]:
print("\nScaled Data - Baseline Model Performance:")
print("Mean Squared Error:", mse_scaled)
print("R2 Score:", r2_scaled)


Scaled Data - Baseline Model Performance:
Mean Squared Error: 0.483274423612126
R2 Score: 0.5147948229252632


In [26]:
def train_and_test_3(x, y):
    # Split data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # Initialize and train the linear regression model
    regression_model = LinearRegression()
    regression_model.fit(x_train, y_train)

    # Make predictions on the test set
    y_pred = regression_model.predict(x_test)
    y_pred = scaler.inverse_transform(y_pred).reshape(-1, 1)
    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return mse, r2

In [27]:
mse_scaled, r2_scaled = train_and_test_3(x_scaled, y_scaled)
print("\nScaled Data - Baseline Model Performance:")
print("Mean Squared Error:", mse_scaled)
print("R2 Score:", r2_scaled)


Scaled Data - Baseline Model Performance:
Mean Squared Error: 0.4550867484456797
R2 Score: 0.543095112061685
