In [2]:
import torch 
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [3]:
file_path = 'Project/measures_v2.csv'

df = pd.read_csv(file_path)

df.head()

Unnamed: 0,u_q,coolant,stator_winding,u_d,stator_tooth,motor_speed,i_d,i_q,pm,stator_yoke,ambient,torque,profile_id
0,-0.450682,18.805172,19.08667,-0.350055,18.293219,0.002866,0.004419,0.000328,24.554214,18.316547,19.850691,0.187101,17
1,-0.325737,18.818571,19.09239,-0.305803,18.294807,0.000257,0.000606,-0.000785,24.538078,18.314955,19.850672,0.245417,17
2,-0.440864,18.82877,19.08938,-0.372503,18.294094,0.002355,0.00129,0.000386,24.544693,18.326307,19.850657,0.176615,17
3,-0.327026,18.835567,19.083031,-0.316199,18.292542,0.006105,2.6e-05,0.002046,24.554018,18.330833,19.850647,0.238303,17
4,-0.47115,18.857033,19.082525,-0.332272,18.291428,0.003133,-0.064317,0.037184,24.565397,18.326662,19.850639,0.208197,17


In [4]:
def apply_z_score_normalization_except_profile_id(df):

    profile_id = df['profile_id']
    df_to_scale = df.drop('profile_id', axis=1)

    # Creating a StandardScaler object
    scaler = StandardScaler()

    # Fitting the scaler to the data and transforming it
    df_scaled = scaler.fit_transform(df_to_scale)

    # Creating a new DataFrame with the scaled data
    df_scaled = pd.DataFrame(df_scaled, columns=df_to_scale.columns)

    # Adding the 'profile_id' column back to the scaled DataFrame
    df_scaled['profile_id'] = profile_id

    return df_scaled

In [5]:
df_norm = apply_z_score_normalization_except_profile_id(df)

In [6]:
def add_ewa_features_by_profile_id(df, spans, exclude_columns):
    """
    Adds multiple exponentially weighted average columns for each original numeric column in the DataFrame 
    for each span value, grouped by 'profile_id'. New features are created only for the original columns.

    :param df: The DataFrame to which the EWA columns will be added.
    :param spans: A list of span values for the EWA calculation.
    :param exclude_columns: A list of column names to be excluded from the EWA calculation.
    :return: DataFrame with new EWA columns added for each span and each original numeric column.
    """
    # Ensure 'profile_id' is not in the exclude_columns
    exclude_columns = set(exclude_columns) | {'profile_id'}

    # List of original columns to apply EWA
    original_columns = [col for col in df.columns if col not in exclude_columns]

    # Iterate over each original column
    for col in original_columns:
        # Apply EWA for each span value
        for span in spans:
            # Define the column name for the EWA feature
            ewa_col_name = f'{col}_ewa_{span}'

            # Group by 'profile_id' and apply EWA using transform
            ewa_col = df.groupby('profile_id')[col].transform(lambda x: x.ewm(span=span, adjust=False).mean())

            # Add the new EWA column to the DataFrame
            df[ewa_col_name] = ewa_col

    return df

In [7]:
exclude_columns = ['pm']

spans= [600]

data_df = add_ewa_features_by_profile_id(df_norm, spans, exclude_columns)

data_df.head()

Unnamed: 0,u_q,coolant,stator_winding,u_d,stator_tooth,motor_speed,i_d,i_q,pm,stator_yoke,...,coolant_ewa_600,stator_winding_ewa_600,u_d_ewa_600,stator_tooth_ewa_600,motor_speed_ewa_600,i_d_ewa_600,i_q_ewa_600,stator_yoke_ewa_600,ambient_ewa_600,torque_ewa_600
0,-1.238979,-0.799812,-1.648158,0.39282,-1.681117,-1.184128,1.058337,-0.405855,-1.786837,-1.494244,...,-0.799812,-1.648158,0.39282,-1.681117,-1.184128,1.058337,-0.405855,-1.494244,-2.443386,-0.400838
1,-1.23615,-0.799197,-1.647959,0.393521,-1.681047,-1.184129,1.058278,-0.405867,-1.787686,-1.494323,...,-0.79981,-1.648158,0.392822,-1.681116,-1.184128,1.058337,-0.405855,-1.494244,-2.443386,-0.400836
2,-1.238757,-0.798729,-1.648064,0.392464,-1.681078,-1.184128,1.058289,-0.405854,-1.787338,-1.493755,...,-0.799807,-1.648157,0.392821,-1.681116,-1.184128,1.058337,-0.405855,-1.494242,-2.443386,-0.400836
3,-1.23618,-0.798417,-1.648285,0.393356,-1.681146,-1.184126,1.05827,-0.405836,-1.786847,-1.493529,...,-0.799802,-1.648158,0.392823,-1.681116,-1.184128,1.058337,-0.405855,-1.49424,-2.443386,-0.400834
4,-1.239442,-0.797432,-1.648303,0.393101,-1.681195,-1.184127,1.057279,-0.405455,-1.786248,-1.493738,...,-0.799794,-1.648158,0.392823,-1.681117,-1.184128,1.058333,-0.405854,-1.494238,-2.443386,-0.400833


In [8]:
print(data_df.isnull().count())
data_df.head()

u_q                       1330816
coolant                   1330816
stator_winding            1330816
u_d                       1330816
stator_tooth              1330816
motor_speed               1330816
i_d                       1330816
i_q                       1330816
pm                        1330816
stator_yoke               1330816
ambient                   1330816
torque                    1330816
profile_id                1330816
u_q_ewa_600               1330816
coolant_ewa_600           1330816
stator_winding_ewa_600    1330816
u_d_ewa_600               1330816
stator_tooth_ewa_600      1330816
motor_speed_ewa_600       1330816
i_d_ewa_600               1330816
i_q_ewa_600               1330816
stator_yoke_ewa_600       1330816
ambient_ewa_600           1330816
torque_ewa_600            1330816
dtype: int64


Unnamed: 0,u_q,coolant,stator_winding,u_d,stator_tooth,motor_speed,i_d,i_q,pm,stator_yoke,...,coolant_ewa_600,stator_winding_ewa_600,u_d_ewa_600,stator_tooth_ewa_600,motor_speed_ewa_600,i_d_ewa_600,i_q_ewa_600,stator_yoke_ewa_600,ambient_ewa_600,torque_ewa_600
0,-1.238979,-0.799812,-1.648158,0.39282,-1.681117,-1.184128,1.058337,-0.405855,-1.786837,-1.494244,...,-0.799812,-1.648158,0.39282,-1.681117,-1.184128,1.058337,-0.405855,-1.494244,-2.443386,-0.400838
1,-1.23615,-0.799197,-1.647959,0.393521,-1.681047,-1.184129,1.058278,-0.405867,-1.787686,-1.494323,...,-0.79981,-1.648158,0.392822,-1.681116,-1.184128,1.058337,-0.405855,-1.494244,-2.443386,-0.400836
2,-1.238757,-0.798729,-1.648064,0.392464,-1.681078,-1.184128,1.058289,-0.405854,-1.787338,-1.493755,...,-0.799807,-1.648157,0.392821,-1.681116,-1.184128,1.058337,-0.405855,-1.494242,-2.443386,-0.400836
3,-1.23618,-0.798417,-1.648285,0.393356,-1.681146,-1.184126,1.05827,-0.405836,-1.786847,-1.493529,...,-0.799802,-1.648158,0.392823,-1.681116,-1.184128,1.058337,-0.405855,-1.49424,-2.443386,-0.400834
4,-1.239442,-0.797432,-1.648303,0.393101,-1.681195,-1.184127,1.057279,-0.405455,-1.786248,-1.493738,...,-0.799794,-1.648158,0.392823,-1.681117,-1.184128,1.058333,-0.405854,-1.494238,-2.443386,-0.400833


In [9]:
import pandas as pd
import numpy as np
import torch

def divide_data_train_test(df, test_ratio=0.2):
    target = "pm"
    
    # Get unique profile IDs
    unique_ids = df['profile_id'].unique()
    
    # Determine the split index
    split_index = int(len(unique_ids) * (1 - test_ratio))
    
    # Sequentially split the profile IDs into train and test sets
    train_ids = unique_ids[:split_index]
    test_ids = unique_ids[split_index:]

    # Split the DataFrame into train and test based on profile IDs
    train_df = df[df['profile_id'].isin(train_ids)]
    test_df = df[df['profile_id'].isin(test_ids)]

    # Separating the target variable and features
    X_train = train_df.drop([target, "profile_id"], axis=1)
    y_train = train_df[target]

    X_test = test_df.drop([target, "profile_id"], axis=1)
    y_test = test_df[target]

    # Converting to PyTorch tensors
    X_train = torch.from_numpy(X_train.values).float()
    y_train = torch.from_numpy(y_train.values).float()

    X_test = torch.from_numpy(X_test.values).float()
    y_test = torch.from_numpy(y_test.values).float()

    return (X_train, y_train), (X_test, y_test)

# Use the function
train_data, test_data = divide_data_train_test(df, test_ratio=0.2)

# train_data and test_data are tuples containing the features and target for training and testing


In [10]:
(X_train, y_train), (X_test, y_test) = divide_data_train_test(data_df, test_ratio=0.2)

**Linear Regression**

In [11]:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score

def initialize_parameters(n_features):
    """ Initialize weights randomly and bias to zero. """
    weights = np.random.rand(n_features)
    bias = 0
    return weights, bias

def compute_cost(y, y_pred, weights, alpha):
    """ Compute the mean squared error cost with L2 regularization. """
    m = len(y)
    regularization_cost = alpha * np.sum(np.square(weights))
    cost = (1/m) * np.sum(np.square(y_pred - y)) + regularization_cost
    return cost

def gradient_descent(X, y, weights, bias, learning_rate, iterations, alpha):
    """ Perform gradient descent to learn weights and bias with L2 regularization. """
    m = len(y)
    costs = []
    y = y.numpy()  # Convert the PyTorch Tensor to a NumPy array

    for i in range(iterations):
        # Calculate predictions
        y_pred = np.dot(X, weights) + bias

        # Compute cost with regularization
        cost = compute_cost(y, y_pred, weights, alpha)
        costs.append(cost)

        # Calculate gradients with regularization term
        dw = (1/m) * np.dot(X.T, (y_pred - y)) + (alpha/m) * weights
        db = (1/m) * np.sum(y_pred - y)

        # Update parameters
        weights -= learning_rate * dw
        bias -= learning_rate * db

        if i % 100 == 0:
            print(f"Iteration {i}: Cost {cost}")

    return weights, bias, costs






In [12]:
'''

from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

def cross_validation(X, y, alphas, learning_rates, k=5):
    """ Perform k-fold cross-validation over different alpha and learning rate values. """
    kf = KFold(n_splits=k)
    results = {}

    for alpha in alphas:
        for lr in learning_rates:
            mse_scores = []

            for train_index, val_index in kf.split(X):
                X_train, X_val = X[train_index], X[val_index]
                y_train, y_val = y[train_index], y[val_index]

                n_features = X_train.shape[1]
                weights, bias = initialize_parameters(n_features)

                # Train the model
                trained_weights, trained_bias, _ = gradient_descent(
                    X_train, y_train, weights, bias, lr, 1000, alpha
                )

                # Predict on validation set
                y_val_pred = np.dot(X_val, trained_weights) + trained_bias

                # Compute and store the MSE
                mse = mean_squared_error(y_val, y_val_pred)
                mse_scores.append(mse)

            # Store the average MSE for this alpha and learning rate
            avg_mse = np.mean(mse_scores)
            results[(alpha, lr)] = avg_mse
            print(f"Alpha: {alpha}, Learning Rate: {lr}, Avg MSE: {avg_mse}")

    return results

# Example usage
alphas = [0.01, 0.1]  # Different values of alpha
learning_rates = [0.001, 0.01]  # Different values of learning rate

# Assuming X and y are your complete dataset as numpy arrays
results = cross_validation(X_train, y_train, alphas, learning_rates)

# Find the best parameters
best_params = min(results, key=results.get)
best_mse = results[best_params]
print(f"Best parameters: Alpha = {best_params[0]}, Learning Rate = {best_params[1]}, with MSE = {best_mse}")

'''

'\n\nfrom sklearn.model_selection import KFold\nfrom sklearn.metrics import mean_squared_error\n\ndef cross_validation(X, y, alphas, learning_rates, k=5):\n    """ Perform k-fold cross-validation over different alpha and learning rate values. """\n    kf = KFold(n_splits=k)\n    results = {}\n\n    for alpha in alphas:\n        for lr in learning_rates:\n            mse_scores = []\n\n            for train_index, val_index in kf.split(X):\n                X_train, X_val = X[train_index], X[val_index]\n                y_train, y_val = y[train_index], y[val_index]\n\n                n_features = X_train.shape[1]\n                weights, bias = initialize_parameters(n_features)\n\n                # Train the model\n                trained_weights, trained_bias, _ = gradient_descent(\n                    X_train, y_train, weights, bias, lr, 1000, alpha\n                )\n\n                # Predict on validation set\n                y_val_pred = np.dot(X_val, trained_weights) + trained_b

In [13]:
''' 

import matplotlib.pyplot as plt

y_true = y_test  # True values
y_pred = y_test_pred  # Predicted values from your model

# Plotting
plt.figure(figsize=(12, 6))
plt.plot(y_true, label='True Values', color='blue')
plt.plot(y_pred, label='Predictions', color='red')
plt.title('Time Series Prediction')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.show()

'''

" \n\nimport matplotlib.pyplot as plt\n\ny_true = y_test  # True values\ny_pred = y_test_pred  # Predicted values from your model\n\n# Plotting\nplt.figure(figsize=(12, 6))\nplt.plot(y_true, label='True Values', color='blue')\nplt.plot(y_pred, label='Predictions', color='red')\nplt.title('Time Series Prediction')\nplt.xlabel('Time')\nplt.ylabel('Value')\nplt.legend()\nplt.show()\n\n"

**Decision Tree**

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Load your dataset
# df = pd.read_csv('your_data.csv')


# Create a Decision Tree Regressor
dt_regressor = DecisionTreeRegressor(random_state=42)

# Train the model
dt_regressor.fit(X_train, y_train)

# Predict on the test set
y_pred = dt_regressor.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 0.22376789616674223


**Random Forest**

In [15]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Assuming X_train, X_test, y_train, y_test are already defined

# Create a Random Forest Regressor
rf_regressor = RandomForestRegressor(n_estimators=100)  # 100 trees in the forest

# Train the model
rf_regressor.fit(X_train, y_train)

# Predict on the test set
y_pred_rf = rf_regressor.predict(X_test)

# Evaluate the model
mse_rf = mean_squared_error(y_test, y_pred_rf)
print("Random Forest - Mean Squared Error:", mse_rf)


Random Forest - Mean Squared Error: 0.14241367078338943
