In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load sensor input data (first 50 columns)
inputdf = pd.read_csv('/kaggle/input/diabetes-dataset/sensor_data.csv', header=None).iloc[:, :50]

# 2. Assign column names
labels = ['white', 'red', 'ir', 'green', 'none']
new_columns = [f'{label}{i+1}' for label in labels for i in range(10)]
inputdf.columns = new_columns

# 3. Load and repeat output values
outputdf = pd.read_csv('/kaggle/input/diabetes-dataset/result_diabetes2.csv', header=None)
outputdf.columns = ['value']

def repeat_rows(df, repeat_count):
    return df.loc[df.index.repeat(repeat_count)].reset_index(drop=True)

repeated_df = repeat_rows(outputdf, 10)

# 4. Define selected features
selected_columns = [
    'white1', 'white2', 'white3', 'white4', 'white6', 'white7', 'white8', 'white9', 'white10',
    'red2', 'red4', 'red6', 'red7', 'red8', 'red9', 'red10',
    'ir1', 'ir2', 'ir4', 'ir5', 'ir6', 'ir8', 'ir9', 'ir10',
    'green1', 'green2', 'green3', 'green5', 'green6', 'green7', 'green8', 'green9',
    'none1', 'none2', 'none3', 'none5', 'none6', 'none7', 'none8', 'none10'
]

# 5. Define preprocessing step
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), selected_columns)
    ],
    remainder='drop'
)

# 6. Define pipeline with MLPRegressor
pipe = Pipeline([
    ('preprocess', preprocessor),
    ('model', MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=1000, random_state=42))
])

# 7. Train-test split
X_train, X_test, y_train, y_test = train_test_split(inputdf, repeated_df, test_size=0.2, random_state=42)

# 8. Fit the model
pipe.fit(X_train, y_train.values.ravel())  # Flatten y for MLPRegressor

# 9. Make predictions
predictions = pipe.predict(X_test)

# 10. Evaluate
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
epsilon = 1e-10
mard = np.mean(np.abs((y_test.values.flatten() - predictions.flatten()) / (y_test.values.flatten() + epsilon))) * 100

print(f"\nEvaluation Metrics:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R-squared (R²): {r2:.4f}")
print(f"Mean Absolute Relative Difference (MARD): {mard:.2f}%")


In [None]:
import joblib
joblib.dump(pipe, 'model_pipeline.joblib')


In [None]:
import matplotlib.pyplot as plt

# Plot loss curve (training loss per epoch/iteration)
loss_values = pipe.named_steps['model'].loss_curve_

plt.figure(figsize=(8, 5))
plt.plot(loss_values, label='Training Loss')
plt.title('Loss Curve (MLPRegressor)')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [14]:
# # New sensor row (reshape into a 1-row DataFrame)
# new_input = np.array([[
#     987, 1869, 3637, 4091, 5572, 7947, 10201, 10201, 10201, 4105,
#     16, 14, 16, 29, 20, 96, 620, 79, 577, 110,
#     22, 23, 23, 45, 23, 46, 25, 21, 518, 2827,
#     4, 3, 16, 136, 80, 20, 17, 12, 194, 10,
#     0, 0, 0, 15, 7, 1, 1, 1, 19, 0
# ]

# ])

# new_input_df = pd.DataFrame(new_input, columns=new_columns)

# # Predict
# predicted_value = pipe.predict(new_input_df)[0]

# print(f"\nPredicted Output for the Given Sensor Row: {predicted_value:.4f}")



Predicted Output for the Given Sensor Row: 112.2643


In [12]:
# # Predict on training and testing sets
# train_predictions = pipe.predict(X_train)
# test_predictions = pipe.predict(X_test)

# # Define a function to compute all metrics
# def evaluate(y_true, y_pred, dataset_name=""):
#     mse = mean_squared_error(y_true, y_pred)
#     mae = mean_absolute_error(y_true, y_pred)
#     r2 = r2_score(y_true, y_pred)
#     epsilon = 1e-10
#     mard = np.mean(np.abs((y_true.flatten() - y_pred.flatten()) / (y_true.flatten() + epsilon))) * 100
#     print(f"\n{dataset_name} Performance:")
#     print(f"  Mean Squared Error (MSE): {mse:.4f}")
#     print(f"  Mean Absolute Error (MAE): {mae:.4f}")
#     print(f"  R-squared (R²): {r2:.4f}")
#     print(f"  Mean Absolute Relative Difference (MARD): {mard:.2f}%")

# # Evaluate
# evaluate(y_train.values, train_predictions, "Training")
# evaluate(y_test.values, test_predictions, "Testing")



Training Performance:
  Mean Squared Error (MSE): 75.1009
  Mean Absolute Error (MAE): 5.8523
  R-squared (R²): 0.8141
  Mean Absolute Relative Difference (MARD): 5.36%

Testing Performance:
  Mean Squared Error (MSE): 91.5933
  Mean Absolute Error (MAE): 6.5595
  R-squared (R²): 0.7704
  Mean Absolute Relative Difference (MARD): 5.92%


In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.pipeline import Pipeline
# from sklearn.compose import ColumnTransformer
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# import tensorflow as tf
# from tensorflow.keras import layers, models, callbacks

# # 1. Load sensor input data (first 50 columns)
# inputdf = pd.read_csv('/kaggle/input/diabetes-dataset/sensor_data.csv', header=None).iloc[:, :50]

# # 2. Assign column names
# labels = ['white', 'red', 'ir', 'green', 'none']
# new_columns = [f'{label}{i+1}' for label in labels for i in range(10)]
# inputdf.columns = new_columns

# # 3. Load and repeat output values
# outputdf = pd.read_csv('/kaggle/input/diabetes-dataset/result_diabetes2.csv', header=None)
# outputdf.columns = ['value']

# def repeat_rows(df, repeat_count):
#     return df.loc[df.index.repeat(repeat_count)].reset_index(drop=True)

# repeated_df = repeat_rows(outputdf, 10)

# # 4. Define selected features
# selected_columns = [
#     'white1', 'white2', 'white3', 'white4', 'white6', 'white7', 'white8', 'white9', 'white10',
#     'red2', 'red4', 'red6', 'red7', 'red8', 'red9', 'red10',
#     'ir1', 'ir2', 'ir4', 'ir5', 'ir6', 'ir8', 'ir9', 'ir10',
#     'green1', 'green2', 'green3', 'green5', 'green6', 'green7', 'green8', 'green9',
#     'none1', 'none2', 'none3', 'none5', 'none6', 'none7', 'none8', 'none10'
# ]

# # 5. Preprocessing using sklearn
# scaler = StandardScaler()
# inputdf_scaled = inputdf.copy()
# inputdf_scaled[selected_columns] = scaler.fit_transform(inputdf[selected_columns])

# # 6. Train-test split
# X_train, X_test, y_train, y_test = train_test_split(inputdf_scaled[selected_columns], repeated_df['value'], test_size=0.2, random_state=42)

# # 7. Define TensorFlow model
# def build_model(input_dim):
#     model = models.Sequential([
#         layers.Input(shape=(input_dim,)),
#         layers.Dense(64, activation='relu'),
#         layers.Dense(32, activation='relu'),
#         layers.Dense(1)  # Output layer for regression
#     ])
#     model.compile(optimizer='adam', loss='mse', metrics=['mae'])
#     return model

# model = build_model(input_dim=len(selected_columns))

# # 8. Train the model
# early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# history = model.fit(
#     X_train, y_train,
#     validation_split=0.1,
#     epochs=500,
#     batch_size=32,
#     callbacks=[early_stop],
#     verbose=1
# )

# # 9. Make predictions
# predictions = model.predict(X_test).flatten()

# # 10. Evaluate
# mse = mean_squared_error(y_test, predictions)
# mae = mean_absolute_error(y_test, predictions)
# r2 = r2_score(y_test, predictions)
# epsilon = 1e-10
# mard = np.mean(np.abs((y_test.values.flatten() - predictions.flatten()) / (y_test.values.flatten() + epsilon))) * 100

# print(f"\nEvaluation Metrics:")
# print(f"Mean Squared Error (MSE): {mse:.4f}")
# print(f"Mean Absolute Error (MAE): {mae:.4f}")
# print(f"R-squared (R²): {r2:.4f}")
# print(f"Mean Absolute Relative Difference (MARD): {mard:.2f}%")
