In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.multioutput import MultiOutputRegressor
import seaborn as sns
import matplotlib.pyplot as plt


In [2]:
# Read the raw measurements from disk and give the header-less first CSV column
# (which shows up as 'Unnamed: 0') a meaningful name.
# TODO: switch file_path to train_data.csv once that file is available.
file_path = 'mimic_2425.csv'
df = pd.read_csv(file_path).rename(columns={'Unnamed: 0': 'Timestamp'})

# Predictor columns used for each target; both lists are currently identical.
# NOTE(review): presumably PP is pulse pressure (SBP - DBP). If so, it is derived
# from the targets themselves -- confirm it is really available at inference time.
selected_features_sbp = ['PP', 'CO']
selected_features_dbp = ['PP', 'CO']

In [3]:
# Pull the predictor columns for each target out of the loaded frame
features_dbp = df[selected_features_dbp]
features_sbp = df[selected_features_sbp]

# Keep both targets in one two-column frame (SBP first, then DBP) so that a
# single multi-output model can be fitted on them
target_columns = ['SBP', 'DBP']
target_sbp_dbp = df[target_columns]

In [4]:
# Build the design matrix by placing the SBP and DBP feature frames side by side,
# then hold out 20% of the rows for testing (fixed seed for reproducibility).
# NOTE(review): while both feature lists select the same columns, this concat
# contains every column twice. De-duplicating would change the number of inputs
# the fitted model expects, so confirm with any consumer of the saved model
# before changing it.
X_all = pd.concat([features_sbp, features_dbp], axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    target_sbp_dbp,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Multivariate regression: wrap a plain linear regressor so that one model is
# fitted per target column of y_train
base_estimator = LinearRegression()
regression_model = MultiOutputRegressor(base_estimator)
regression_model.fit(X_train, y_train)


In [6]:
# Predict both targets for every held-out row. The result is a 2-D array with
# one column per target, in the same order as the columns of y_train
# (later cells read column 0 as SBP and column 1 as DBP).
predictions = regression_model.predict(X_test)

In [7]:
# Evaluate the performance on the held-out test set.
#   mae  - mean absolute error, averaged over both targets
#   rmse - root mean squared error, computed per target and then averaged
#   sd   - per-target standard deviation of the residuals (actual - predicted)
mae = mean_absolute_error(y_test, predictions)

# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# square root is taken explicitly. Averaging the per-target RMSE values yields
# the same number that `squared=False` used to return.
rmse_per_target = mean_squared_error(y_test, predictions, multioutput='raw_values') ** 0.5
rmse = rmse_per_target.mean()

# `predictions` is a plain array, so the subtraction is positional (row by row,
# column by column) and the result keeps y_test's column labels.
sd = y_test.subtract(predictions).std()

print("Mean Absolute Error (MAE) for SBP and DBP:", mae)
print("Root Mean Squared Error (RMSE) for SBP and DBP:", rmse)
print("Standard Deviation (SD) for SBP and DBP:", sd)

# Select a few samples from the test set (first rows by position, not by label)
num_samples_to_test = 5
X_samples = X_test.iloc[:num_samples_to_test]

# Make predictions for the selected samples
sample_predictions = regression_model.predict(X_samples)

# Convert predictions and actual values to a DataFrame for easier comparison.
# Column 0 / 1 of both the predictions and y_test are SBP / DBP respectively.
sample_results = pd.DataFrame({
    'Predicted SBP': sample_predictions[:, 0],
    'Predicted DBP': sample_predictions[:, 1],
    'Actual SBP': y_test.iloc[:num_samples_to_test, 0].values,
    'Actual DBP': y_test.iloc[:num_samples_to_test, 1].values
})

# Display the results
print("\nSample Predictions vs Actual Values:")
display(sample_results)

Mean Absolute Error (MAE) for SBP and DBP: 5.6897289356686604
Root Mean Squared Error (RMSE) for SBP and DBP: 7.7169940370828565
Standard Deviation (SD) for SBP and DBP: SBP    7.716349
DBP    7.716349
dtype: float64

Sample Predictions vs Actual Values:


Unnamed: 0,Predicted SBP,Predicted DBP,Actual SBP,Actual DBP
0,77.375845,47.275845,86.2,56.1
1,108.158302,61.558302,97.7,51.1
2,84.247472,51.847472,92.8,60.4
3,91.189863,52.489863,98.2,59.5
4,92.533964,51.133964,92.7,51.3


In [14]:
# NOTE(review): this import would normally live in the notebook's first cell with
# the other imports; it is kept here so the cell still runs on its own.
import joblib

# Persist the fitted model next to the notebook; joblib.dump returns the list of
# file names it wrote, which is shown as the cell output
model_filename = 'regression_model.joblib'
joblib.dump(value=regression_model, filename=model_filename)


['regression_model.joblib']