# Multiple Linear Regression with Python and scikit-learn
### Predict glucose levels (1 hour in the future) from 11 hours of data
### Parameters that affect glucose levels:

#### 1. BMI
#### 2. HbA1c
#### 3. 

In [None]:
# imports for multiple linear regression model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sklearn

from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [None]:

# Read the raw series from disk.
# The CSV is expected to provide the readings in a column named 'value'.
data = pd.read_csv('your_data.csv')

# Chronological hold-out split: the leading 80% of the rows form the
# training set and the remaining rows the test set (no shuffling).
train_ratio = 0.8
train_size = int(len(data) * train_ratio)
train_data, test_data = data.iloc[:train_size], data.iloc[train_size:]

# Define the lag order and shift size
# NOTE(review): per the original comments the readings are 5 minutes apart,
# so 12 readings span about one hour, not 11 hours (that would be 132
# readings), and a 1-hour-ahead target would need shift_size = 12.
# The original values are kept here -- confirm the intended ones.
lag_order = 12  # Number of consecutive past readings used as features
shift_size = 1  # Forecast horizon, in readings, after the last feature


def _make_lagged_xy(values, lag_order, shift_size):
    """Build lagged feature windows and their forecast targets.

    Row ``k`` of ``X`` holds the ``lag_order`` consecutive readings
    ``values[k:k + lag_order]``; its target is the reading that lies
    ``shift_size`` steps after the last reading of that window.

    Args:
        values: 1-D sequence of readings in chronological order.
        lag_order: Number of past readings per feature row (>= 1).
        shift_size: Steps between the window's last reading and the
            target (>= 1).

    Returns:
        Tuple ``(X, y)`` of arrays with shapes ``(n_samples, lag_order)``
        and ``(n_samples,)``.

    Raises:
        ValueError: If ``lag_order`` or ``shift_size`` is not positive, or
            the series is too short to form a single sample.
    """
    if lag_order < 1 or shift_size < 1:
        raise ValueError("lag_order and shift_size must both be >= 1")

    values = np.asarray(values)
    n_samples = len(values) - lag_order - shift_size + 1
    if n_samples < 1:
        raise ValueError(
            f"Need at least {lag_order + shift_size} readings to build one "
            f"sample, got {len(values)}"
        )

    # A window ending at index i - 1 is paired with index i - 1 + shift_size,
    # so the target is exactly shift_size readings ahead of the newest
    # feature (not shift_size + 1, as an `i + shift_size` index would give).
    X = np.array([values[k:k + lag_order] for k in range(n_samples)])
    y = values[lag_order + shift_size - 1:].copy()
    return X, y


# Create lagged features
X_train, y_train = _make_lagged_xy(
    train_data['value'].values, lag_order, shift_size
)

# Train the linear autoregressive model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate the model on the test set; windows are built from the test rows
# only, so no training reading appears among the test features.
X_test, y_test = _make_lagged_xy(
    test_data['value'].values, lag_order, shift_size
)

y_pred = model.predict(X_test)

# Evaluation metrics
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("Coefficient of Determination (R-squared):", r2)

# Forecast future values recursively, one reading at a time.
# NOTE: the recursion feeds every prediction back in as the reading that
# immediately follows the window, so it is only consistent with a model
# whose target is the next reading after its input window.

# Seed window: the most recent lag_order readings. A float copy is taken so
# predictions can be stored without integer truncation and `data` itself is
# never modified.
last_observation = np.array(data['value'].values[-lag_order:], dtype=float)
future_steps = 12  # Number of future readings to forecast

future_features = []  # Input window used at each forecast step
future_values = []    # Model output at each forecast step
for _ in range(future_steps):
    future_features.append(last_observation)
    # Predict from the window while it is still in chronological order;
    # shifting first would place the oldest reading in the newest slot.
    next_value = model.predict(last_observation.reshape(1, -1))[0]
    future_values.append(next_value)
    # Slide the window: drop the oldest reading, append the new prediction.
    last_observation = np.append(last_observation[1:], next_value)

future_features = np.array(future_features)
future_values = np.array(future_values)

# Print the forecasted values (the predictions, not the input windows)
print("Forecasted values:")
print(future_values)


## Hyperparameter tuning