In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Load the data
data = pd.read_csv('diabetes_updated.csv')

# Separate the independent variables (X) and the dependent variable (Y)
X = data.drop('Outcome', axis=1)
Y = data['Outcome']

# Split the data into training and test sets (80% training, 20% test).
# random_state is fixed so the split (and every number printed below) is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# The frames returned by train_test_split are row subsets of X. Take explicit
# copies before assigning scaled columns into them: writing into a subset can
# trigger pandas' SettingWithCopyWarning and leaves it ambiguous which object
# is actually being modified.
X_train = X_train.copy()
X_test = X_test.copy()

# Identify which features require scaling and apply StandardScaler.
# The scaler is fitted on the training rows only and then re-used on the test
# rows, so no test-set statistics leak into the preprocessing step.
scaler = StandardScaler()
features_to_scale = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X_train[features_to_scale] = scaler.fit_transform(X_train[features_to_scale])
X_test[features_to_scale] = scaler.transform(X_test[features_to_scale])

# Generate a multiple linear regression model
# NOTE(review): 'Outcome' looks like a binary label, and a plain linear
# regression produces unbounded predictions — confirm that a regression (rather
# than a classifier) is what is intended here.
model = LinearRegression()
model.fit(X_train, Y_train)

# Print the intercept and coefficients of the trained model.
# Coefficients are reported in the column order of X_train.
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)

# Make predictions on the test set
Y_pred = model.predict(X_test)

# Compare the predicted values to the original data set
print("Predicted values:", Y_pred)
print("Original values:", Y_test.values)

# Interpreting the model: for each standardized feature, its coefficient is the
# change in the predicted outcome for a one-standard-deviation increase in that
# feature, holding the others fixed. Larger absolute values therefore indicate
# a stronger influence on the prediction.

# Compute R-squared for the model on the test set
r_squared = r2_score(Y_test, Y_pred)
print("R-squared score:", r_squared)


Intercept: 0.34690553745928343
Coefficients: [ 0.03465559  0.1803234  -0.04219339  0.00820563 -0.03230381  0.11631364
  0.03744793  0.07425473]
Predicted values: [ 0.33550028  0.23809869  0.1510522   0.2401365   0.48142376  0.45257375
 -0.17450469  0.60662287  0.52417796  0.70476953  0.32360466  0.85290601
  0.38466612  0.36056948  0.09946712  0.41539557  0.17869123  0.07782301
  0.80730861  0.51299477  0.28090594  0.08303057  0.5099157   0.11381771
  0.51325022  0.82528549  0.17892718 -0.0594202   0.28338572  0.16407949
  0.83851225  0.80737515  0.68154389  0.7649502   0.56140297  0.62123131
  1.06134554  0.30990775  0.51752336  0.63691482  0.07075333  0.57757007
  0.55015462  0.37541745 -0.07644182  0.50119208  0.59600162  0.27464761
  0.42477995  0.9941898   0.00969584  0.61763578  0.73395288  0.31090975
  0.13456812 -0.02536316  0.71219147 -0.30518218  0.41994556  0.67869594
  0.66891428  0.3798452   0.2956646   0.288035    0.06813053  0.55464338
  0.01368504  0.6272007  -0.0203328