In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# --- Data loading -----------------------------------------------------------
# 'diabetes.csv' is expected to contain a 'Progression' column (the target);
# every other column is used as a predictor.
data = pd.read_csv('diabetes.csv')

# Predictors (X) are all columns except the target; the target (Y) is the
# 'Progression' column itself.
X = data.drop(columns=['Progression'])
Y = data['Progression']

# --- Train / test split -----------------------------------------------------
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# --- Feature scaling --------------------------------------------------------
# Each scaler learns its parameters from the training split only and is then
# applied unchanged to both splits, so no test-set statistics leak into the
# fitted transformation.
scaler_minmax = MinMaxScaler().fit(X_train)
X_train_minmax = scaler_minmax.transform(X_train)
X_test_minmax = scaler_minmax.transform(X_test)

scaler_standard = StandardScaler().fit(X_train)
X_train_standard = scaler_standard.transform(X_train)
X_test_standard = scaler_standard.transform(X_test)

# --- Model fitting ----------------------------------------------------------
# The multiple linear regression is trained on the standardized features;
# the min-max scaled arrays above are not used by this model.
model = LinearRegression().fit(X_train_standard, Y_train)

# Report the fitted parameters (intercept first, then the coefficients).
for label, value in (
    ("Intercept:", model.intercept_),
    ("Coefficients:", model.coef_),
):
    print(label, value)

# --- Evaluation -------------------------------------------------------------
# Predict on the held-out split (scaled with the training-fitted scaler) and
# score the predictions with R-squared.
Y_pred = model.predict(X_test_standard)

r2 = r2_score(y_true=Y_test, y_pred=Y_pred)
print("R-squared:", r2)


Intercept: 153.73654390934846
Coefficients: [  1.75375799 -11.51180908  25.60712144  16.82887167 -44.44885564
  24.64095356   7.67697768  13.1387839   35.16119521   2.35136365]
R-squared: 0.4526027629719189
