In [None]:

import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes

# Fetch the diabetes dataset that ships with scikit-learn
data = load_diabetes()

# Build a single table: one column per feature, plus the regression target
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Peek at the top rows of the table
print(df.head())


In [None]:

# Pull the target column out as y; everything else becomes the features X
y = df['target']
X = df.drop(columns='target')

# Report the dimensions of both pieces
print("Features shape:", X.shape)
print("Target shape:", y.shape)


In [None]:

from sklearn.preprocessing import StandardScaler

# Initialize StandardScaler
scaler = StandardScaler()

# Standardize the features
X_standardized = scaler.fit_transform(X)

# Check the mean and std of the standardized features
print("Mean after standardization:", np.mean(X_standardized, axis=0))
print("Std after standardization:", np.std(X_standardized, axis=0))


In [None]:

from sklearn.model_selection import train_test_split

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_standardized, y, test_size=0.2, random_state=42)

# Print the shape of train and test sets
print("Train features shape:", X_train.shape)
print("Test features shape:", X_test.shape)


In [None]:

from sklearn.linear_model import LinearRegression

# Create the estimator and fit it on the training split in one step
# (fit() hands back the fitted estimator, so the result can be bound directly)
model = LinearRegression().fit(X_train, y_train)

# Show the learned parameters
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)


In [None]:

from sklearn.metrics import mean_squared_error

# Generate predictions for the held-out features
y_pred = model.predict(X_test)

# Score the predictions against the true targets with mean squared error
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)


In [None]:

from sklearn.metrics import r2_score

# Coefficient of determination for the same test-set predictions
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared:", r2)


In [None]:

import matplotlib.pyplot as plt

# Scatter the true targets (x-axis) against the model's predictions (y-axis),
# drawing on an explicit Axes object rather than the implicit pyplot state
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set_xlabel("Actual Values")
ax.set_ylabel("Predicted Values")
ax.set_title("Actual vs Predicted Values")
plt.show()


In [None]:

# Rebuild a labelled table from the standardized feature array, attach the
# target column, and write it to disk (without the index) for later reuse
standardized_df = pd.DataFrame(X_standardized, columns=data.feature_names).assign(target=y)
standardized_df.to_csv("standardized_diabetes.csv", index=False)

print("Standardized dataset saved.")


In [None]:

import joblib

# Single source of truth for the artifact location
model_path = 'linear_regression_model.pkl'

# Serialize the fitted regressor to disk.
# Note: only `model` is written here; the `scaler` is not, so anything fed to
# the reloaded model must already be standardized the way X_test is.
joblib.dump(model, model_path)

# Read it back to confirm the round trip works
loaded_model = joblib.load(model_path)

# Sanity check: predict the first five test rows with the reloaded model
print("Loaded model predictions:", loaded_model.predict(X_test[:5]))
