In [None]:
#1.Loading and Preprocessing (4 marks)

import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

# Fetch the bundled diabetes dataset and wrap it in labelled pandas objects
diabetes_data = load_diabetes()
y = pd.Series(diabetes_data.target)
X = pd.DataFrame(data=diabetes_data.data, columns=diabetes_data.feature_names)

# Report how many entries are missing in each feature column
missing_per_feature = X.isna().sum()
print(missing_per_feature)

# Normalize the features: fit the scaler on X, then apply it to the same data
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)



In [None]:
#2.Exploratory Data Analysis (EDA) (4 marks)

#Perform EDA to understand the distribution of features and the target variable.

import seaborn as sns
import matplotlib.pyplot as plt

# Visualizing the distribution of the target variable
sns.histplot(y, kde=True)
plt.title('Distribution of Diabetes Progression')
plt.xlabel('Diabetes Progression')
plt.show()

# Correlation heatmap of the feature columns
correlation_matrix = X.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Visualize the relationships between features and the target variable.
# y was created without a name, so a plain concat would label its column 0
# and y_vars=['target'] would raise KeyError. Name it explicitly first.
features_and_target = pd.concat([X, y.rename('target')], axis=1)
sns.pairplot(features_and_target, x_vars=X.columns, y_vars=['target'], height=2.5)
plt.show()


In [None]:
#3.Building the ANN Model (4 marks)
'''Design a simple ANN architecture with at least one hidden layer.
Use appropriate activation functions .'''

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Seed the random number generators before any weights are created so that
# repeated runs of the notebook start from the same state.
set_random_seed(42)

# Split the raw features first, then fit the scaler on the training rows only.
# Splitting an already-scaled matrix (scaler fitted on every row) lets
# test-set statistics leak into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
split_scaler = StandardScaler().fit(X_train_raw)
X_train = split_scaler.transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)

# Build the ANN model: two ReLU hidden layers feeding one linear output unit
# (regression target).
model = Sequential()
# Declare the input width with an explicit Input layer; passing `input_dim`
# to Dense is deprecated in Keras 3.
model.add(Input(shape=(X.shape[1],)))
model.add(Dense(64, activation='relu'))  # Hidden layer
model.add(Dense(32, activation='relu'))  # Another hidden layer
model.add(Dense(1, activation='linear'))  # Output layer


In [None]:
#4.Training the ANN Model (4 marks)
'''Split the dataset into training and testing sets.
Train the model on the training data.
Use an appropriate loss function and optimizer.'''

# Regression setup: minimise mean squared error with the Adam optimizer
model.compile(optimizer='adam', loss='mean_squared_error')

# Training hyperparameters, gathered in one place for easy tweaking
training_options = {'epochs': 100, 'batch_size': 10, 'verbose': 1}

# Fit the network on the training split
model.fit(X_train, y_train, **training_options)


In [None]:
#5.Evaluating the Model (3 marks)
'''Evaluate the model on the testing data.
Report the performance metrics (e.g., Mean Squared Error, R² Score).'''

from sklearn.metrics import mean_squared_error, r2_score

# Predict on the held-out test split
y_pred = model.predict(X_test)

# Score the predictions against the true test targets
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R² Score: {r2}')


In [None]:
#6.Improving the Model (5 marks)
'''Experiment with different architectures, activation functions, or hyperparameters to improve the model performance.
Report the changes made and the corresponding improvement in performance.'''

# Re-seed so this experiment starts from a fixed random state regardless of
# how many random draws the earlier cells consumed.
set_random_seed(42)

# New model with different architecture.
# Changes vs. the baseline: wider hidden layers (128/64 instead of 64/32)
# and twice as many training epochs (200 instead of 100).
model_improved = Sequential()
# Explicit Input layer; passing `input_dim` to Dense is deprecated in Keras 3.
model_improved.add(Input(shape=(X.shape[1],)))
model_improved.add(Dense(128, activation='relu'))  # More neurons
model_improved.add(Dense(64, activation='relu'))  # Another hidden layer
model_improved.add(Dense(1, activation='linear'))  # Output layer

model_improved.compile(loss='mean_squared_error', optimizer='adam')
model_improved.fit(X_train, y_train, epochs=200, batch_size=10, verbose=1)

# Evaluate the improved model on the same held-out test split
y_pred_improved = model_improved.predict(X_test)
mse_improved = mean_squared_error(y_test, y_pred_improved)
r2_improved = r2_score(y_test, y_pred_improved)

print(f'Improved Mean Squared Error: {mse_improved}')
print(f'Improved R² Score: {r2_improved}')

# Report the change relative to the baseline metrics (`mse`, `r2`) computed in
# the evaluation cell: negative MSE change / positive R² change is better.
print(f'Change in MSE vs. baseline: {mse_improved - mse}')
print(f'Change in R² vs. baseline: {r2_improved - r2}')
