# Exploratory Data Analysis (EDA) of the scikit-learn Diabetes Dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes

## Load the Diabetes Dataset

In [None]:
# Fetch the diabetes dataset that ships with scikit-learn
diabetes = load_diabetes()

# Build one frame holding every feature column, with the regression
# target appended as the last column
df = pd.DataFrame(
    data=diabetes.data,
    columns=diabetes.feature_names,
).assign(target=diabetes.target)

## Basic Information and Summary Statistics

In [None]:
# Print a concise summary of the frame: index range, each column's
# non-null count and dtype, and the memory footprint
df.info()


In [None]:

# Preview the first 10 rows (feature columns plus 'target') to eyeball
# the raw values
df.head(10)

In [None]:
# Per-column summary statistics: count, mean, std, min, quartiles and max
df.describe()

In [None]:
# Count the missing (NaN) entries in each column
df.isna().sum()

## Visualizations

In [None]:
# Correlation matrix heatmap across every column (features and target)
corr_matrix = df.corr()
tick_positions = range(len(corr_matrix.columns))

plt.figure(figsize=(10, 6))
# Correlations live in [-1, 1]. Pinning the colour limits to that range keeps
# the diverging 'coolwarm' map centred on zero, instead of on the midpoint of
# whatever values happen to occur in the data.
plt.imshow(corr_matrix, cmap='coolwarm', interpolation='nearest', vmin=-1, vmax=1)
plt.colorbar()
# Label both axes with the column names; x labels are rotated to avoid overlap
plt.xticks(tick_positions, corr_matrix.columns, rotation=90)
plt.yticks(tick_positions, corr_matrix.columns)
plt.title("Correlation Matrix")
plt.show()

In [None]:
# Histogram of the target variable, 30 bins
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
hist_ax.hist(df['target'], bins=30, edgecolor='black', alpha=0.7)
hist_ax.set_title("Distribution of Target Variable")
hist_ax.set_xlabel("Target")
hist_ax.set_ylabel("Count")
plt.show()

In [None]:
# Box plots of the feature columns only.
# The 'target' column is excluded: it is on a much larger scale than the
# normalised features, so sharing one y-axis with it would flatten every
# feature box into an unreadable line.
plt.figure(figsize=(12, 6))
df.drop(columns='target').boxplot(rot=45)
plt.title("Box Plot of Features")
plt.show()

In [None]:
# Scatter plots of the five features most strongly correlated with the target

# Correlation of every feature with the target (the target's self-correlation
# is removed), then ranked by absolute strength
correlations = df.corr().loc[:, 'target'].drop(labels='target')
ranked_strength = correlations.abs().sort_values(ascending=False)
top_features = ranked_strength.index[:5]

scatter_fig = plt.figure(figsize=(15, 5))
for position, feature in enumerate(top_features, start=1):
    # Fill a 2x3 grid in row-major order; the sixth cell stays empty
    panel = scatter_fig.add_subplot(2, 3, position)
    panel.scatter(df[feature], df['target'], alpha=0.5)
    panel.set_xlabel(feature)
    panel.set_ylabel("Target")
    panel.set_title(f"{feature} vs Target")
scatter_fig.tight_layout()
plt.show()