In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from ydata_profiling import ProfileReport

In [None]:
# Human-readable names for the CIFAR-10 classes. The list position is the
# integer label: later cells index this list with values taken from y_train.
class_names = [
    'airplane',    # 0
    'automobile',  # 1
    'bird',        # 2
    'cat',         # 3
    'deer',        # 4
    'dog',         # 5
    'frog',        # 6
    'horse',       # 7
    'ship',        # 8
    'truck',       # 9
]

# Fetch CIFAR-10 through the Keras built-in dataset loader, which returns
# the train and test splits as (images, labels) pairs.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Sanity check: report the array shapes of both splits.
print(f"Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}, Test labels shape: {y_test.shape}")

In [None]:
# Visualize the first few training images, titled with their class names,
# and save the figure under ../results.
n_samples = 10  # number of leading training images to display
n_cols = 5      # images per row
n_rows = -(-n_samples // n_cols)  # ceiling division so a partial last row still fits

# savefig does not create missing folders, so make sure the target exists
# (otherwise a fresh checkout fails with FileNotFoundError).
results_dir = Path('../results')
results_dir.mkdir(parents=True, exist_ok=True)

# Size the grid to the number of images; a fixed 5x5 grid would leave most
# of the figure empty when only 10 images are drawn.
# squeeze=False keeps `axes` two-dimensional even for a single row or column.
fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(2 * n_cols, 2.2 * n_rows), squeeze=False
)
# y_train holds one label per row in column 0 (it is indexed as y_train[i][0]).
for ax, image, label in zip(axes.flat, X_train[:n_samples], y_train[:n_samples, 0]):
    ax.imshow(image)
    ax.set_title(class_names[int(label)])
for ax in axes.flat:
    ax.axis("off")  # hide ticks and frames, including any unused grid cells
fig.tight_layout()
fig.savefig(results_dir / 'cifar10_sample_images.png')
plt.show()

In [None]:
# Class distribution in the training set: count images per class, plot the
# counts as a bar chart, and save the figure under ../results.

# One row per training image; 'Class' is the readable name looked up from
# the integer label (list position in class_names == label id).
df_train = pd.DataFrame(y_train, columns=['label'])
df_train['Class'] = df_train['label'].map(dict(enumerate(class_names)))

# value_counts() orders by frequency, so on a balanced dataset the bar order
# would be decided by tie-breaking. Reindex to the class_names order instead;
# fill_value=0 keeps a bar for any class that has no samples.
class_counts = df_train['Class'].value_counts().reindex(class_names, fill_value=0)

# savefig does not create missing folders, so make sure the target exists
# (otherwise a fresh checkout fails with FileNotFoundError).
results_dir = Path('../results')
results_dir.mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(10, 5))
class_counts.plot(kind='bar', ax=ax)
ax.set_title('Class Distribution in CIFAR-10 Training Set')
ax.set_xlabel('Class')
ax.set_ylabel('Number of Images')
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()  # keep the rotated tick labels inside the saved image
fig.savefig(results_dir / 'cifar10_class_distribution.png')
plt.show()

In [None]:
# Build a ydata-profiling report for the label DataFrame and write it to
# ../results/eda_report.html.
# Make sure the output folder exists first so the write cannot fail on a
# fresh checkout where ../results has not been created yet.
results_dir = Path('../results')
results_dir.mkdir(parents=True, exist_ok=True)

profile = ProfileReport(df_train, title="CIFAR-10 Label Distribution")
# Plain path: the original f-string had no placeholders to interpolate.
profile.to_file(results_dir / 'eda_report.html')