# Data Exploration

In this notebook, we explore the dataset used for spondylolisthesis grading. We load the annotations, visualize the distribution of grades, display sample images, and compute summary statistics in order to understand the characteristics of the spondylolisthesis images.

In [None]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from torchvision import transforms
from PIL import Image

# Seed NumPy's global random generator so reruns are reproducible
SEED = 42
np.random.seed(SEED)

# Apply seaborn's "whitegrid" style to the plots in this notebook
sns.set(style="whitegrid")

In [None]:
# Load the dataset
data_dir = '../data/processed/'  # Adjust the path as necessary
images_dir = os.path.join(data_dir, 'images')
annotations_file = os.path.join(data_dir, 'annotations.csv')

# os.listdir returns entries in arbitrary, filesystem-dependent order and also
# lists hidden files and subdirectories (e.g. .DS_Store, .ipynb_checkpoints).
# Keep only regular, non-hidden files and sort them so that "the first n
# images" refers to the same files on every machine and every run.
image_files = sorted(
    name
    for name in os.listdir(images_dir)
    if not name.startswith('.') and os.path.isfile(os.path.join(images_dir, name))
)

# Load annotations and preview the first rows
annotations = pd.read_csv(annotations_file)
annotations.head()

In [None]:
# Bar chart with the number of annotation rows per grade
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=annotations, x='grade', ax=ax)
ax.set_title('Distribution of Spondylolisthesis Grades')
ax.set_xlabel('Grade')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Display sample images from the dataset
def display_sample_images(image_files, n=5):
    """Show the first ``n`` images of ``image_files`` side by side in one row.

    Image paths are resolved relative to the notebook-level ``data_dir``
    as ``<data_dir>/images/<file name>``.

    Args:
        image_files: List of image file names.
        n: Maximum number of images to show. Fewer are shown when
            ``image_files`` contains fewer than ``n`` entries.
    """
    # Slice instead of indexing so that a dataset with fewer than n images
    # no longer raises IndexError.
    sample_files = image_files[:max(n, 0)]
    if not sample_files:
        print('No images to display.')
        return

    # squeeze=False keeps `axes` two-dimensional even for a single image.
    fig, axes = plt.subplots(1, len(sample_files), figsize=(15, 10), squeeze=False)
    for i, (ax, file_name) in enumerate(zip(axes[0], sample_files)):
        img_path = os.path.join(data_dir, 'images', file_name)
        # Image.open is lazy and keeps the file open; the context manager
        # closes it after imshow has read the pixel data.
        with Image.open(img_path) as img:
            # cmap only applies to single-channel images (presumably the
            # common case for radiographs -- confirm against the dataset);
            # matplotlib ignores it for RGB(A) data.
            ax.imshow(img, cmap='gray')
        ax.axis('off')
        ax.set_title(f'Image {i + 1}')
    plt.show()


display_sample_images(image_files)

In [None]:
# Descriptive statistics of the annotations table (pandas' default column selection)
summary_stats = annotations.describe()
summary_stats

## Conclusion

In this notebook, we explored the dataset for spondylolisthesis grading. We visualized the distribution of grades, displayed sample images, and computed summary statistics of the annotations. Further analysis (for example, of image dimensions and pixel-intensity distributions) can be conducted to understand the characteristics of the images in more detail.