### Imports

In [12]:
import os
from PIL import Image
import pandas as pd
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np


In [6]:
class CustomDataset(Dataset):
    def __init__(self, data_folder, csv_file, transform=None):
        self.data_folder = data_folder
        self.df = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.data_folder, self.df.iloc[idx]['original_title'])
        image = Image.open(img_name).convert('RGB')

        target = self.df.iloc[idx]['target_column']  # Replace 'target_column' with the actual column name in your CSV

        if self.transform:
            image = self.transform(image)

        return image, target

Resize (transforms.Resize((192, 192))):

Objective: Standardizes the size of the image.
How: Resizes the image to a fixed size of 192x192 pixels.
Random Horizontal Flip (transforms.RandomHorizontalFlip()):

Objective: Enhances the model's ability to recognize objects regardless of their orientation.
How: Flips the image horizontally with a 50% probability.
Random Vertical Flip (transforms.RandomVerticalFlip()):

Objective: Adds more variety to the training data by flipping images vertically.
How: Flips the image vertically with a 50% probability.
Random Rotation (transforms.RandomRotation(degrees=20)):

Objective: Enhances rotation invariance of the model.
How: Applies a random rotation to the image, up to 20 degrees.
Color Jitter (transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)):

Objective: Simulates different lighting conditions and color variations.
How: Randomly adjusts brightness, contrast, saturation, and hue.
Gaussian Blur (transforms.GaussianBlur(kernel_size=3)):

Objective: Simulates the effect of dirt or haze.
How: Applies random Gaussian blur to the image. Adjust the kernel size as needed.
Random Noise (transforms.RandomApply([transforms.Lambda(lambda x: x + 0.01 * torch.randn_like(x))], p=0.5)):

Objective: Simulates environmental conditions and adds robustness.
How: Introduces random noise to the image with a 50% probability.
Random Grayscale (transforms.RandomGrayscale(p=0.1)):

Objective: Accounts for potential variations in color.
How: Converts the image to grayscale with a 10% probability.
To Tensor (transforms.ToTensor()):

Objective: Converts the image to a PyTorch tensor.
How: Transforms the image data into a format suitable for deep learning.
Normalize (transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])):

Objective: Ensures consistent and standardized input data.
How: Normalizes the pixel values of the tensor to have a mean of 0.5 and a standard deviation of 0.5 for each channel.

In [8]:
transform = transforms.Compose([
    transforms.Resize((192, 192)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.GaussianBlur(kernel_size=3),  # You can adjust the kernel size
    transforms.RandomApply([transforms.Lambda(lambda x: x + 0.01 * torch.randn_like(x))], p=0.5),  # Add random noise
    transforms.RandomGrayscale(p=0.1),  # Convert to grayscale with a probability of 0.1
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [10]:
data_folder = 'SolarPanelSoilingImageDataset\Solar_Panel_Soiling_Image_dataset\Pytorch_data/'
csv_file = 'filtered_data.csv'

In [11]:
# Create an instance of the CustomDataset with the preprocessing transform
custom_dataset = CustomDataset(data_folder, csv_file, transform=transform)

# Create a DataLoader to load batches of data
batch_size = 64
data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)

In [13]:
def show_images(images, labels):
    fig, axs = plt.subplots(4, 8, figsize=(15, 8))
    axs = axs.flatten()

    for i in range(len(axs)):
        image, label = images[i], labels[i]
        image = image.numpy().transpose((1, 2, 0))
        mean = np.array([0.5, 0.5, 0.5])
        std = np.array([0.5, 0.5, 0.5])
        image = std * image + mean
        image = np.clip(image, 0, 1)

        axs[i].imshow(image)
        axs[i].set_title(f"Label: {label}")
        axs[i].axis('off')

    plt.show()

# Get a batch of data
for batch_images, batch_labels in data_loader:
    show_images(batch_images, batch_labels)
    break

KeyError: 'target_column'

In [4]:
'''
class ImageDataset(torch.utils.data.Dataset):
    def __init__(self, image_folder, csv_file, transform=None):
        self.image_folder = image_folder
        self.csv_file = csv_file
        self.transform = transform

        # Read CSV file into a pandas DataFrame
        self.df = pd.read_csv(csv_file)

        # Create a dictionary to map image filenames to their corresponding targets
        self.image_to_target = {}
        for index, row in self.df.iterrows():
            image_filename = row['original_title'] + '.jpg'
            target = row['original_title']
            self.image_to_target[image_filename] = target

    def __len__(self):
        return len(self.image_to_target)

    def __getitem__(self, index):
        # Get image filename and target for the current index
        image_filename, target = list(self.image_to_target.items())[index]
        image_path = os.path.join(self.image_folder, image_filename)

        # Load image
        image = Image.open(image_path)

        # Apply transformation if provided
        if self.transform is not None:
            image = self.transform(image)

        # Convert image to tensor
        image = torch.from_numpy(image.numpy().transpose(2, 0, 1)).float()

        # Return image tensor and target
        return image, target

'''

In [5]:
image_folder = 'SolarPanelSoilingImageDataset\Solar_Panel_Soiling_Image_dataset\Pytorch_data/'
csv_file = 'filtered_data.csv'
dataset = ImageDataset(image_folder, csv_file)

In [7]:
class CustomDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the CSV file with target values.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, str(self.df.iloc[idx, 0]))
        image = Image.open(img_name)

        # Assuming the target column is named 'loss_percentage'
        target = torch.tensor(self.df.iloc[idx, 1], dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, target

# Specify the paths
csv_file = 'filtered_data.csv'
root_dir = 'SolarPanelSoilingImageDataset\\Solar_Panel_Soiling_Image_dataset\\Pytorch_data'

# Define a transformation (resize, normalize, etc.) if needed
#transform = transforms.Compose([
#    transforms.Resize((224, 224)),
#    transforms.ToTensor(),
#])

# Create an instance of the CustomDataset
custom_dataset = CustomDataset(csv_file=csv_file, root_dir=root_dir)

# Access an example from the dataset
sample_image, sample_target = custom_dataset[0]
print(f'Sample Image Shape: {sample_image.shape}')
print(f'Sample Target: {sample_target}')

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\tfrod\\Desktop\\CV Project\\SolarPanelSoilingImageDataset\\Solar_Panel_Soiling_Image_dataset\\Pytorch_data\\6'