
# Load the dataset and convert it from .npz to .jpg
```
import os

# Convert every sample stored in the .npz archive into a JPEG file whose name
# encodes its label: "<label>_<image_name>.jpg".
output_dir = "val_data"

# Image.save() does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# Open the archive as a context manager so its file handle is closed once the
# arrays have been read into memory.
with np.load("Dataset5_raw_val.npz") as npz:
    image_data = npz['image']
    image_names = npz['image_name']
    image_labels = npz['image_label']

# Fail loudly instead of silently pairing images with the wrong name/label.
if not (len(image_data) == len(image_names) == len(image_labels)):
    raise ValueError(
        "image, image_name and image_label must have the same length, got "
        f"{len(image_data)}, {len(image_names)} and {len(image_labels)}"
    )

for image_array, name_row, label_row in zip(image_data, image_names, image_labels):
    # Pixel values are cast to uint8 before being handed to PIL.
    image = Image.fromarray(np.uint8(image_array))

    # Names and labels are stored one per row; the value is the row's first entry.
    image_name = name_row[0]
    image_label = np.uint8(label_row[0])

    filename = f"{image_label}_{image_name}.jpg"
    image.save(os.path.join(output_dir, filename))
```

# Set directory where files are stored

```
# Source folder whose files are listed and moved by the renaming step below;
# it is only ever combined with file names through os.path.join.
directory = 'new_train/'
```

# List the files in the directory, then rename each one and move it into its label folder under data/

```
# Sort the listing: os.listdir() returns entries in arbitrary order, which
# would make the sequential numbering below differ from run to run.
files = sorted(os.listdir(directory))

for i, filename in enumerate(files):
    # Split "<label>_<image name>.<ext>" into its parts
    name, _ = os.path.splitext(filename)
    label, separator, _ = name.partition('_')
    if not separator:
        # Not a "<label>_<name>" file (e.g. a stray system file); leave it alone
        # instead of crashing on the unpacking.
        continue

    # Replace the image name with the running index, zero-padded to 5 digits
    img_name = str(i).zfill(5)

    # Construct new filename
    new_filename = f"{img_name}.jpg"

    # os.rename() does not create missing folders, so make sure the per-label
    # destination exists before moving the file into it.
    label_dir = os.path.join("data", label)
    os.makedirs(label_dir, exist_ok=True)

    # Move the file into its label folder under the new name
    os.rename(os.path.join(directory, filename), os.path.join(label_dir, new_filename))
```

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

In [None]:
def data_sampling(indices):
  """Build a sampler that visits the given dataset indices in random order.

  Args:
    indices: Sequence of dataset indices the sampler is restricted to.

  Returns:
    A ``SubsetRandomSampler`` over ``indices``.
  """
  sampler_cls = torch.utils.data.sampler.SubsetRandomSampler
  subset_sampler = sampler_cls(indices)
  return subset_sampler

In [None]:
#Data Preprocessing
def data_preprocess(data_path, sample_ratio):
  """Load the image folder, draw a random subsample and split it 70/20/10.

  Args:
    data_path: Prefix placed directly in front of "data" to build the dataset
      root, so it normally ends with a path separator.
    sample_ratio: Fraction of the full dataset to keep before splitting.

  Returns:
    A tuple ``(dataset, train_loader, train_indices, test_loader,
    test_indices, val_loader, val_indices)``. ``dataset`` is the ImageFolder
    used for training (augmented); the index arrays are NumPy arrays of
    positions into it.

  Note:
    The batch size is read from a module-level ``batch_size`` variable, which
    must be defined before this function is called.
  """
  root = "{}data".format(data_path)

  # Training transforms: resize, random flip (augmentation), tensor, normalize
  train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

  # Evaluation transforms: identical but without the random flip, so test and
  # validation metrics are computed on unaugmented, reproducible inputs.
  eval_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

  # Two views of the same folder. ImageFolder builds its sample list in sorted
  # order, so an index refers to the same image in both datasets.
  dataset = datasets.ImageFolder(root = root, transform = train_transforms)
  eval_dataset = datasets.ImageFolder(root = root, transform = eval_transforms)

  # Get a sample of the data randomly (without replacement)
  num_samples = int(len(dataset) * sample_ratio)
  indices = np.random.choice(len(dataset), num_samples, replace = False)

  # Split the sample into training (70%), test (20%) and validation (rest)
  train_size = int(0.7 * num_samples)
  test_size = int(0.2 * num_samples)

  train_indices = indices[ : train_size]
  test_indices = indices[train_size : train_size + test_size]
  val_indices = indices[train_size + test_size : ]

  samples = [data_sampling(i) for i in [train_indices, test_indices, val_indices]]

  # Create data loaders for training, test, and validation sets
  train_loader = DataLoader(dataset, batch_size = batch_size, sampler = samples[0])
  test_loader = DataLoader(eval_dataset, batch_size = batch_size, sampler = samples[1])
  val_loader = DataLoader(eval_dataset, batch_size = batch_size, sampler = samples[2])

  return dataset, train_loader, train_indices, test_loader, test_indices, val_loader, val_indices

In [None]:
def evaluate_model(model, dataloader, data_size, dtype, criterion, data_path, model_name):
  """Evaluate ``model`` on ``dataloader``, print metrics and save a confusion-matrix plot.

  Args:
    model: Network to evaluate; it is switched to eval mode.
    dataloader: Iterable yielding ``(inputs, labels)`` batches.
    data_size: Sized collection whose length divides the accumulated loss
      (presumably the index array of the split - confirm against the caller).
    dtype: Name of the split; used in the printed header and the plot title/file name.
    criterion: Loss callable invoked as ``criterion(outputs, labels)``.
    data_path: Not used in the portion of the function shown here.
    model_name: Used in the plot title and the output file name.

  Side effects:
    Prints loss and macro-averaged recall, precision and F-score, and writes
    ``"<model_name>_<dtype>SET_CONFUSION_MATRIX.png"`` using a relative path.
  """
  # Running loss total, predicted class ids and true class ids
  _loss, _pred, _true = 0.0, [], []
  model.eval()

  # No gradients are needed for evaluation
  with torch.no_grad():
      for inputs, labels in dataloader:
          # NOTE(review): no device transfer happens here, so the batches and
          # the model must already live on the same device.
          outputs = model(inputs)
          loss = criterion(outputs, labels)

          # Weight the batch loss by the batch size (assumes the criterion
          # returns a per-batch mean - TODO confirm)
          _loss += loss.item() * inputs.size(0)
          # Predicted class = index of the largest output along dim 1
          _, predicted = torch.max(outputs.data, 1)
          _pred.extend(predicted.cpu().numpy())
          _true.extend(labels.cpu().numpy())
  
  # Normalise by the number of entries in data_size (not by a sample count
  # gathered in the loop above)
  _loss /= len(data_size)
  _recall = recall_score(_true, _pred, average='macro')
  _precision = precision_score(_true, _pred, average='macro')
  _fscore = f1_score(_true, _pred, average='macro')

  print('MODEL PERFORMANCE ON {} SET'.format(dtype))
  print('Loss: {:.4f}, Recall: {:.4f}, Precision: {:.4f}, F-score: {:.4f}'.format(_loss, _recall, _precision, _fscore))
  print("")

  # Plot the confusion matrix as a heat map and save it as a PNG
  cm = confusion_matrix(_true, _pred)
  plt.figure(figsize = (8, 8))
  plt.imshow(cm, cmap = plt.cm.Blues)
  plt.title("{}_{}SET_CONFUSION_MATRIX".format(model_name, dtype))
  plt.colorbar()
  plt.savefig("{}_{}SET_CONFUSION_MATRIX.png".format(model_name, dtype))