In [116]:
import os
import csv
import glob
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import train_test_split

In [2]:
try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False

# Debug
print(IN_COLAB)
if IN_COLAB:
  from google.colab import drive
  drive.mount('/content/drive')

True
Mounted at /content/drive


In [15]:
# if IN_COLAB:
#   ds_path = '/content/drive/My Drive/Data/kidney-stone-classification/train/'
# else:
#   ds_path = '/home/'

In [62]:
ds_path = '/content/drive/My Drive/projeto/dataset-article/dataset-64/'

In [125]:
def csv_for_kfold(mode, num_folds, data):

  file_path = f"{ds_path}/{mode}{num_folds}.csv"
  fields = ["filepath", "label"]

  write_header = not os.path.exists(file_path)

  with open(file_path, 'a', newline='') as file:
      writer = csv.DictWriter(file, fieldnames=fields)

      if write_header:
        writer.writeheader()

      writer.writerow(data)

In [126]:
relative_path = os.path.relpath('/content/drive/My Drive/projeto/dataset-article/with-stone/', '/content/drive/My Drive/projeto/')
print(relative_path)

dataset-article/with-stone


In [127]:
def datasets_gen(ds_path):
  # store relative image file path
  X = []
  # store labels with-stone=1 and without-stone=0
  y = []

  for label_path in os.listdir(ds_path):
    # get labels from images folder
    label = 1 if label_path == 'with-stone' else 0

    # iterate over each file in the dataset
    for image_path in glob.glob(f"{ds_path}/{label_path}/*png"):
      rel_path = os.path.relpath(image_path, ds_path)

      X.append(rel_path)
      y.append(label)

  return X, y

In [128]:
X, y = datasets_gen(ds_path)

In [129]:
X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=0.8, stratify=y)

In [131]:
def datasets_gen_kfold(X, y):

  for num_folds in tqdm(range(2), desc='Processing data'):

    X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=0.8, stratify=y)

    data = {}

    for feature, label in tqdm(zip(X_train, y_train), desc='Processing data'):
      print(feature, label)

      data['filepath'] = feature
      data['label'] = label

      csv_for_kfold(mode='train', num_folds=num_folds, data=data)

    for feature, label in zip(X_val, y_val):
      print(feature, label)

      data['filepath'] = feature
      data['label'] = label

      csv_for_kfold(mode='val', num_folds=num_folds, data=data)

In [132]:
datasets_gen_kfold(X, y)

Processing data:   0%|          | 0/2 [00:00<?, ?it/s]
Processing data: 0it [00:00, ?it/s][A
Processing data: 25it [00:00, 172.17it/s]
Processing data:  50%|█████     | 1/2 [00:00<00:00,  5.13it/s]

with-stone/1.3.46.670589.33.1.63700700750059521800001.5070347181582747136.png 1
with-stone/1.3.46.670589.33.1.63703718086200124800001.4686049388312071790.png 1
with-stone/1.3.46.670589.33.1.63703718086239127000001.5591694393880627093.png 1
without-stone/1.3.46.670589.33.1.63684832005599257600001.5733973152527939104.png 0
without-stone/1.3.46.670589.33.1.63684832005954277900001.5229766829332739233.png 0
without-stone/1.3.46.670589.33.1.63710882419050133500001.5343691333297596376.png 0
with-stone/1.3.46.670589.33.1.63700780314615924400001.4656503585389726474.png 1
with-stone/1.3.46.670589.33.1.63703718086140121300001.4751980723480797909.png 1
with-stone/1.3.46.670589.33.1.63705452817813079700001.4930177024992105378.png 1
without-stone/1.3.46.670589.33.1.63710882419159139700001.4780284659474255023.png 0
without-stone/1.3.46.670589.33.1.63710882418965128600001.5397944774910408990.png 0
with-stone/1.3.46.670589.33.1.63703718086101119100001.4867318355234768205.png 1
without-stone/1.3.46.6705


Processing data: 0it [00:00, ?it/s][A

without-stone/1.3.46.670589.33.1.63684832005599257600001.5733973152527939104.png 0



Processing data: 25it [00:00, 188.50it/s]
Processing data: 100%|██████████| 2/2 [00:00<00:00,  5.36it/s]

with-stone/1.3.46.670589.33.1.63700700750059521800001.5070347181582747136.png 1
without-stone/1.3.46.670589.33.1.63710882418965128600001.5397944774910408990.png 0
with-stone/1.3.46.670589.33.1.63703718086181123700001.4795376984778662199.png 1
without-stone/1.3.46.670589.33.1.63684832005665261400001.5232911566168662966.png 0
with-stone/1.3.46.670589.33.1.63705452817990089800001.5743366703585995917.png 1
with-stone/1.3.46.670589.33.1.63700780314615924400001.4656503585389726474.png 1
with-stone/1.3.46.670589.33.1.63700700750188529100001.5659992199131706929.png 1
without-stone/1.3.46.670589.33.1.63710882419050133500001.5343691333297596376.png 0
with-stone/1.3.46.670589.33.1.63703718086120120200001.5487554579919763006.png 1
with-stone/1.3.46.670589.33.1.63703718086101119100001.4867318355234768205.png 1
with-stone/1.3.46.670589.33.1.63705452817813079700001.4930177024992105378.png 1
with-stone/1.3.46.670589.33.1.63703718086220125900001.5014104694799407369.png 1
without-stone/1.3.46.670589.33.


