In [None]:
# This file is used for the following two tasks:
#  1. To reshape and resize the images in the dataset 
#  2. To further divide the training set in the dataset into training and validation sets
#  For this project we will discard the original testing set since we do not have thier labels and cannot comment on the model accuracies
#  Instead, we will treat the valdaition set as the testing set in actual model evaluation.
#  3. This script further tranfers the images into test and training folders

# !pip install opencv-python

In [None]:
#Importing libraries

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import glob
import random
import os
import cv2
from tqdm import tqdm
import PIL
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import tifffile

In [None]:
# Assign paths to the image folders ( we won't be using the data in the 'other' folder)
image_path = r'D:\mayo-clinic-strip-ai\\'
testimage_path = r'D:\mayo-clinic-strip-ai\test'
trainimage_path = r'D:\mayo-clinic-strip-ai\train'
otherimage_path = r'D:\mayo-clinic-strip-ai\other'

In [None]:
# Assign paths to the csv files which map the images to the respective labels.
# The csv file is stored as a Data Frame
train_df = pd.read_csv(r'D:\mayo-clinic-strip-ai\train.csv')
test_df = pd.read_csv(r'D:\mayo-clinic-strip-ai\test.csv')
other_df = pd.read_csv(r'D:\mayo-clinic-strip-ai\other.csv')
samsubmit_df = pd.read_csv(r'D:\mayo-clinic-strip-ai\sample_submission.csv')


In [None]:
#  Preview the csv files
train_df.head()

Unnamed: 0,image_id,center_id,patient_id,image_num,label
0,006388_0,11,006388,0,CE
1,008e5c_0,11,008e5c,0,CE
2,00c058_0,11,00c058,0,LAA
3,01adc5_0,11,01adc5,0,LAA
4,026c97_0,4,026c97,0,CE


In [None]:
#  Preview the csv files
test_df.head()

Unnamed: 0,image_id,center_id,patient_id,image_num
0,006388_0,11,006388,0
1,008e5c_0,11,008e5c,0
2,00c058_0,11,00c058,0
3,01adc5_0,11,01adc5,0


In [None]:
#  Preview the csv files
other_df.head()

Unnamed: 0,image_id,patient_id,image_num,other_specified,label
0,01f2b3_0,01f2b3,0,,Unknown
1,01f2b3_1,01f2b3,1,,Unknown
2,02ebd5_0,02ebd5,0,,Unknown
3,0412ab_0,0412ab,0,,Unknown
4,04414e_0,04414e,0,Hypercoagulable,Other


In [None]:
samsubmit_df.head()

Unnamed: 0,patient_id,CE,LAA
0,006388,0.5,0.5
1,008e5c,0.5,0.5
2,00c058,0.5,0.5
3,01adc5,0.5,0.5


In [None]:
train_path = image_path+'train\\'
train_path

'D:\\mayo-clinic-strip-ai\\\\train\\'

In [None]:
# store path of images in the dataframe. 
# This step will be useful when we  create the new resized dataset

train_images_paths = []
train_images       =[]
w = []
h = []
byt = []
train_path = image_path+'train\\'

for i,img in enumerate(os.listdir(train_path)) :
    image_full_path = train_path+img
    train_images_paths.append(image_full_path)
    train_images.append(img)
    w.append(Image.open(train_path+img).width)
    h.append(Image.open(train_path+img).height)
    byt.append(os.stat(train_path+img).st_size/1000)
    
# add path to data frame
train_df.loc[:, ('path')] = train_images_paths 
train_df.loc[:, ('image')] = train_images
train_df.loc[:, ('width')] = w
train_df.loc[:, ('height')] = h
train_df.loc[:, ('KB_size')] = byt
train_df.head(5)

Unnamed: 0,image_id,center_id,patient_id,image_num,label,path,image,width,height,KB_size
0,006388_0,11,006388,0,CE,D:\mayo-clinic-strip-ai\\train\006388_0.tif,006388_0.tif,34007,60797,1312937.514
1,008e5c_0,11,008e5c,0,CE,D:\mayo-clinic-strip-ai\\train\008e5c_0.tif,008e5c_0.tif,5946,29694,109571.43
2,00c058_0,11,00c058,0,LAA,D:\mayo-clinic-strip-ai\\train\00c058_0.tif,00c058_0.tif,15255,61801,351760.03
3,01adc5_0,11,01adc5,0,LAA,D:\mayo-clinic-strip-ai\\train\01adc5_0.tif,01adc5_0.tif,55831,26553,679166.312
4,026c97_0,4,026c97,0,CE,D:\mayo-clinic-strip-ai\\train\026c97_0.tif,026c97_0.tif,10533,8267,61207.84


In [None]:
# Verify the shapr of the data frame
train_df.shape

(754, 10)

In [None]:
# Change the path to where you want to save the new (resized) dataset
new_image_path = r'D:\mayo-clinic-strip-ai\resized\\'

In [None]:
#  Create a new folder
create_train_path_new =os.makedirs(new_image_path+'train')
create_train_path_new =os.makedirs(new_image_path+'test')
create_train_path_new =os.makedirs(new_image_path+'other')


FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'D:\\mayo-clinic-strip-ai\\resized\\\\train'

In [None]:
#  Write the new paths
train_path_new = new_image_path+'train'
test_path_new = new_image_path+'test'
other_path_new = new_image_path+'other'


In [None]:
train_path =image_path+'train\\'
train_path
image_full_path


'D:\\mayo-clinic-strip-ai\\\\train\\ffec5c_1.tif'

In [None]:
image_new_name  = image_full_path.split('\\')[3]
image_new_name

'train'

In [None]:
#  Creating the new (resized) training dataset and maintaing a record in the 
#  csv file
train_images_paths_new = []
train_images_new       = []
kbsize = []
train_path =image_path+'train\\'

for img in  tqdm(os.listdir(train_path)):
    image_full_path = train_path+img
    image_new_name  = image_full_path.split('\\')[-1].split('.')[0]
    imge_new_pathe= train_path_new+'\\'+image_new_name+'.jpg'
    train_images_paths_new.append(imge_new_pathe)
    train_images_new.append(image_new_name+'.jpg')
    image = tifffile.imread(image_full_path)

    # Change the size and save file at new location
    resized_img = cv2.resize(image,(400,400))
    cv2.imwrite(train_path_new+'\\'+image_new_name+'.jpg', resized_img)
    kbsize.append(os.stat(imge_new_pathe).st_size/1000)
    del image_full_path
    del image_new_name
    del image
    del resized_img
      
train_df['new_path']  = train_images_paths_new
train_df['new_image_name'] = train_images_new
train_df['new_KB_size'] = kbsize

100%|██████████| 754/754 [3:53:29<00:00, 18.58s/it]  


In [None]:
#  Preview the new csv file 
train_df.head(10)

Unnamed: 0,image_id,center_id,patient_id,image_num,label,path,image,width,height,KB_size,new_path,new_image_name,new_KB_size
0,006388_0,11,006388,0,CE,D:\mayo-clinic-strip-ai\\train\006388_0.tif,006388_0.tif,34007,60797,1312937.514,D:\mayo-clinic-strip-ai\resized\\train\006388_...,006388_0.jpg,72.85
1,008e5c_0,11,008e5c,0,CE,D:\mayo-clinic-strip-ai\\train\008e5c_0.tif,008e5c_0.tif,5946,29694,109571.43,D:\mayo-clinic-strip-ai\resized\\train\008e5c_...,008e5c_0.jpg,51.837
2,00c058_0,11,00c058,0,LAA,D:\mayo-clinic-strip-ai\\train\00c058_0.tif,00c058_0.tif,15255,61801,351760.03,D:\mayo-clinic-strip-ai\resized\\train\00c058_...,00c058_0.jpg,38.618
3,01adc5_0,11,01adc5,0,LAA,D:\mayo-clinic-strip-ai\\train\01adc5_0.tif,01adc5_0.tif,55831,26553,679166.312,D:\mayo-clinic-strip-ai\resized\\train\01adc5_...,01adc5_0.jpg,42.243
4,026c97_0,4,026c97,0,CE,D:\mayo-clinic-strip-ai\\train\026c97_0.tif,026c97_0.tif,10533,8267,61207.84,D:\mayo-clinic-strip-ai\resized\\train\026c97_...,026c97_0.jpg,48.535
5,028989_0,5,028989,0,LAA,D:\mayo-clinic-strip-ai\\train\028989_0.tif,028989_0.tif,32789,85695,1721260.346,D:\mayo-clinic-strip-ai\resized\\train\028989_...,028989_0.jpg,68.644
6,029c68_0,5,029c68,0,CE,D:\mayo-clinic-strip-ai\\train\029c68_0.tif,029c68_0.tif,33023,34060,426154.43,D:\mayo-clinic-strip-ai\resized\\train\029c68_...,029c68_0.jpg,45.629
7,032f10_0,7,032f10,0,CE,D:\mayo-clinic-strip-ai\\train\032f10_0.tif,032f10_0.tif,6598,28652,92759.536,D:\mayo-clinic-strip-ai\resized\\train\032f10_...,032f10_0.jpg,39.146
8,0372b0_0,7,0372b0,0,CE,D:\mayo-clinic-strip-ai\\train\0372b0_0.tif,0372b0_0.tif,4841,25138,45770.204,D:\mayo-clinic-strip-ai\resized\\train\0372b0_...,0372b0_0.jpg,38.879
9,037300_0,11,037300,0,CE,D:\mayo-clinic-strip-ai\\train\037300_0.tif,037300_0.tif,27346,70968,604650.416,D:\mayo-clinic-strip-ai\resized\\train\037300_...,037300_0.jpg,39.001


In [None]:
# Write code to Save the dataframe into a CSV
train_df.to_csv(r'D:\mayo-clinic-strip-ai\train_new.csv')

In [None]:
# # Visualizing the images

# plt.figure(figsize=(15,10))
# plt.suptitle("Resized Training Images", fontsize=20)
# path = train_path_new+'\\'
# counter = 0

# for i,img in enumerate(os.listdir(path))  :
#         plt.subplot(3,3,i+1)
#         full_image= Image.open(path+img)
#         full_image_1 =full_image.resize((512,512))
#         plt.xticks([])
#         plt.yticks([])
#         plt.grid(False)
#         plt.imshow(full_image_1, cmap=plt.cm.binary) 
#         if i == 8:
#             break
#             del  full_image_1

In [None]:
#  Creating the new (resized) test dataset and maintaing a record in the 
#  csv file
test_images_paths_new = []
test_images_new       = []
kbsize = []
test_path =image_path+'test\\'

for img in  tqdm(os.listdir(test_path)):
    image_full_path = test_path+img
    image_new_name  = image_full_path.split('\\')[-1].split('.')[0]
    imge_new_pathe= test_path_new+'\\'+image_new_name+'.jpg'
    test_images_paths_new.append(imge_new_pathe)
    test_images_new.append(image_new_name+'.jpg')
    image = tifffile.imread(image_full_path)
    # Change the size 
    resized_img = cv2.resize(image,(400,400))
    cv2.imwrite(test_path_new+'\\'+image_new_name+'.jpg', resized_img)
    kbsize.append(os.stat(imge_new_pathe).st_size/1000)
    del image_full_path
    del image_new_name
    del image
    del resized_img
      
test_df['new_path']  = test_images_paths_new
test_df['new_image_name'] = test_images_new
test_df['new_KB_size'] = kbsize

100%|██████████| 4/4 [02:03<00:00, 30.80s/it]


In [None]:
# Write code to Save the dataframe into a CSV
test_df.to_csv(r'D:\mayo-clinic-strip-ai\test_new.csv')

In [None]:
#  Creating the new (resized) other dataset and maintaing a record in the 
#  csv file
other_images_paths_new = []
other_images_new       = []
kbsize = []
other_path =image_path+'other\\'

for img in  tqdm(os.listdir(other_path)):
    image_full_path = other_path+img
    image_new_name  = image_full_path.split('\\')[-1].split('.')[0]
    imge_new_pathe= other_path_new+'\\'+image_new_name+'.jpg'
    other_images_paths_new.append(imge_new_pathe)
    other_images_new.append(image_new_name+'.jpg')
    image = tifffile.imread(image_full_path)
    # Change the size 
    resized_img = cv2.resize(image,(400,400))
    cv2.imwrite(other_path_new+'\\'+image_new_name+'.jpg', resized_img)
    kbsize.append(os.stat(imge_new_pathe).st_size/1000)
    del image_full_path
    del image_new_name
    del image
    del resized_img
      
other_df['new_path']  = other_images_paths_new
other_df['new_image_name'] = other_images_new
other_df['new_KB_size'] = kbsize

100%|██████████| 396/396 [3:00:31<00:00, 27.35s/it]    


In [None]:
# Write code to Save the dataframe into a CSV
other_df.to_csv(r'D:\mayo-clinic-strip-ai\other_new.csv')

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Preview the training data frame
train_new_df = pd.read_csv(r'D:\mayo-clinic-strip-ai\train_new.csv')
train_new_df.head(10)

Unnamed: 0.1,Unnamed: 0,image_id,center_id,patient_id,image_num,label,path,image,width,height,KB_size,new_path,new_image_name,new_KB_size
0,0,006388_0,11,006388,0,CE,D:\mayo-clinic-strip-ai\\train\006388_0.tif,006388_0.tif,34007,60797,1312937.514,D:\mayo-clinic-strip-ai\resized\\train\006388_...,006388_0.jpg,72.85
1,1,008e5c_0,11,008e5c,0,CE,D:\mayo-clinic-strip-ai\\train\008e5c_0.tif,008e5c_0.tif,5946,29694,109571.43,D:\mayo-clinic-strip-ai\resized\\train\008e5c_...,008e5c_0.jpg,51.837
2,2,00c058_0,11,00c058,0,LAA,D:\mayo-clinic-strip-ai\\train\00c058_0.tif,00c058_0.tif,15255,61801,351760.03,D:\mayo-clinic-strip-ai\resized\\train\00c058_...,00c058_0.jpg,38.618
3,3,01adc5_0,11,01adc5,0,LAA,D:\mayo-clinic-strip-ai\\train\01adc5_0.tif,01adc5_0.tif,55831,26553,679166.312,D:\mayo-clinic-strip-ai\resized\\train\01adc5_...,01adc5_0.jpg,42.243
4,4,026c97_0,4,026c97,0,CE,D:\mayo-clinic-strip-ai\\train\026c97_0.tif,026c97_0.tif,10533,8267,61207.84,D:\mayo-clinic-strip-ai\resized\\train\026c97_...,026c97_0.jpg,48.535
5,5,028989_0,5,028989,0,LAA,D:\mayo-clinic-strip-ai\\train\028989_0.tif,028989_0.tif,32789,85695,1721260.346,D:\mayo-clinic-strip-ai\resized\\train\028989_...,028989_0.jpg,68.644
6,6,029c68_0,5,029c68,0,CE,D:\mayo-clinic-strip-ai\\train\029c68_0.tif,029c68_0.tif,33023,34060,426154.43,D:\mayo-clinic-strip-ai\resized\\train\029c68_...,029c68_0.jpg,45.629
7,7,032f10_0,7,032f10,0,CE,D:\mayo-clinic-strip-ai\\train\032f10_0.tif,032f10_0.tif,6598,28652,92759.536,D:\mayo-clinic-strip-ai\resized\\train\032f10_...,032f10_0.jpg,39.146
8,8,0372b0_0,7,0372b0,0,CE,D:\mayo-clinic-strip-ai\\train\0372b0_0.tif,0372b0_0.tif,4841,25138,45770.204,D:\mayo-clinic-strip-ai\resized\\train\0372b0_...,0372b0_0.jpg,38.879
9,9,037300_0,11,037300,0,CE,D:\mayo-clinic-strip-ai\\train\037300_0.tif,037300_0.tif,27346,70968,604650.416,D:\mayo-clinic-strip-ai\resized\\train\037300_...,037300_0.jpg,39.001


In [None]:
#  Convert the head of the dataframe into an array
array = np.array([train_new_df.image_id , train_new_df.center_id, train_new_df.patient_id, train_new_df.image_num, train_new_df.path, train_new_df.image, train_new_df.new_path, train_new_df.new_image_name, train_new_df.new_KB_size])
np.shape(np.transpose(array))
# np.shape(array)

(754, 9)

In [None]:
# Further dividng the training set training and validation
x_train, x_test, y_train, y_test = train_test_split(np.transpose(array),train_new_df.label)

In [None]:
# Preview the training set
x_train

array([['0c60b8_0', 1, '0c60b8', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\0c60b8_0.jpg',
        '0c60b8_0.jpg', 16.576],
       ['957643_0', 11, '957643', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\957643_0.jpg',
        '957643_0.jpg', 31.836],
       ['fe9bec_0', 4, 'fe9bec', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\fe9bec_0.jpg',
        'fe9bec_0.jpg', 22.799],
       ...,
       ['8e0244_0', 4, '8e0244', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\8e0244_0.jpg',
        '8e0244_0.jpg', 57.342],
       ['e251ff_0', 4, 'e251ff', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\e251ff_0.jpg',
        'e251ff_0.jpg', 31.463],
       ['9a807e_0', 7, '9a807e', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\9a807e_0.jpg',
        '9a807e_0.jpg', 27.46]], dtype=object)

In [None]:
# Preview the testing (validation) set
x_test

array([['82399d_1', 7, '82399d', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\82399d_1.jpg',
        '82399d_1.jpg', 35.08],
       ['1db82d_0', 4, '1db82d', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\1db82d_0.jpg',
        '1db82d_0.jpg', 33.395],
       ['e10f49_0', 3, 'e10f49', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\e10f49_0.jpg',
        'e10f49_0.jpg', 15.439],
       ...,
       ['291099_0', 9, '291099', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\291099_0.jpg',
        '291099_0.jpg', 39.496],
       ['827928_1', 5, '827928', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\827928_1.jpg',
        '827928_1.jpg', 49.61],
       ['fd7c5b_0', 2, 'fd7c5b', ...,
        'D:\\mayo-clinic-strip-ai\\resized\\\\train\\fd7c5b_0.jpg',
        'fd7c5b_0.jpg', 35.023]], dtype=object)

In [None]:
# Preview the training labels
y_train

42      CE
443     CE
750    LAA
141    LAA
612     CE
      ... 
328    LAA
90      CE
421    LAA
653     CE
454     CE
Name: label, Length: 565, dtype: object

In [None]:
# Preview the testing labels
y_test

388     CE
84     LAA
651     CE
103     CE
17      CE
      ... 
44      CE
317     CE
124     CE
392     CE
741     CE
Name: label, Length: 189, dtype: object

In [None]:
np.array(y_test)

array(['CE', 'LAA', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'LAA', 'CE',
       'LAA', 'CE', 'CE', 'LAA', 'CE', 'CE', 'LAA', 'CE', 'CE', 'LAA',
       'LAA', 'CE', 'LAA', 'LAA', 'CE', 'LAA', 'CE', 'LAA', 'LAA', 'LAA',
       'CE', 'CE', 'LAA', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE',
       'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'LAA', 'CE', 'CE', 'CE', 'LAA',
       'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'LAA',
       'CE', 'LAA', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'CE',
       'LAA', 'CE', 'LAA', 'CE', 'LAA', 'CE', 'LAA', 'LAA', 'CE', 'CE',
       'LAA', 'CE', 'LAA', 'CE', 'CE', 'LAA', 'CE', 'CE', 'LAA', 'CE',
       'LAA', 'LAA', 'CE', 'CE', 'LAA', 'CE', 'LAA', 'LAA', 'CE', 'LAA',
       'CE', 'CE', 'CE', 'LAA', 'CE', 'CE', 'CE', 'LAA', 'LAA', 'LAA',
       'CE', 'CE', 'CE', 'CE', 'CE', 'CE', 'LAA', 'CE', 'CE', 'LAA', 'CE',
       'CE', 'CE', 'LAA', 'CE', 'LAA', 'CE', 'CE', 'CE', 'CE', 'LAA',
       'CE', 'LAA', 'LAA', 'LAA', 'CE', 'CE', 'CE', 'CE',

In [None]:
# verify shape of training set
np.shape(x_train)

(565, 9)

In [None]:
# verify shape of testing set
np.shape(x_test)

(189, 9)

In [None]:
from numpy import asarray
from numpy import savetxt

In [None]:
train_from_training_set = x_train
test_from_training_set = x_test

# save to csv file
savetxt('train_dataset.csv', train_from_training_set, delimiter=',')
savetxt('test_dataset.csv', test_from_training_set, delimiter=',')

In [None]:
import csv

In [None]:
#  Create a new csv file with jus the new training dataset
# field names 
fields = ['image_id','center_id', 'patient_id','image_num','path','image','new_path','new_image_name','new_KB_size'] 
    
# data rows of csv file 
rows = x_train

    
# name of csv file 
filename = "training_data.csv"
    
# writing to csv file 
with open(filename, 'w') as csvfile: 
    # creating a csv writer object 
    csvwriter = csv.writer(csvfile) 
        
    # writing the fields 
    csvwriter.writerow(fields) 
        
    # writing the data rows 
    csvwriter.writerows(rows)

In [None]:
# save the csv
training_records_df = pd.read_csv(r'training_data.csv')

In [None]:
# Preview the csv
training_records_df

Unnamed: 0,image_id,center_id,patient_id,image_num,path,image,new_path,new_image_name,new_KB_size
0,0c60b8_0,1,0c60b8,0,D:\mayo-clinic-strip-ai\\train\0c60b8_0.tif,0c60b8_0.tif,D:\mayo-clinic-strip-ai\resized\\train\0c60b8_...,0c60b8_0.jpg,16.576
1,957643_0,11,957643,0,D:\mayo-clinic-strip-ai\\train\957643_0.tif,957643_0.tif,D:\mayo-clinic-strip-ai\resized\\train\957643_...,957643_0.jpg,31.836
2,fe9bec_0,4,fe9bec,0,D:\mayo-clinic-strip-ai\\train\fe9bec_0.tif,fe9bec_0.tif,D:\mayo-clinic-strip-ai\resized\\train\fe9bec_...,fe9bec_0.jpg,22.799
3,2e8763_0,10,2e8763,0,D:\mayo-clinic-strip-ai\\train\2e8763_0.tif,2e8763_0.tif,D:\mayo-clinic-strip-ai\resized\\train\2e8763_...,2e8763_0.jpg,32.484
4,d53809_0,5,d53809,0,D:\mayo-clinic-strip-ai\\train\d53809_0.tif,d53809_0.tif,D:\mayo-clinic-strip-ai\resized\\train\d53809_...,d53809_0.jpg,51.981
...,...,...,...,...,...,...,...,...,...
560,69d655_0,7,69d655,0,D:\mayo-clinic-strip-ai\\train\69d655_0.tif,69d655_0.tif,D:\mayo-clinic-strip-ai\resized\\train\69d655_...,69d655_0.jpg,25.281
561,1f9d4f_1,10,1f9d4f,1,D:\mayo-clinic-strip-ai\\train\1f9d4f_1.tif,1f9d4f_1.tif,D:\mayo-clinic-strip-ai\resized\\train\1f9d4f_...,1f9d4f_1.jpg,39.231
562,8e0244_0,4,8e0244,0,D:\mayo-clinic-strip-ai\\train\8e0244_0.tif,8e0244_0.tif,D:\mayo-clinic-strip-ai\resized\\train\8e0244_...,8e0244_0.jpg,57.342
563,e251ff_0,4,e251ff,0,D:\mayo-clinic-strip-ai\\train\e251ff_0.tif,e251ff_0.tif,D:\mayo-clinic-strip-ai\resized\\train\e251ff_...,e251ff_0.jpg,31.463


In [None]:
#  Logic flow of the program

# for every entry in the array
# take the first [0th] entry and store it as a string
# Find that file in the folder
# move that intro a training folder



In [None]:
#  Create a new csv file with just the new testing dataset
# field names 
fields = ['image_id','center_id', 'patient_id','image_num','path','image','new_path','new_image_name','new_KB_size'] 
    
# data rows of csv file 
rows = x_test

    
# name of csv file 
filename = "testing_data.csv"
    
# writing to csv file 
with open(filename, 'w') as csvfile: 
    # creating a csv writer object 
    csvwriter = csv.writer(csvfile) 
        
    # writing the fields 
    csvwriter.writerow(fields) 
        
    # writing the data rows 
    csvwriter.writerows(rows)

In [None]:
#  Create a new csv file with just the new testing labels
# field names 
fields = ['label'] 
    
# data rows of csv file 
rows = np.array(y_test)

    
# name of csv file 
filename = "testing_labels.csv"
    
# writing to csv file 
with open(filename, 'w') as csvfile: 
    # creating a csv writer object 
    csvwriter = csv.writer(csvfile) 
        
    # writing the fields 
    csvwriter.writerow(fields) 
        
    # writing the data rows 
    csvwriter.writerows(rows)

In [None]:
#  Create a new csv file with jus the new training labels
# field names 
fields = ['label'] 
    
# data rows of csv file 
rows = np.array(y_train)
    
# name of csv file 
filename = "training_labels.csv"
    
# writing to csv file 
with open(filename, 'w') as csvfile: 
    # creating a csv writer object 
    csvwriter = csv.writer(csvfile) 
        
    # writing the fields 
    csvwriter.writerow(fields) 
        
    # writing the data rows 
    csvwriter.writerows(rows)
    
#     Data resulted into each label getting split into different columns. It is manuall corrected on excel (Combine the columns using 'concatenate' command )

In [None]:
new_train_path = r'D:\mayo-clinic-strip-ai\resized\train';

train_train_df = pd.read_csv(r'D:\mayo-clinic-strip-ai\training_data.csv')


In [None]:
train_train_df

Unnamed: 0,image_id,center_id,patient_id,image_num,path,image,new_path,new_image_name,new_KB_size
0,0c60b8_0,1,0c60b8,0,D:\mayo-clinic-strip-ai\\train\0c60b8_0.tif,0c60b8_0.tif,D:\mayo-clinic-strip-ai\resized\\train\0c60b8_...,0c60b8_0.jpg,16.576
1,957643_0,11,957643,0,D:\mayo-clinic-strip-ai\\train\957643_0.tif,957643_0.tif,D:\mayo-clinic-strip-ai\resized\\train\957643_...,957643_0.jpg,31.836
2,fe9bec_0,4,fe9bec,0,D:\mayo-clinic-strip-ai\\train\fe9bec_0.tif,fe9bec_0.tif,D:\mayo-clinic-strip-ai\resized\\train\fe9bec_...,fe9bec_0.jpg,22.799
3,2e8763_0,10,2e8763,0,D:\mayo-clinic-strip-ai\\train\2e8763_0.tif,2e8763_0.tif,D:\mayo-clinic-strip-ai\resized\\train\2e8763_...,2e8763_0.jpg,32.484
4,d53809_0,5,d53809,0,D:\mayo-clinic-strip-ai\\train\d53809_0.tif,d53809_0.tif,D:\mayo-clinic-strip-ai\resized\\train\d53809_...,d53809_0.jpg,51.981
...,...,...,...,...,...,...,...,...,...
560,69d655_0,7,69d655,0,D:\mayo-clinic-strip-ai\\train\69d655_0.tif,69d655_0.tif,D:\mayo-clinic-strip-ai\resized\\train\69d655_...,69d655_0.jpg,25.281
561,1f9d4f_1,10,1f9d4f,1,D:\mayo-clinic-strip-ai\\train\1f9d4f_1.tif,1f9d4f_1.tif,D:\mayo-clinic-strip-ai\resized\\train\1f9d4f_...,1f9d4f_1.jpg,39.231
562,8e0244_0,4,8e0244,0,D:\mayo-clinic-strip-ai\\train\8e0244_0.tif,8e0244_0.tif,D:\mayo-clinic-strip-ai\resized\\train\8e0244_...,8e0244_0.jpg,57.342
563,e251ff_0,4,e251ff,0,D:\mayo-clinic-strip-ai\\train\e251ff_0.tif,e251ff_0.tif,D:\mayo-clinic-strip-ai\resized\\train\e251ff_...,e251ff_0.jpg,31.463


In [None]:
type(train_train_df.new_path)

pandas.core.series.Series

In [None]:
train_train_df.new_path[0]

'D:\\mayo-clinic-strip-ai\\resized\\\\train\\0c60b8_0.jpg'

In [None]:
import shutil
#  This allows us to transfer the images from the common folder (in this case the training folder) into a new training and valdiation/test folder

In [None]:
# Assign paths
image_new_name  = train_train_df.new_path[0].split('\\')[-1].split('.')[0]
image_new_pathe= 'D:\mayo-clinic-strip-ai\resized\train\train'+'\\'+image_new_name+'.jpg'
image_new_pathe
# imge_new_pathe= other_path_new+'\\'+image_new_name+'.jpg'

'D:\\mayo-clinic-strip-ai\resized\train\train\\0c60b8_0.jpg'

In [None]:
# Copy the training examples from train folder into another train folder
for entry in train_train_df.new_path:
#     print(entry)
    image_new_name  = entry.split('\\')[-1].split('.')[0]
    image_new_pathe= 'D:\mayo-clinic-strip-ai\\resized\\train\\train'+'\\'+image_new_name+'.jpg'    
#     print(image_new_pathe)
    shutil.copy(entry,image_new_pathe)


In [None]:
train_test_df = pd.read_csv(r'D:\mayo-clinic-strip-ai\testing_data.csv')


In [None]:
train_test_df

Unnamed: 0,image_id,center_id,patient_id,image_num,path,image,new_path,new_image_name,new_KB_size
0,82399d_1,7,82399d,1,D:\mayo-clinic-strip-ai\\train\82399d_1.tif,82399d_1.tif,D:\mayo-clinic-strip-ai\resized\\train\82399d_...,82399d_1.jpg,35.080
1,1db82d_0,4,1db82d,0,D:\mayo-clinic-strip-ai\\train\1db82d_0.tif,1db82d_0.tif,D:\mayo-clinic-strip-ai\resized\\train\1db82d_...,1db82d_0.jpg,33.395
2,e10f49_0,3,e10f49,0,D:\mayo-clinic-strip-ai\\train\e10f49_0.tif,e10f49_0.tif,D:\mayo-clinic-strip-ai\resized\\train\e10f49_...,e10f49_0.jpg,15.439
3,2394f6_0,11,2394f6,0,D:\mayo-clinic-strip-ai\\train\2394f6_0.tif,2394f6_0.tif,D:\mayo-clinic-strip-ai\resized\\train\2394f6_...,2394f6_0.jpg,40.413
4,049194_0,5,049194,0,D:\mayo-clinic-strip-ai\\train\049194_0.tif,049194_0.tif,D:\mayo-clinic-strip-ai\resized\\train\049194_...,049194_0.jpg,61.025
...,...,...,...,...,...,...,...,...,...
184,0d4164_0,11,0d4164,0,D:\mayo-clinic-strip-ai\\train\0d4164_0.tif,0d4164_0.tif,D:\mayo-clinic-strip-ai\resized\\train\0d4164_...,0d4164_0.jpg,58.300
185,65aadd_0,11,65aadd,0,D:\mayo-clinic-strip-ai\\train\65aadd_0.tif,65aadd_0.tif,D:\mayo-clinic-strip-ai\resized\\train\65aadd_...,65aadd_0.jpg,46.183
186,291099_0,9,291099,0,D:\mayo-clinic-strip-ai\\train\291099_0.tif,291099_0.tif,D:\mayo-clinic-strip-ai\resized\\train\291099_...,291099_0.jpg,39.496
187,827928_1,5,827928,1,D:\mayo-clinic-strip-ai\\train\827928_1.tif,827928_1.tif,D:\mayo-clinic-strip-ai\resized\\train\827928_...,827928_1.jpg,49.610


In [None]:
# Copy the testing examples from train folder into test folder
for entry in train_test_df.new_path:
#     print(entry)
    image_new_name  = entry.split('\\')[-1].split('.')[0]
    image_new_pathe= 'D:\mayo-clinic-strip-ai\\resized\\train\\test'+'\\'+image_new_name+'.jpg'    
#     print(image_new_pathe)
    shutil.copy(entry,image_new_pathe)
