In [1]:
%matplotlib inline

In [2]:
import pandas
import numpy
import matplotlib
import os
import matplotlib.pyplot as plt
from PIL import Image

In [3]:
!pwd

/Users/michaelkudlaty/Documents/GitHub/Optomap-classification


In [4]:
# Root folder of the image data, given relative to the notebook's working directory.
dataDirectory = "Data"

In [5]:
# Show the raw entries of the data directory; os.listdir also returns hidden
# entries, so the listing is not limited to the image sub-folders.
os.listdir(dataDirectory)

['.DS_Store', 'Labeled', 'Unlabeled']

In [6]:
# Sub-folders of the data directory holding the labeled and unlabeled images.
labeledDirectory = os.path.join(dataDirectory, "Labeled")
unlabeledDirectory = os.path.join(dataDirectory, "Unlabeled")

In [7]:
# Raw directory listings (bare entry names, no directory prefix yet).
labeledFiles = os.listdir(labeledDirectory)
unlabeledFiles = os.listdir(unlabeledDirectory)

# Report how many entries each directory holds.
for countMessage, fileNames in (
    ("Number of Labeled Image Files: {0}", labeledFiles),
    ("Number of Unlabeled Image Files: {0}", unlabeledFiles),
):
    print(countMessage.format(len(fileNames)))

Number of Labeled Image Files: 44
Number of Unlabeled Image Files: 7840


In [8]:
def AppendDirectory(directoryPath, series):
    """Return a new Series with ``directoryPath`` joined in front of every entry.

    The input Series is left untouched; the result keeps its index and name.
    Callers are expected to use the returned Series (e.g. assign it back to
    the DataFrame column it came from).

    Parameters
    ----------
    directoryPath : str
        Directory to prepend to each entry.
    series : pandas.Series
        Series of file names (strings).

    Returns
    -------
    pandas.Series
        Series of ``os.path.join(directoryPath, entry)`` values, one per input
        entry. An empty input yields an empty result.
    """
    # Vectorized map instead of a per-element iloc write loop: no in-place
    # mutation of the caller's data and no reliance on column aliasing.
    return series.map(lambda fileName: os.path.join(directoryPath, fileName))

In [9]:
def GetFileName(filePath):
    """Return the final path component of ``filePath`` without its last extension.

    Only the last extension is stripped, so ``"a/b.tar.gz"`` gives ``"b.tar"``.
    """
    stem, _extension = os.path.splitext(os.path.basename(filePath))
    return stem

In [10]:
def GetImageInfo(dataFrame, filePath="FilePath", rename=False):
    """Record each image's pixel size in ``dataFrame``, optionally renaming files.

    The frame is modified in place: ``Width`` and ``Height`` are filled in for
    every row whose image opens and decodes, rows whose image cannot be read
    are removed (and the unreadable file is deleted from disk), and the index
    is reset to 0..n-1 afterwards.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Frame with one image path per row and unique index labels.
    filePath : str, default "FilePath"
        Name of the column holding the image paths.
    rename : bool, default False
        When True, each readable file is renamed on disk to
        ``<row position><original extension>`` inside its own directory and
        the ``filePath`` column is updated. A file is left under its current
        name when the target name is already taken, so that no other image is
        overwritten.

    Returns
    -------
    None
        All results are written into ``dataFrame``.
    """
    unreadableLabels = []

    # Snapshot the labels so rows can be scheduled for removal while looping.
    for position, label in enumerate(list(dataFrame.index)):
        imagePath = dataFrame.loc[label, filePath]

        try:
            # The context manager closes the file handle before any rename.
            with Image.open(imagePath) as im:
                im.load()  # force a full decode so truncated files are caught
                width, height = im.size
        except (OSError, ValueError, SyntaxError):
            # Only failures to open/decode count as "unreadable"; the file is
            # deleted and its row is dropped once the loop has finished.
            if os.path.exists(imagePath):
                os.remove(imagePath)
            unreadableLabels.append(label)
            continue

        dataFrame.loc[label, "Width"] = width
        dataFrame.loc[label, "Height"] = height

        if rename:
            directory = os.path.dirname(imagePath)
            fileExtension = os.path.splitext(imagePath)[-1]
            newFileName = "{0}{1}".format(position, fileExtension)
            newFilePath = os.path.join(directory, newFileName)

            # os.rename may silently replace an existing file, which would
            # destroy another image (typical when the notebook is re-run on an
            # already-numbered folder). Skip the rename in that case.
            if not os.path.exists(newFilePath):
                os.rename(imagePath, newFilePath)
                # Write back to the column the path was read from.
                dataFrame.loc[label, filePath] = newFilePath

    # drop() returns a new frame unless inplace=True; callers rely on the
    # frame they passed in being updated.
    dataFrame.drop(index=unreadableLabels, inplace=True)
    dataFrame.reset_index(inplace=True, drop=True)

In [11]:
#Load each file list into a single-column ("FilePath") pandas DataFrame
labeledDF = pandas.DataFrame(labeledFiles, columns=["FilePath"])
unlabeledDF = pandas.DataFrame(unlabeledFiles, columns=["FilePath"])

In [12]:
# Turn the bare file names into paths by prefixing each with its directory.
# Re-running this cell on the same frames would prefix the directory again.
labeledDF["FilePath"] = AppendDirectory(labeledDirectory, labeledDF["FilePath"])
unlabeledDF["FilePath"] = AppendDirectory(unlabeledDirectory, unlabeledDF["FilePath"])

In [13]:
# Fill in Width/Height for both frames.
# WARNING: this cell changes files on disk - files that fail to open are
# deleted, and with rename=True the unlabeled files are renamed to their row number.
GetImageInfo(labeledDF)
GetImageInfo(unlabeledDF, rename=True)

In [14]:
# Preview the unlabeled frame: paths after renaming plus the recorded sizes.
unlabeledDF.head()

Unnamed: 0,FilePath,Width,Height
0,Data/Unlabeled/0.jpg,1984.0,1984.0
1,Data/Unlabeled/1.jpg,1984.0,1984.0
2,Data/Unlabeled/2.jpg,2600.0,2048.0
3,Data/Unlabeled/3.jpg,2600.0,2048.0
4,Data/Unlabeled/4.jpg,1984.0,1984.0


In [15]:
# Use each labeled image's file name (directory and extension removed) as its label.
labeledDF["Label"] = labeledDF["FilePath"].apply(GetFileName)
labeledDF.head()

Unnamed: 0,FilePath,Width,Height,Label
0,Data/Labeled/Glaucoma 2.jpg,2600.0,2048.0,Glaucoma 2
1,Data/Labeled/Retinal Tear.tif,1984.0,1984.0,Retinal Tear
2,Data/Labeled/Birdshot chorioretinitis.jpg,1984.0,1984.0,Birdshot chorioretinitis
3,Data/Labeled/AMPPE OD.jpg,1984.0,1984.0,AMPPE OD
4,Data/Labeled/Wyburn Mason.jpg,1984.0,1984.0,Wyburn Mason
