##                                             Road Features Extraction

###    Data Processing for Training

### Convert image files into pixels and mark 1 for all white areas and 0 for all grey areas

In [33]:
import random
from os import listdir
from PIL import Image
from datetime import datetime

trainInputImagesPath = './training/input'
trainOutputImagesPath = './training/output'
testInputImagesPath = './testing/input'
testOutputImagesPath = './testing/output'


trainInputImagesFiles = listdir(trainInputImagesPath)
trainOutputImagesFiles = listdir(trainOutputImagesPath)
testInputImagesFiles = listdir(testInputImagesPath)
testOutputImagesFiles = listdir(testOutputImagesPath)


filtered_train_images = []
for file in trainInputImagesFiles:
    if file in trainOutputImagesFiles:
        filtered_train_images.append(file)

if(len(filtered_train_images) != len(trainOutputImagesFiles)):
    raise Exception('train input images and output images number mismatch')
        
filtered_test_images = []
for file in testInputImagesFiles:
    if file in testOutputImagesFiles:
        filtered_test_images.append(file)
        
if(len(testInputImagesFiles) != len(testOutputImagesFiles)):
    raise Exception('test input images and output images number mismatch')
    
for i in range(len(filtered_train_images)):
    inputImageFile = filtered_train_images[i][:-5]
    outputImageFile = trainOutputImagesFiles[i][:-5]
    if(inputImageFile != outputImageFile):
        raise Exception('train inputImageFile and outputImageFile mismatch at index', str(i))

for i in range(len(filtered_test_images)):
    inputImageFile = filtered_test_images[i][:-5]
    outputImageFile = testOutputImagesFiles[i][:-5]
    if(inputImageFile != outputImageFile):
        raise Exception('test inputImageFile and outputImageFile mismatch at index', str(i))

    
def writeDataFile(inputImagePath, outputImagePath, inputImageFiles, outputImageFiles, dataFileName):
    dataFile = open(dataFileName, 'w')
    rectSize = 5
    linesCount = 0
    linesLimit = 200000
    linesCountPerImage = 0
    linesLimitPerImage = (linesLimit / len(inputImageFiles)) + 1
    
    for i in range(len(inputImageFiles)):
        print(str(datetime.now()) + ': processing image', i)
        linesCountPerImage = 0
        inputImage = Image.open(inputImagePath + '/' + inputImageFiles[i])
        inputImageXSize, inputImageYSize = inputImage.size
        # inputImagePixels = inputImage.load()
        
        outputImage = Image.open(outputImagePath + '/' + outputImageFiles[i])
        outputImageXSize, outputImageYSize = outputImage.size
        outputImagePixels = outputImage.load()
        
        if((inputImageXSize != outputImageXSize) or (inputImageYSize != outputImageYSize)):
            raise Exception('train inputImage and outputImage mismatch at index', str(i))

        outputImageRoadPixelsArr = [];
        outputImageNonRoadPixelsArr= [];
        
        for x in range(rectSize//2, inputImageXSize - (rectSize//2)):
            for y in range(rectSize//2, inputImageYSize - (rectSize//2)):
                isRoadPixel = outputImagePixels[x, y]
                if(isRoadPixel):
                    outputImageRoadPixelsArr.append((x, y))
                else:
                    outputImageNonRoadPixelsArr.append((x, y))

        random.shuffle(outputImageRoadPixelsArr)
        random.shuffle(outputImageNonRoadPixelsArr)
        
        for m in range(len(outputImageRoadPixelsArr)):
            if(linesCountPerImage >= linesLimitPerImage):
                break
            
            if(((m*2) + 1) >= len(outputImageNonRoadPixelsArr)):
                break
            
            x = outputImageRoadPixelsArr[m][0];
            y = outputImageRoadPixelsArr[m][1];
            
            rect = (x - (rectSize//2), y - (rectSize//2), x + (rectSize//2) + 1, y + (rectSize//2) + 1)
            subImage = inputImage.crop(rect).load()
            line = ''
            for i in range(rectSize):
                for j in range(rectSize):
                    line += str(subImage[i, j][0]) + ','
                    line += str(subImage[i, j][1]) + ','
                    line += str(subImage[i, j][2]) + ','
            
            line += str(1) + '\n'
            linesCount += 1
            linesCountPerImage += 1
            dataFile.write(line)
            
            for n in range(2):
                x = outputImageNonRoadPixelsArr[(m*2) + n][0];
                y = outputImageNonRoadPixelsArr[(m*2) + n][1];
                
                rect = (x - (rectSize//2), y - (rectSize//2), x + (rectSize//2) + 1, y + (rectSize//2) + 1)
                subImage = inputImage.crop(rect).load()
                line = ''
                for i in range(rectSize):
                    for j in range(rectSize):
                        line += str(subImage[i, j][0]) + ','
                        line += str(subImage[i, j][1]) + ','
                        line += str(subImage[i, j][2]) + ','
                
                line += str(0) + '\n'
                linesCount += 1
                linesCountPerImage += 1
                dataFile.write(line)
    
    print(str(datetime.now()) + ': ' + dataFileName + ' linesCount:', linesCount)

trainDataFileName = './training/train.csv'
testDataFileName = './testing/test.csv'

print(str(datetime.now()) + ': writing trainDataFile')
writeDataFile(testInputImagesPath, testOutputImagesPath, filtered_test_images, testOutputImagesFiles, testDataFileName)
print(str(datetime.now()) + ': trainDataFile complete')


2022-01-15 17:23:55.637776: writing trainDataFile
2022-01-15 17:23:55.638780: processing image 0
2022-01-15 17:23:58.613303: processing image 1
2022-01-15 17:24:02.810237: processing image 2
2022-01-15 17:24:06.581216: processing image 3
2022-01-15 17:24:10.177319: processing image 4
2022-01-15 17:24:13.723698: processing image 5
2022-01-15 17:24:17.209756: processing image 6
2022-01-15 17:24:20.646511: processing image 7
2022-01-15 17:24:24.040407: processing image 8
2022-01-15 17:24:27.418224: processing image 9
2022-01-15 17:24:30.845428: processing image 10
2022-01-15 17:24:34.261764: processing image 11
2022-01-15 17:24:37.634724: processing image 12
2022-01-15 17:24:41.023776: ./testing/test.csv linesCount: 200031
2022-01-15 17:24:41.526305: trainDataFile complete
