In [None]:
from shutil import copy2
from glob import glob
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive
# Mount Google Drive so the dataset folders below are reachable from Colab.
drive.mount('/content/drive')

# Seed NumPy's global RNG; the per-folder image sampling draws from it.
np.random.seed(1381)
# Maximum number of shuffled rows copied into the train / test image folders.
train_sample_size, test_sample_size = 5000, 500

# Input: segmented PKLot images.  Outputs: CNN image tree and CSV listings.
root_dir = "/content/drive/My Drive/ATI/PKLot/PKLotSegmented/"
cnn_dataset = "/content/drive/My Drive/ATI/CNN_Dataset/"
PKLot_Preprocess = "/content/drive/My Drive/ATI/PKLot_Preprocess/"

# Create each output folder on first run and report it; stay silent otherwise.
for _out_dir, _notice in (
    (cnn_dataset, '***CNN dataset folder created at >>>'),
    (PKLot_Preprocess, '***PKLot_Preprocess folder created at >>> '),
):
    if os.path.isdir(_out_dir):
        continue
    os.makedirs(_out_dir)
    print(_notice, _out_dir)

# Build one record per sampled image, split into train (UFPR04/UFPR05) and
# test (PUC) lists.  Expected layout on disk:
#   <root_dir>/<lot>/<weather>/<date folder>/<class>/<image>.jpg
classes = ["Empty", "Occupied"]
datasets = {
    "train": ["UFPR04/Sunny/", "UFPR04/Rainy/", "UFPR04/Cloudy/","UFPR05/Sunny/", "UFPR05/Rainy/", "UFPR05/Cloudy/"],
    "test": ["PUC/Sunny/","PUC/Rainy/", "PUC/Cloudy/"]}

train_dataset = []
test_dataset = []

for c in classes:
    for data_type in datasets:
        # Fraction of every class folder that is sampled for this split.
        sample_size = 0.3 if data_type == 'train' else 0.1
        target = train_dataset if data_type == 'train' else test_dataset

        for folder in datasets[data_type]:
            weather_dir = os.path.join(root_dir, folder)
            # os.listdir returns entries in arbitrary order; sort so the seeded
            # RNG is consumed in the same order on every run.
            dir_content = sorted(
                d for d in os.listdir(weather_dir)
                if os.path.isdir(os.path.join(weather_dir, d))
            )
            print(dir_content)

            for d in dir_content:
                # Path of the date folder relative to root_dir.
                folder_path = os.path.join(folder, d)
                # glob order is arbitrary as well; sort for a reproducible sample.
                images = sorted(glob(os.path.join(root_dir, folder_path, c, "*.jpg")))
                if not images:
                    continue

                random_sample = np.random.choice(
                    images, replace=False, size=int(len(images) * sample_size)
                )

                for img in random_sample:
                    image_name = os.path.basename(str(img))
                    temp = {
                        'image_name': image_name,
                        'label': c,
                        'folder_path': folder_path,
                        'data_type': data_type,
                        # Relative to root_dir and including the class folder,
                        # i.e. the same path the copy step rebuilds from
                        # folder_path / label / image_name.
                        'image_path': os.path.join(folder_path, c, image_name),
                    }
                    target.append(temp)
####################################################################################
# Turn the collected record lists into DataFrames and shuffle every row with a
# fixed random_state so the row order is the same on each run.
df_train = pd.DataFrame(train_dataset).sample(frac=1, random_state=1431)
df_test = pd.DataFrame(test_dataset).sample(frac=1, random_state=1431)

print("     Train Dataset size   : ", df_train.shape)
print("     Test  Dataset size   : ", df_test.shape)

# Persist both listings as CSV files (without the index column) in PKLot_Preprocess.
for _frame, _csv_name in ((df_train, 'Dataset_Train.csv'), (df_test, 'Dataset_Test.csv')):
    _frame.to_csv(PKLot_Preprocess + _csv_name, index=False)

print("\n________***________ Dataset_Train.csv SAVED________***________")
print("________***________ Dataset_Test.csv  SAVED________***________")



#__________________________________________________________________________________________
# Make sure both class folders exist under <cnn_dataset>/train before copying.
if not os.path.isdir(os.path.join(cnn_dataset, 'train', 'Occupied')):
    os.makedirs(os.path.join(cnn_dataset, 'train', 'Occupied'))
    print('\n\n Train>>>Occupied  folder created at :     ', cnn_dataset)

if not os.path.isdir(os.path.join(cnn_dataset, 'train', 'Empty')):
    os.makedirs(os.path.join(cnn_dataset, 'train', 'Empty'))
    print('Train>>Empty      folder created at :     ', cnn_dataset)

# Copy the first train_sample_size rows of the shuffled listing into the
# class folder that matches each row's label.
# NOTE(review): copy2 into a flat folder overwrites any earlier file with the
# same basename -- confirm image names are unique across source folders.
count = 0  # start at zero so the printed total equals the number of copies
for row in df_train.head(train_sample_size).itertuples(index=False):
    src = os.path.join(root_dir, row.folder_path, row.label, row.image_name)
    dst = os.path.join(
        cnn_dataset, 'train', 'Occupied' if row.label == 'Occupied' else 'Empty'
    )
    copy2(src, dst)
    count += 1

print("Total train images:", count)

#__________________________________________________________________________________________
# Make sure both class folders exist under <cnn_dataset>/test before copying.
if not os.path.isdir(os.path.join(cnn_dataset, 'test', 'Occupied')):
    os.makedirs(os.path.join(cnn_dataset, 'test', 'Occupied'))
    print('test>>>Occupied  folder created at      : ', cnn_dataset)

if not os.path.isdir(os.path.join(cnn_dataset, 'test', 'Empty')):
    os.makedirs(os.path.join(cnn_dataset, 'test', 'Empty'))
    print('test>>>Empty     folder created at      : ', cnn_dataset)

# Copy the first test_sample_size rows of the shuffled listing into the
# class folder that matches each row's label.
# NOTE(review): copy2 into a flat folder overwrites any earlier file with the
# same basename -- confirm image names are unique across source folders.
count = 0  # start at zero so the printed total equals the number of copies
for row in df_test.head(test_sample_size).itertuples(index=False):
    src = os.path.join(root_dir, row.folder_path, row.label, row.image_name)
    dst = os.path.join(
        cnn_dataset, 'test', 'Occupied' if row.label == 'Occupied' else 'Empty'
    )
    copy2(src, dst)
    count += 1

print("________***________Total test images  : {0} ________***________   ".format(count) )

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
***CNN dataset folder created at >>> /content/drive/My Drive/ATI/CNN_Dataset/
***PKLot_Preprocess folder created at >>>  /content/drive/My Drive/ATI/PKLot_Preprocess/
['2013-01-22', '2013-01-21', '2013-01-29', '2012-12-12', '2012-12-21', '2012-12-26', '2013-01-18', '2012-12-13', '2012-12-11', '2012-12-17', '2012-12-20', '2012-12-07', '2012-12-16', '2012-12-24', '2012-12-08', '2012-12-25', '2012-12-18', '2012-12-19', '2013-01-17', '2012-12-23']
['2013-01-17', '