In [1]:
# Source dataset: every image/label found in its train/valid/test splits is copied into the new training split.
original_folder = 'livestalk_2-7'
# Holdout dataset: its files are copied into both the valid and test splits of the new folder.
holdout_basalt_folder = 'livestalk_3-1' 

# Destination folder for the re-split dataset (resolved against the current working directory).
new_folder = 'livestalk_carbondale-baseline-raw'

In [2]:
from shutil import copyfile
import os
import glob
import pandas as pd

In [3]:
def make_folder(path):
    """Create the dataset folder skeleton under ``path``.

    The skeleton is ``<path>/{train,valid,test}/{images,labels}``. Missing
    parent directories are created as well, and any sub-folder that is absent
    from an already existing ``path`` is filled in, so the layout is complete
    whenever this function returns.

    Args:
        path: Root directory of the dataset to create.

    Raises:
        OSError: If ``path`` (or one of the sub-folders) exists but is not a
            directory, or cannot be created.
    """
    # Remember this before creating anything so the right message is printed.
    already_existed = os.path.exists(path)

    for split in ('train', 'valid', 'test'):
        for kind in ('images', 'labels'):
            # exist_ok=True makes the call idempotent and repairs partial layouts.
            os.makedirs(os.path.join(path, split, kind), exist_ok=True)

    if already_existed:
        print(f'a folder already exists here: {path}')
    else:
        print(f'folders created: {path}')

### Identify the folders for the original, holdout, and re-split datasets

In [4]:
# Absolute locations of the source, holdout and destination datasets,
# each resolved against the current working directory.
original_path, holdout_path, new_path = (
    os.path.join(os.getcwd(), folder_name)
    for folder_name in (original_folder, holdout_basalt_folder, new_folder)
)

# Make sure the destination folder skeleton is in place.
make_folder(new_path)

# Template for the dataset description file; {0} is filled with the
# destination folder name.
yaml_txt = """names:
- cow
nc: 1
train: ../{0}/train/images
val: ../{0}/valid/images
test: ../{0}/test/images
"""

# data.yaml goes inside the destination folder (path relative to the working directory).
yaml_destination = os.path.join(new_folder,'data.yaml')
with open(yaml_destination,'w') as f:
    f.write(yaml_txt.format(new_folder))


a folder already exists here: /home/omar/datasci/w251-DL/livestalk/livestalk_carbondale-baseline-raw


### Get counts in old folders

In [5]:
def get_counts(path,ret=True):
    """Print per-split image counts for a dataset folder and return its files.

    Looks for ``*.jpg`` files in ``<path>/<split>/images`` and ``*.txt`` files
    in ``<path>/<split>/labels`` for the train, valid and test splits.

    Args:
        path: Root directory of the dataset.
        ret: Accepted but not used by the function body; both lists are
            always returned.

    Returns:
        A tuple ``(all_images, all_labels)`` of sorted path lists covering
        all three splits.

    Raises:
        AssertionError: If the number of images and labels differ.
    """
    def _find(split, kind, pattern):
        # All files matching `pattern` inside <path>/<split>/<kind>.
        return glob.glob(os.path.join(path, split, kind, pattern))

    splits = ('train', 'valid', 'test')
    images_per_split = [_find(split, 'images', '*.jpg') for split in splits]
    labels_per_split = [_find(split, 'labels', '*.txt') for split in splits]

    # Flatten the per-split lists and order them by path.
    all_images = sorted(p for group in images_per_split for p in group)
    all_labels = sorted(p for group in labels_per_split for p in group)

    assert len(all_images) == len(all_labels)

    n_train, n_valid, n_test = (len(group) for group in images_per_split)
    print(f'Training: {n_train}')
    print(f'Validation: {n_valid}')
    print(f'Test: {n_test}')
    print(f'Total number of images: {len(all_images)}')

    return all_images,all_labels
    
# Sorted image/label paths of the original dataset, across its train/valid/test splits.
original_images, original_labels = get_counts(original_path)

# Sorted image/label paths of the holdout dataset, across its train/valid/test splits.
holdout_basalt_images, holdout_basalt_labels = get_counts(holdout_path)

Training: 183
Validation: 75
Test: 0
Total number of images: 258
Training: 47
Validation: 12
Test: 0
Total number of images: 59


### Copy files to new location

In [6]:

def copy_files(images,labels,new_path,location):
    """Copy matching image/label files into one split of a dataset folder.

    Images go to ``<new_path>/<location>/images`` and labels to
    ``<new_path>/<location>/labels``, keeping their file names. When
    ``location`` is ``'valid'`` every file is additionally copied into the
    ``test`` split. The destination directories must already exist.

    Args:
        images: Paths of the image files.
        labels: Paths of the label files; each must share its file name
            (extension aside) with exactly one image.
        new_path: Root directory of the destination dataset.
        location: Name of the destination split, e.g. ``'train'`` or ``'valid'``.

    Raises:
        AssertionError: If the lists differ in length or an image and its
            paired label do not share the same file name stem.
    """
    assert len(images) == len(labels)

    def _without_extension(file_path):
        return os.path.splitext(file_path)[0]

    # Sort on the path without its extension. Sorting the raw names lets the
    # differing extensions ('.jpg' vs '.txt') influence the order, so stems
    # such as 'a' and 'a.k' would pair up incorrectly.
    images = sorted(images, key=_without_extension)
    labels = sorted(labels, key=_without_extension)

    # Files destined for 'valid' are mirrored into 'test' as well.
    destinations = [location, 'test'] if location == 'valid' else [location]

    for image,label in zip(images,labels):
        image_name = os.path.basename(image)
        label_name = os.path.basename(label)

        assert _without_extension(image_name) == _without_extension(label_name), 'lists not aligned'

        for split in destinations:
            copyfile(image, os.path.join(new_path, split, 'images', image_name))
            copyfile(label, os.path.join(new_path, split, 'labels', label_name))
                                      

# Copy every file of the original dataset into the new training split.
copy_files(original_images, original_labels,new_path,location='train')

# Copy the holdout files into the new validation split; copy_files also mirrors 'valid' files into 'test'.
copy_files(holdout_basalt_images, holdout_basalt_labels,new_path,location='valid')


### Get counts in new location

In [7]:
# Re-count the new folder; only the printed counts matter here, so the returned path lists are discarded.
# The holdout files sit in both 'valid' and 'test', so the printed total counts them twice.
_, _ = get_counts(new_path)

Training: 258
Validation: 59
Test: 59
Total number of images: 376
