In [48]:
import numpy as np
import pandas as pd
import os # used for navigating to image path
import csv

In [49]:
# Directory that holds every image tile; later cells list it and rename files inside it.
ALL_TILES = '../../data/tidy/conflict-split-images/all_tiles/'
# CSV listing the tiles to classify; passed to assign_sample_tiles().
TILES_ASSIGNMENT = '../../data/tidy/conflict-split-images/tiles_assignment.csv'

In [50]:
def assign_sample_tiles(unassigned_tiles_path, seed=None):
    """Attach a random placeholder label to every row of a tile CSV.

    Parameters
    ----------
    unassigned_tiles_path : str or path-like
        CSV file loaded with ``pandas.read_csv``.
    seed : int, optional
        When given, labels are drawn from ``numpy.random.default_rng(seed)``
        so repeated calls produce the same assignment.  When omitted, labels
        come from NumPy's global random state, exactly as before.

    Returns
    -------
    pandas.DataFrame
        The loaded table with a ``Classification`` column added (or
        overwritten) that holds ``'c'`` (conflict) or ``'n'`` (no conflict),
        picked at random for each row.
    """
    data = pd.read_csv(unassigned_tiles_path)

    choices = ['c', 'n']
    if seed is None:
        # Legacy path: still honours any np.random.seed() set by the caller.
        data['Classification'] = np.random.choice(choices, size=len(data))
    else:
        # Local generator: reproducible without touching global random state.
        data['Classification'] = np.random.default_rng(seed).choice(choices, size=len(data))

    return data

In [166]:
# Placeholder labels: every run of this cell draws a fresh random 'c'/'n' assignment.
sample_df = assign_sample_tiles(TILES_ASSIGNMENT)

In [52]:
def generate_index(sample_data,
                   output_path='../../data/tidy/conflict-split-images/tile_index_mapping.csv'):
    """Number the tiles within each class and write the tile -> new-name index to CSV.

    Parameters
    ----------
    sample_data : pandas.DataFrame
        Must provide a ``Filename`` column (tile file name) and a
        ``Classification`` column: ``'c'`` = conflict, ``'n'`` = no conflict
        (case-insensitive, surrounding whitespace ignored).  Any other value,
        including a missing one, is counted as ``unknown``.
    output_path : str, optional
        Destination of the CSV index (UTF-8, no index column).  Defaults to
        the path this function has always written to.

    Returns
    -------
    (pandas.DataFrame, dict)
        The index, one row per input row and sharing its index labels, with
        columns ``original`` (tile name up to the first underscore, extension
        removed), ``tile`` (the input file name) and ``classification``
        (despite the name, this is the NEW file name, e.g. ``conflict-3.jpg``);
        and a dict with the number of tiles per class under the keys
        ``conflict``, ``no_conflict`` and ``unknown``.
    """
    prefixes = {'c': 'conflict', 'n': 'no_conflict'}
    counters = {'conflict': 0, 'no_conflict': 0, 'unknown': 0}

    # Collect plain rows and build the frame once; growing a DataFrame with
    # ``df.loc[i] = ...`` re-allocates it on every insertion.
    records = []
    for row in sample_data.itertuples():
        tile = row.Filename
        # Stem up to the first underscore: 'improbable-1_0_336.jpg' -> 'improbable-1'.
        original_image = os.path.splitext(tile)[0].split('_')[0]

        label = row.Classification
        # A blank CSV cell arrives as float NaN, which has no .lower(); route it to 'unknown'.
        key = label.strip().lower() if isinstance(label, str) else ''
        prefix = prefixes.get(key, 'unknown')

        counters[prefix] += 1
        save_label = prefix + '-' + str(counters[prefix]) + '.jpg'
        records.append((original_image, tile, save_label))

    df = pd.DataFrame(records,
                      columns=['original', 'tile', 'classification'],
                      index=sample_data.index)
    df.to_csv(output_path, encoding='utf-8', index=False)

    print('Number of conflict tiles recorded:', counters['conflict'])
    print('Number of no-conflict tiles recorded:', counters['no_conflict'])
    print('Number of unknown tiles:', counters['unknown'])

    return df, counters

In [167]:
# Build the tile index (also written to CSV by the function) and the per-class tile counts.
df, c = generate_index(sample_df)

Number of conflict tiles recorded: 267
Number of no-conflict tiles recorded: 299
Number of unknown tiles: 0


In [162]:
# Display the tile index.
# NOTE(review): this cell's execution count predates the latest generate_index() run,
# so the saved output may be stale — re-run to confirm.
df

Unnamed: 0,original,tile,classification
1,improbable-1,improbable-1_0_336.jpg,no_conflict-1.jpg
2,improbable-1,improbable-1_0_672.jpg,no_conflict-2.jpg
3,improbable-1,improbable-1_0_1008.jpg,conflict-1.jpg
4,improbable-1,improbable-1_0_1344.jpg,no_conflict-3.jpg
5,improbable-1,improbable-1_0_1680.jpg,conflict-2.jpg
...,...,...,...
562,probable-2,probable-2_3584_1344.jpg,conflict-288.jpg
563,probable-2,probable-2_3584_1680.jpg,conflict-289.jpg
564,probable-2,probable-2_3584_2016.jpg,no_conflict-275.jpg
565,probable-2,probable-2_3584_2352.jpg,no_conflict-276.jpg


In [155]:
# Look up a tile by its original file name. The 'classification' column only holds
# generated names ('conflict-N.jpg', 'no_conflict-N.jpg', 'unknown-N.jpg'), so a tile
# file name can only ever match in the 'tile' column.
df[df['tile'] == 'improbable-1_0_0.jpg']

Unnamed: 0,original,tile,classification


In [146]:
def rename_tiles_conflict(data, tiles_dir=None):
    """Rename tile files on disk to the class-based names recorded in the index.

    Files in the directory that are not listed in ``data`` are left alone, as
    are index rows whose tile is not present in the directory.

    Parameters
    ----------
    data : pandas.DataFrame
        Index with a ``tile`` column (current file name) and a
        ``classification`` column (new file name).
    tiles_dir : str, optional
        Directory holding the tiles.  Defaults to the module-level
        ``ALL_TILES``.

    Raises
    ------
    FileExistsError
        If a rename would land on a name that already exists in the directory,
        or two tiles map to the same new name.  The check runs before anything
        is renamed, so the directory is left untouched.  Without it,
        ``os.rename`` on POSIX silently replaces the existing file.
    """
    if tiles_dir is None:
        tiles_dir = ALL_TILES

    # One dictionary lookup per file instead of scanning every index row per file.
    new_name_for = {}
    for tile, new_name in zip(data['tile'], data['classification']):
        new_name_for.setdefault(tile, new_name)  # first row wins on duplicate tiles

    present = set(os.listdir(tiles_dir))
    planned = [(name, new_name_for[name]) for name in sorted(present) if name in new_name_for]

    # Validate the whole plan first so a clash cannot leave the directory half renamed.
    claimed = set()
    clashes = []
    for src, dst in planned:
        if dst != src and (dst in present or dst in claimed):
            clashes.append(dst)
        claimed.add(dst)
    if clashes:
        raise FileExistsError(
            'refusing to overwrite existing tile(s): ' + ', '.join(sorted(set(clashes))))

    for src, dst in planned:
        os.rename(os.path.join(tiles_dir, src), os.path.join(tiles_dir, dst))

In [168]:
# Renames files inside ALL_TILES on disk according to the index; this modifies the data directory.
rename_tiles_conflict(df)

In [159]:
# NOTE(review): SEED is not referenced by any code visible in this part of the notebook —
# confirm it is consumed further down (the random labelling above does not use it).
SEED = 100  
#NUM_PLOT_IMAGES_PER_CLASS = 1 #4 ## NOT USED IN CURRENT IMPLEMENTATION
#EXPANSION_FACTOR = 5 #5 of augmented images ## NOT USED IN CURRENT IMPLEMENTATION

def getImageOneHotVector(image_file_name):
    """Map a tile's file name to its integer class label.

    The class is the part of the base file name before the first ``-``
    (e.g. ``conflict`` in ``conflict-12.jpg``).  Despite the function's name,
    the result is a scalar label, not a one-hot vector.

    Parameters
    ----------
    image_file_name : str
        File name or path of a tile.  Any directory part is ignored, so a
        hyphen in a folder name (such as ``conflict-split-images``) cannot be
        mistaken for the class separator.

    Returns
    -------
    int
        ``1`` for ``conflict``, ``0`` for ``no_conflict`` and ``-1`` when the
        prefix is anything else (no class label present).
    """
    word_label = os.path.basename(image_file_name).split('-')[0]
    if word_label == 'conflict':
        return 1
    if word_label == 'no_conflict':
        return 0
    return -1  # label is not present for this image

In [169]:
# Prefix (text before the first '-') of every file name currently in the tile directory.
labels = [tile_name.split('-')[0] for tile_name in os.listdir(ALL_TILES)]

In [170]:
# Distinct file-name prefixes in the tile directory; a prefix other than
# 'conflict' / 'no_conflict' means some files do not carry a class-based name.
print(set(labels))

{'no_conflict', 'improbable', 'conflict'}
