In [8]:
# Mount Google Drive inside the Colab runtime; the project folder is reached through this mount point.
from google.colab import drive

drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [12]:
%cd gdrive/MyDrive/petrobras2

/content/gdrive/MyDrive/petrobras2


In [14]:
# List the project folder to check that the data directory and the scripts are in place.
!ls


data			imres_data_setup.py  imres_train.py
imres_data_setup.ipynb	imres_data_split.py  mask_rcnn_coco.h5


# ===============================================================
# **Projeto de Classificação da Qualidade de Perfis de Imagem LWD** 

# Script para montar os arquivos com as imagens e bounding boxes individuais

# Data: Outubro de 2020
# Autor: Ralph Piazza
# ===============================================================

In [15]:
# import libraries

import numpy as np
import pandas as pd
import math
import imageio
from os import listdir

In [16]:
# global definitions

# Number of log rows that make up one individual image; also stored in the HEIGHT column of every bounding box.
HEIGHT = 512

# Horizontal limits written to the XMIN / XMAX columns of every bounding box (all boxes share them).
# NOTE(review): presumably 1..120 is the full width of the image in columns - confirm against the image files.
XMIN = 1
XMAX = 120

# Input folders holding the original per-well annotation and image files (';'-separated, ',' as decimal mark).
ANNOT_DIR = 'data/annots_original/'
IMAGE_DIR = 'data/images_original/'

# Output folders: one CSV and one JPG per individual image, plus one bounding-box CSV per image.
IMAGE_CSV_DEST = 'data/images_csv/'
IMAGE_JPG_DEST = 'data/images/'
ANNOT_DEST = 'data/annots/'

# Annotation column whose values 1 / 10 / 100 are mapped to CLASS 1 / 2 / 3 (rows holding -9999 are discarded).
RES_TRACK_NAME = 'RESOLUCAO_CURVA_REVISAO'
# Name of the depth column in the annotation files and in the image files, respectively.
DEPTH_ANNOT = 'DEPTH'
DEPTH_IMAGE = 'TDEP'

In [17]:
# convert csv files into jpg files

def convert_jpg(file_path, img_count):
    """Convert one CSV image file into an 8-bit JPG.

    The comma-delimited CSV at ``file_path`` is loaded as a numeric array,
    min-max scaled to the 0-255 range, cast to ``uint8`` and written to
    ``IMAGE_JPG_DEST`` under the zero-padded image number (e.g. ``00001.jpg``).

    Parameters
    ----------
    file_path : str
        Path of the CSV file holding the values of one individual image.
    img_count : int
        Sequential image number used to build the output file name.
    """
    img_data = np.genfromtxt(file_path, delimiter=',')

    # Shift so the smallest value becomes 0, then stretch the largest to 255.
    img_data = img_data - img_data.min()
    value_range = img_data.max()
    if value_range > 0:
        img_data = img_data / value_range * 255
    # A constant image has value_range == 0: it is left as all zeros instead of
    # dividing 0 by 0, which would fill the array with NaN before the uint8 cast.
    img_data = np.uint8(img_data)

    write_path = IMAGE_JPG_DEST + str(img_count).zfill(5) + '.jpg'
    imageio.imwrite(write_path, img_data)
    return

In [18]:
# create csv files with individual images

def create_imgfiles(df_image, prev_wells_images, df_annot, well_number):
    """Cut one well's image log into HEIGHT-row images and write their files.

    For every complete slice of ``HEIGHT`` rows this writes, in order: the
    bounding-box file (via ``create_bbfile``), the image CSV without the depth
    column, and the JPG rendered from that CSV (via ``convert_jpg``). Rows left
    over at the end of the well that do not fill a whole image are ignored.

    Parameters
    ----------
    df_image : pandas.DataFrame
        Image log of one well, including the ``DEPTH_IMAGE`` column.
    prev_wells_images : int
        Number of images already produced for previous wells; image numbering
        continues from this value.
    df_annot : pandas.DataFrame
        Annotation rows of the same well, forwarded to ``create_bbfile``.
    well_number : int
        Well identifier, forwarded to ``create_bbfile``.

    Returns
    -------
    int
        Updated running total of images (``prev_wells_images`` plus the number
        of images created for this well).
    """
    n_images = len(df_image) // HEIGHT

    for idx in range(n_images):
        img_number = prev_wells_images + idx + 1
        top = idx * HEIGHT
        image_rows = df_image.iloc[top:top + HEIGHT]

        # The annotation file needs the depth column, so it is built before that column is dropped.
        create_bbfile(image_rows, img_number, df_annot, well_number)

        csv_path = IMAGE_CSV_DEST + str(img_number).zfill(5) + '.csv'
        pixels_only = image_rows.drop(columns=[DEPTH_IMAGE])
        pixels_only.to_csv(csv_path, header=False, index=False)

        # The JPG is produced by reading back the CSV that was just written.
        convert_jpg(csv_path, img_number)

    return prev_wells_images + n_images

In [19]:
# create csv files with bounding boxes for each image 

def create_bbfile(df_single_image, img_count, df_annot, well_number):
    """Write the bounding-box CSV that accompanies one individual image.

    The annotation rows whose depth lies within the depth span of the image are
    scanned in order, and every run of consecutive rows with the same ``CLASS``
    value becomes one bounding box. A run that ends at a class change yields a
    box when it has at least two rows; the run that reaches the last annotation
    row of the image yields a box when it has at least three rows.
    NOTE(review): this asymmetry is kept exactly as in the original logic -
    confirm whether a two-row run at the end of the image should also get a box.

    Row positions are derived by counting rows below a depth threshold, so this
    assumes both frames are sorted by increasing depth - TODO confirm.

    Parameters
    ----------
    df_single_image : pandas.DataFrame
        Rows of one image, including the ``DEPTH_IMAGE`` column.
    img_count : int
        Sequential image number used to build the output file name.
    df_annot : pandas.DataFrame
        Annotation rows of the well, with ``DEPTH_ANNOT`` and ``CLASS`` columns.
    well_number : int
        Well identifier stored in the ``WELL`` column of every box.

    The result is written to ``ANNOT_DEST`` as ``<zero-padded img_count>.csv``
    with a header row; the file has no data rows when no box was produced.
    """
    df_single_annot = pd.DataFrame(columns=['XMIN', 'YMIN', 'XMAX', 'YMAX', 'CLASS', 'HEIGHT', 'WELL', 'INIT_DEPTH', 'FINAL_DEPTH'])

    image_depths = df_single_image[DEPTH_IMAGE]
    annot_depths = df_annot[DEPTH_ANNOT]
    annot_classes = df_annot['CLASS']

    image_init_depth = image_depths.iloc[0]
    image_final_depth = image_depths.iloc[-1]

    # Positions of the first annotation row inside the image and of the first one past its end.
    init_row = int((annot_depths < image_init_depth).sum())
    final_row = int((annot_depths <= image_final_depth).sum())

    def _add_bbox(row, first_annot_row, last_annot_row, box_class):
        # Store one bounding box covering the given (inclusive) range of annotation rows.
        low_depth = annot_depths.iloc[first_annot_row]
        high_depth = annot_depths.iloc[last_annot_row]

        # Vertical limits: number of image rows shallower than each depth, plus one.
        ymin = int((image_depths < low_depth).sum())
        ymax = int((image_depths < high_depth).sum())

        df_single_annot.at[row, 'XMIN'] = int(XMIN)
        df_single_annot.at[row, 'YMIN'] = int(ymin + 1)
        df_single_annot.at[row, 'XMAX'] = int(XMAX)
        df_single_annot.at[row, 'YMAX'] = int(ymax + 1)
        df_single_annot.at[row, 'CLASS'] = int(box_class)
        df_single_annot.at[row, 'HEIGHT'] = HEIGHT
        df_single_annot.at[row, 'WELL'] = well_number
        df_single_annot.at[row, 'INIT_DEPTH'] = low_depth
        df_single_annot.at[row, 'FINAL_DEPTH'] = high_depth

    # Without any annotation row inside the image there is nothing to scan; this guard
    # also avoids indexing past the end of df_annot when it stops before the image starts.
    if init_row < final_row:
        bbox = 0
        start = init_row
        cur_class = annot_classes.iloc[init_row]
        last_row = final_row - 1

        for i in range(init_row + 1, final_row):
            row_class = annot_classes.iloc[i]

            if row_class != cur_class:
                # Class change: close the run start..i-1, unless it is a single row (skipped).
                if i > start + 1:
                    _add_bbox(bbox, start, i - 1, cur_class)
                    bbox += 1
                cur_class = row_class
                start = i

            elif i == last_row and i > start + 1:
                # Last annotation row of the image: close the run that is still open.
                _add_bbox(bbox, start, i, cur_class)

    file_path = ANNOT_DEST + str(img_count).zfill(5) + '.csv'
    df_single_annot.to_csv(file_path, index=False)
    return

In [20]:
# parse original image and annotation files

# Running total of images written so far; image file numbers continue across wells.
prev_wells_images = 0

# Listed once and sorted: os.listdir returns entries in arbitrary order, and the
# image numbering depends on the order in which the wells are processed.
annot_files = sorted(listdir(ANNOT_DIR))

for image_file in sorted(listdir(IMAGE_DIR)):                                                   ### select image file
    df_image = pd.read_csv(IMAGE_DIR + image_file, decimal=",", sep=';')
    df_image = df_image.dropna()

    # The well number is taken from characters 5-6 of the file name.
    well_number = int(image_file[5:7])

    for annot_file in annot_files:                                                              ### select the corresponding annotation file
        if int(annot_file[5:7]) == well_number:
            # skiprows=[1] discards the line right after the header.
            df_annot = pd.read_csv(ANNOT_DIR + annot_file, skiprows=[1], decimal=",", sep=';')
            df_annot = df_annot.dropna(subset=[DEPTH_ANNOT, RES_TRACK_NAME])
            # Rows holding -9999 in the resolution track are discarded.
            df_annot = df_annot.drop(df_annot[df_annot[RES_TRACK_NAME] == -9999].index)
            break
    else:
        # Without this check the annotations of the previously processed well would be reused silently.
        raise FileNotFoundError(
            'no annotation file found in {} for well {} (image file {})'.format(ANNOT_DIR, well_number, image_file)
        )

    info_init_depth = df_annot[DEPTH_ANNOT].iloc[0]
    info_final_depth = df_annot[DEPTH_ANNOT].iloc[-1]
    image_init_depth = df_image[DEPTH_IMAGE].iloc[0]
    image_final_depth = df_image[DEPTH_IMAGE].iloc[-1]

    ### crop files so that they cover approximately the same depth span
    if info_init_depth > image_init_depth:
        df_image = df_image.drop(df_image[df_image[DEPTH_IMAGE] < info_init_depth].index)
    elif info_init_depth < image_init_depth:
        df_annot = df_annot.drop(df_annot[df_annot[DEPTH_ANNOT] < image_init_depth].index)
    if info_final_depth > image_final_depth:
        df_annot = df_annot.drop(df_annot[df_annot[DEPTH_ANNOT] > image_final_depth].index)
    elif info_final_depth < image_final_depth:
        df_image = df_image.drop(df_image[df_image[DEPTH_IMAGE] > info_final_depth].index)

    # Resolution values 1 / 10 / 100 become classes 1 / 2 / 3; any other value leaves CLASS empty (NaN).
    for res_value, class_id in ((1, 1), (10, 2), (100, 3)):
        df_annot.loc[df_annot[RES_TRACK_NAME] == res_value, 'CLASS'] = class_id

    ### call function to create individual image and annotation files
    prev_wells_images = create_imgfiles(df_image, prev_wells_images, df_annot, well_number)