In [7]:
# for tensorflow
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow import keras

# other useful libraries
import numpy as np
import os
from os.path import join as pjoin
from datetime import datetime
from sklearn.model_selection import train_test_split
import collections
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
# Base path prefix (relative to the working directory) under which the
# 'data' folder is expected. The trailing slash is required: the code below
# builds paths by plain string concatenation (path_drive + 'data').
# NOTE(review): presumably a mounted Google Drive folder in Colab — confirm.
path_drive = "gdrive/My Drive/ColabNotebooks/"

In [None]:
def split_train_val(args, per_val=0.1):
    """
    Split the section names into training and validation sets and write
    them to text files.

    One name is generated per index along the first axis of the label
    volume ('i_<n>', inline) and one per index along the second axis
    ('x_<n>', crossline). The full list and the two random subsets are
    written, one name per line, to:

        <path_drive>data/splits/section_train_val.txt
        <path_drive>data/splits/section_train.txt
        <path_drive>data/splits/section_val.txt

    :param args: unused; kept so existing callers keep working
    :param per_val: proportion of the section names put in the validation
        split (passed to ``train_test_split`` as ``test_size``)
    :return: None
    """
    loader_type = 'section'
    data_dir = path_drive + 'data'
    labels = np.load(pjoin(data_dir, 'train', 'train_labels.npy'))

    # Section names for both directions of the label volume.
    i_list = ['i_' + str(inline) for inline in range(labels.shape[0])]
    x_list = ['x_' + str(crossline) for crossline in range(labels.shape[1])]
    list_train_val = i_list + x_list

    # Random (shuffled, unseeded) train / validation split.
    list_train, list_val = train_test_split(
        list_train_val, test_size=per_val, shuffle=True)

    # Make sure the destination exists so a fresh data folder does not fail.
    splits_dir = pjoin(data_dir, 'splits')
    os.makedirs(splits_dir, exist_ok=True)

    # Write each list to disk; `with` closes the file even if the write fails.
    outputs = (
        ('train_val', list_train_val),
        ('train', list_train),
        ('val', list_val),
    )
    for suffix, names in outputs:
        file_path = pjoin(splits_dir, loader_type + '_' + suffix + '.txt')
        with open(file_path, 'w') as file_object:
            file_object.write('\n'.join(names))


In [None]:
class section_loader():
  """
  Data loader for the section-based deconvnet.

  Loads a seismic volume and its label volume from ``<path_drive>data`` and
  serves 2-D sections of them. Each section is identified by a name such as
  'i_12' (index 12 along the first axis) or 'x_7' (index 7 along the second
  axis); the names for each split are read from text files in
  ``<path_drive>data/splits``.
  """

  def __init__(self, split='train', is_transform=True,
                augmentations=None):
    """
    Load the volumes and the section-name lists for the requested split.

    :param split: name of the split to serve. Any name not containing
        'test' is treated as train/val mode; names containing 'test1' or
        'test2' select the corresponding test volume.
    :param is_transform: when True, ``transform`` is applied to every
        section returned by ``__getitem__``.
    :param augmentations: optional callable ``(im, lbl) -> (im, lbl)``
        applied before ``transform``; None disables augmentation.
    :raises ValueError: if ``split`` contains 'test' but neither 'test1'
        nor 'test2'.
    """
    data_dir = path_drive + 'data'
    self.root = data_dir + '/'
    self.split = split
    self.is_transform = is_transform
    self.augmentations = augmentations
    self.n_classes = 6
    self.mean = 0.000941  # average of the training data
    self.sections = collections.defaultdict(list)

    # Pick the folder and file prefix of the volumes to load.
    if 'test' not in self.split:
      # Normal train/val mode
      volume_dir, prefix = 'train', 'train'
    elif 'test1' in self.split:
      volume_dir, prefix = 'test_once', 'test1'
    elif 'test2' in self.split:
      volume_dir, prefix = 'test_once', 'test2'
    else:
      raise ValueError('Unknown split.')

    self.seismic = np.load(pjoin(data_dir, volume_dir, prefix + '_seismic.npy'))
    self.labels = np.load(pjoin(data_dir, volume_dir, prefix + '_labels.npy'))

    if 'test' not in self.split:
      # Train/val mode: the test split files are most likely not saved yet,
      # so only the train/val lists are read.
      split_names = ['train', 'val', 'train_val']
    else:
      # Test mode: only read the requested split; the other one might not
      # be available.
      split_names = [self.split]

    for name in split_names:
      path = pjoin(data_dir, 'splits', 'section_' + name + '.txt')
      self.sections[name] = self._read_split_file(path)

  @staticmethod
  def _read_split_file(path):
    """Return the lines of ``path`` with trailing whitespace stripped."""
    # `with` guarantees the file handle is closed after reading.
    with open(path, 'r') as split_file:
      return [line.rstrip() for line in split_file]

  def __len__(self):
    """Return the number of sections in the active split."""
    return len(self.sections[self.split])

  def __getitem__(self, index):
    """
    Return the ``(image, label)`` pair for the ``index``-th section of the
    active split.

    :param index: position in the section-name list of ``self.split``
    :return: tuple ``(im, lbl)`` after optional augmentation and transform
    :raises ValueError: if the section name does not start with 'i' or 'x'
    """
    section_name = self.sections[self.split][index]
    direction, number = section_name.split(sep='_')
    number = int(number)

    if direction == 'i':
      im = self.seismic[number, :, :]
      lbl = self.labels[number, :, :]
    elif direction == 'x':
      im = self.seismic[:, number, :]
      lbl = self.labels[:, number, :]
    else:
      # Without this branch `im`/`lbl` would be unbound further down.
      raise ValueError(
          "Unknown section direction '%s' in section name '%s'."
          % (direction, section_name))

    if self.augmentations is not None:
      im, lbl = self.augmentations(im, lbl)

    if self.is_transform:
      im, lbl = self.transform(im, lbl)
    return im, lbl

  def transform(self, img, lbl):
    """
    Centre, transpose and min-max normalise a section.

    :param img: 2-D image section
    :param lbl: 2-D label section
    :return: ``(img, lbl)`` — both transposed; ``img`` additionally scaled
        to the range [0, 1] by ``MinMaxScaler``
    """
    # Out-of-place subtraction: `img` is normally a view into self.seismic,
    # so `img -= mean` would shift the stored volume on every access.
    img = img - self.mean

    # Swap the two axes of the section and materialise the results as arrays.
    img = np.array(img.T)
    lbl = np.array(lbl.T)

    # Scale to 0-1 (original note: the data lies between -1 and 1).
    # MinMaxScaler scales each column independently; one fit is sufficient.
    scaler = MinMaxScaler(feature_range=(0, 1))
    img = scaler.fit_transform(img)

    return img, lbl