In [1]:
import pandas as pd
import numpy as np

In [9]:
# Facies names in class order: a name's 1-based position in this list is the
# numeric code stored in the `Facies_Num` column.
facies_names = ['argilaceous siltstone', 'fossiliferous argilaceous siltstone',
     'siliceous mudstone', 'argilaceous siliceous mudstone', 'calcareous siliceous mudstone',
     'wackestone', 'packstone - grainstone', 'rudstone', 'dolomitic packstone']

labels = pd.read_csv('./data/April_facies_labels.csv')
# list.index raises ValueError for a facies name missing from the list, so an
# unexpected name in the CSV fails loudly instead of getting a silent code.
labels['Facies_Num'] = labels.Facies.apply(lambda s: facies_names.index(s)+1)
# index=False: this cell overwrites the file it reads. Writing the row index
# would add an extra unnamed column to the CSV on every re-run.
labels.to_csv('./data/April_facies_labels.csv', index=False)

## Concat Depth Arrays

In [3]:
# Load the per-core depth arrays for cores 1 through 4, kept in core order.
depths = []
for core_id in range(1, 5):
    depths.append(np.load('./data/CORE_'+str(core_id)+'_depth_arr.npy'))

FileNotFoundError: [Errno 2] No such file or directory: './data/CORE_1_depth_arr.npy'

In [31]:
# Shape of every per-core depth array, plus the dtype of the first one.
shapes = [core_depths.shape for core_depths in depths]
first_dtype = depths[0].dtype
print(shapes, first_dtype)

[(122977,), (264938,), (62514,), (218190,)] float64


In [32]:
# Join the per-core depth arrays end to end, in list order.
depth_parts = tuple(depths)
full_depth = np.hstack(depth_parts)

full_depth.shape

(668619,)

In [38]:
# Keep only the samples that come before the first depth greater than 11033.6.
# NOTE(review): 11033.6 is presumably where the facies labels end -- confirm.
past_cutoff = full_depth > 11033.6
first_past_cutoff = np.argmax(past_cutoff)  # position of the first True
where_labeled = full_depth[:first_past_cutoff]
print(where_labeled.shape)
where_labeled

(607406,)


array([10620.        , 10620.00067686, 10620.00135373, ...,
       11033.59815242, 11033.59883168, 11033.59951094])

In [37]:

# Write the truncated depth array to disk; the labeling cells reload it.
np.save(file='./data/FULL_CORE_depth_arr.npy', arr=where_labeled)

## Concat Image Arrays

In [23]:
def pad_and_concat(img1, img2):
    """Stack two 2-D arrays vertically, zero-padding the narrower one.

    The narrower array gets columns of zeros appended on its right-hand side
    until both have the same width; `img2` is then placed below `img1`.

    Parameters
    ----------
    img1, img2 : numpy.ndarray
        2-D arrays (their ``shape`` is unpacked into height and width).

    Returns
    -------
    numpy.ndarray
        Array with ``h1 + h2`` rows and ``max(w1, w2)`` columns.

    Raises
    ------
    ValueError
        If either input's ``shape`` does not unpack into exactly two values.
    """
    h1, w1 = img1.shape
    h2, w2 = img2.shape

    width_diff = w1 - w2

    top, bottom = img1, img2
    if width_diff > 0:
        # img2 is narrower: widen it to match img1.
        bottom = np.concatenate([img2, np.zeros((h2, width_diff))], axis=1)
    elif width_diff < 0:
        # img1 is narrower: widen it to match img2.
        top = np.concatenate([img1, np.zeros((h1, -width_diff))], axis=1)

    # Single exit point, so the equal-width case also returns the stacked
    # array (previously it computed the result and dropped it, returning None).
    return np.concatenate([top, bottom])

In [17]:
# Load the per-core image arrays for cores 1 through 4, kept in core order.
images = []
for core_id in range(1, 5):
    images.append(np.load('./data/CORE_'+str(core_id)+'_image_arr.npy'))

In [18]:
# Shape of every per-core image array, plus the dtype of the first one.
shapes = [core_image.shape for core_image in images]
first_dtype = images[0].dtype
print(shapes, first_dtype)

[(122977, 394), (264938, 417), (62514, 382), (218190, 394)] float64


In [19]:
from functools import reduce

In [39]:
# Fold the core images into one tall array (left to right through the list),
# then keep only as many rows as there are entries in `where_labeled`.
n_labeled_rows = where_labeled.size
full_image = reduce(pad_and_concat, images)[:n_labeled_rows]
full_image.shape

(607406, 417)

In [40]:
# Write the combined, row-trimmed image array to disk.
np.save(file='./data/FULL_CORE_image_arr.npy', arr=full_image)

## Make a label for each pixel row

In [52]:
# Reload the saved depth array and allocate one uint8 label per entry,
# every label starting at 0.
where_labeled = np.load('./data/FULL_CORE_depth_arr.npy')

lab = np.zeros(where_labeled.shape, dtype=np.uint8)
lab.shape

(607406,)

In [54]:
# Walk the depth array once. For each row of `labels`, every not-yet-labeled
# depth below that row's End_Depth receives the row's Class_Num. Entries never
# reached keep their initial value of 0.
# NOTE(review): the single forward pass assumes `where_labeled` and the rows of
# `labels` are both ordered by increasing depth -- confirm.
i = 0
for idx, facie in labels.iterrows():
    # The bounds check is part of the loop condition. A `break` inside the
    # `while` only left the inner loop, so the next label row would index
    # where_labeled[lab.size] and raise IndexError.
    while i < lab.size and where_labeled[i] < facie.End_Depth:
        lab[i] = facie.Class_Num
        i += 1

In [56]:
# Fraction of entries in `lab` that carry each distinct value.
unique, counts = np.unique(lab, return_counts=True)
[(value, n_rows / lab.size) for value, n_rows in zip(unique, counts)]

[(1, 0.11623197663506782), (2, 0.7116179293586168), (3, 0.17215009400631537)]

In [62]:
# Write the per-row class labels to disk.
np.save(file='./data/FULL_CORE_class_labels.npy', arr=lab)

In [4]:
# Reload the saved depth array and allocate one uint8 facies label per entry,
# every label starting at 0.
where_labeled = np.load('./data/FULL_CORE_depth_arr.npy')
flab = np.zeros(where_labeled.shape, dtype=np.uint8)
flab.shape

(607406,)

In [10]:
# Walk the depth array once. For each row of `labels`, every not-yet-labeled
# depth below that row's End_Depth receives the row's Facies_Num. Entries never
# reached keep their initial value of 0.
# NOTE(review): the single forward pass assumes `where_labeled` and the rows of
# `labels` are both ordered by increasing depth -- confirm.
i = 0
for idx, facie in labels.iterrows():
    # The bounds check is part of the loop condition. A `break` inside the
    # `while` only left the inner loop, so the next label row would index
    # where_labeled[flab.size] and raise IndexError.
    while i < flab.size and where_labeled[i] < facie.End_Depth:
        flab[i] = facie.Facies_Num
        i += 1

In [11]:
# Fraction of entries in `flab` that carry each distinct value.
unique, counts = np.unique(flab, return_counts=True)
[(value, n_rows / flab.size) for value, n_rows in zip(unique, counts)]

[(1, 0.06599375047332427),
 (2, 0.050238226161743546),
 (3, 0.04512138503735558),
 (4, 0.3400888367912072),
 (5, 0.32640770753005405),
 (6, 0.0395139330200887),
 (7, 0.10421530245009104),
 (8, 0.014048264258173281),
 (9, 0.014372594277962352)]

In [12]:
# Write the per-row facies labels to disk.
np.save(file='./data/FULL_CORE_facies_labels.npy', arr=flab)