In [1]:
import numpy as np
import pandas as pd
import h5py
import os
import re
from tqdm.notebook import tqdm
from datetime import datetime

# Current working directory at the time this cell runs; the processing cell
# below uses it as the folder for the generated HDF5 file.
CURR_DIR = os.getcwd()

# Rock catalogue keyed by rock name. Rocks are listed per (lithology class,
# genetic class) group because every member of a group carries the same values.
# log_perm_mean_sigma: [mean(log10(mD)), sigma(log10(mD))]
ROCKS = {
    rock_name: {
        'lithology_class': lithology_class,
        'genetic_class': genetic_class,
        # list(...) gives every rock its own list object, as separate literals would
        'log_perm_mean_sigma': list(log_perm_mean_sigma),
        'youngs_module': 20,
        'poisson_ratio': 0.3,
    }
    for lithology_class, genetic_class, log_perm_mean_sigma, rock_names in (
        ('dyke', 'intrusive', (1.56, 1.18), (
            'Fels_Dyke_Sill', 'Maf_Dyke_Sill',
        )),
        ('plug', 'intrusive', (1.56, 1.18), (
            'Granite', 'Peridotite', 'Porphyry', 'Pyxen_Hbnd',
            'Gabbro', 'Diorite', 'Syenite',
        )),
        ('met_strat', 'metamorphic', (2, 1.45), (
            'Amphib', 'Gneiss', 'Marble', 'Meta_Carb', 'Meta_Felsic',
            'Meta_Intermed', 'Meta_Mafic', 'Meta_Sediment', 'Meta_Ultramaf',
            'Schist',
        )),
        ('met_strat', 'volcanic', (2.77, 0.85), (
            'Andesite', 'Basalt', 'Dacite', 'Ign_V_Breccia', 'Rhyolite',
            'Tuff_Lapillist', 'V_Breccia', 'V_Conglomerate', 'V_Sandstone',
            'V_Siltstone',
        )),
        ('strat', 'sedimentary', (3.08, 0.75), (
            'Conglomerate', 'Limestone', 'Pelite', 'Phyllite', 'Sandstone',
            'Greywacke',
        )),
    )
    for rock_name in rock_names
}

# Permeability bounds per genetic class: [min(log10(mD)), max(log10(mD))]
GEN_CLASS_PERM_LOG_MIN_MAX = dict(
    intrusive=[-1, 0],
    metamorphic=[0, 1],
    volcanic=[1, 2],
    sedimentary=[2, 3],
)

# Folder that holds the TAR files such as "DYKE_FOLD_FOLT" and others
PATH_TO_MODELS = 'D:/geological_models/'
# Sampling step per axis: (200, 200, 200) becomes (20, 20, 20)
DOWNSCALE_FACTOR = (10, 10, 10)

In [2]:
# Data-processing helper functions
from scipy import ndimage
from scipy.interpolate import RegularGridInterpolator

def downscale(inp, factor):
    '''
    Downscale an N-dimensional array by a moving average followed by resampling.

    A uniform (moving-average) filter with window ``factor`` is applied to a
    float32 copy of ``inp`` (edges are padded with the nearest value). The
    smoothed array is then sampled on a coarser grid whose step along each
    axis is the matching entry of ``factor``.

    Parameters
    ----------
    inp : np.ndarray
        Array to downscale; any number of dimensions.
    factor : int or sequence of int
        Step per axis, e.g. ``(2, 2, 2)`` halves every axis of a 3-D array.
        A single int applies the same step to every axis.

    Returns
    -------
    np.ndarray
        Downscaled array cast back to the dtype of ``inp`` (for integer
        dtypes the averaged values are therefore truncated by ``astype``).
    '''
    inp_dtype = inp.dtype
    if np.isscalar(factor):
        # a scalar step means "the same step along every axis"
        factor = (factor,) * inp.ndim
    factor = tuple(factor)

    # the filter is run on a float32 copy of the data
    avg_inp = ndimage.uniform_filter(inp.astype('float32'), size=factor, mode='nearest')

    old_axes = tuple(np.arange(0, k, 1) for k in avg_inp.shape)  # old mesh
    interp = RegularGridInterpolator(old_axes, avg_inp)  # interpolator object

    new_axes = [np.arange(0, k, f) for k, f in zip(avg_inp.shape, factor)]
    new_mesh = np.meshgrid(*new_axes, indexing='ij')  # new mesh
    out = interp(tuple(new_mesh))  # downscaled array
    return out.astype(inp_dtype)

In [13]:
# I/O helper functions

import tarfile
import gzip
import shutil

def del_folder(mydir):
    '''
    Remove the directory tree ``mydir``.

    An ``OSError`` raised by the removal is not propagated; its file name and
    message are printed instead.
    '''
    try:
        shutil.rmtree(mydir)
    except OSError as err:
        failure = (err.filename, err.strerror)
        print("Error: %s - %s." % failure)

def extract_tar_to_temp_folder(tar_path):
    '''
    Extract the ``g00``/``g12`` members of a TAR archive into the current
    working directory and return the name of the archive's directory.

    A regular member is extracted when the second-to-last dot-separated part
    of its name is ``g00`` or ``g12`` (e.g. ``model.g00.gz``). Member names
    with no dot at all are skipped.

    Parameters
    ----------
    tar_path : str
        Path to the TAR archive.

    Returns
    -------
    str
        Name of the last directory entry found in the archive.

    Raises
    ------
    ValueError
        If the archive contains no directory entry.
    '''
    wanted_kinds = ('g00', 'g12')
    # Python versions that ship tarfile extraction filters accept filter='data',
    # which refuses members that would be written outside the destination.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    gz_root_dirname = None
    with tarfile.open(tar_path) as tar:
        for t in tar:
            if t.isreg():
                name_parts = t.name.split('.')
                # a name without any dot has no second-to-last part to inspect
                if len(name_parts) >= 2 and name_parts[-2] in wanted_kinds:
                    tar.extract(t, **extract_kwargs)
            elif t.isdir():
                gz_root_dirname = t.name

    if gz_root_dirname is None:
        raise ValueError("No directory entry found in archive: %s" % tar_path)

    return gz_root_dirname

def get_labels_from_g12(g12_path, shape=(200, 200, 200)):
    '''
    Read the rock labels stored in a gzip-compressed ``g12`` text file.

    The numbers are parsed with ``np.loadtxt``, cast to ``int8``, reshaped to
    ``shape`` and then transposed with the axis order ``(1, 2, 0)``.

    Parameters
    ----------
    g12_path : str
        Path to the gzip-compressed text file.
    shape : tuple of 3 ints, optional
        Grid shape used for the reshape, i.e. before the transpose.
        Defaults to ``(200, 200, 200)``.

    Returns
    -------
    np.ndarray
        ``int8`` array of shape ``(shape[1], shape[2], shape[0])``.

    Raises
    ------
    ValueError
        Raised by ``reshape`` when the number of values in the file does not
        match ``shape``.
    '''
    with gzip.open(g12_path, 'r') as f:
        rock_labels = np.loadtxt(f, skiprows=0).astype('int8')

    # the file handle is no longer needed once the numbers are in memory
    rock_labels = rock_labels.reshape(tuple(shape))
    rock_labels = np.transpose(rock_labels, (1, 2, 0))

    return rock_labels

def get_rocks_from_g00(g00_path):
    '''
    Collect rock type and density per rock label from a gzipped ``g00`` file.

    Every line containing ``ROCK DEFINITION`` starts a new rock: the text
    between that marker and ``=`` is the rock type and the last
    whitespace-separated token is the integer label. Every later line
    containing ``Density`` sets the density (its last token) of the rock
    defined most recently.

    NOTE(review): the expected layout, ``ROCK DEFINITION <type>\\t= <label>``,
    is inferred from how the previous implementation sliced the line; confirm
    against a real ``g00`` file.

    The file is read as text, so the result does not depend on the line
    terminator (LF, CRLF, or no newline on the last line).

    Parameters
    ----------
    g00_path : str
        Path to the gzip-compressed ``g00`` file.

    Returns
    -------
    dict
        ``{label: {'type': str, 'dens': float}}``; ``'dens'`` is present only
        for rocks that were followed by a ``Density`` line.
    '''
    rock_dict = dict()
    rock_label = None  # label of the rock whose properties are being read
    with gzip.open(g00_path, 'rt', encoding='utf-8', errors='replace') as f:
        for raw_line in f:
            line = raw_line.strip()
            if "ROCK DEFINITION" in line:
                definition = line.partition("ROCK DEFINITION")[2]
                rock_type = definition.partition("=")[0].strip()
                rock_label = int(line.split()[-1])
                rock_dict[rock_label] = {'type': rock_type}

            # a density line seen before any rock definition has no owner: skip it
            if "Density" in line and rock_label is not None:
                rock_dens = float(line.split()[-1])
                rock_dict[rock_label]['dens'] = rock_dens

    return rock_dict

def random_perm_1(log_mean, log_sigma):
    '''
    Draw a random value whose log10 is normally distributed.

    One standard-normal sample from NumPy's global random state is scaled by
    ``log_sigma`` and shifted by ``log_mean``; the result is used as a power
    of ten.
    '''
    standard_draw = np.random.randn()
    log_perm = log_sigma * standard_draw + log_mean
    return 10**log_perm

def random_perm_2(log_min, log_max):
    '''
    Draw a random value whose log10 is uniform between ``log_min`` and ``log_max``.

    One uniform [0, 1) sample from NumPy's global random state picks the
    exponent inside the interval; the result is ten raised to that exponent.
    '''
    log_span = log_max - log_min
    fraction = np.random.rand()
    return 10**(log_min + fraction*log_span)

def add_random_perm_by_rock_type(some_rock_dict):
    '''
    Attach a random permeability to every rock of ``some_rock_dict``.

    For each entry the rock ``'type'`` is looked up in ``ROCKS`` to get its
    genetic class, the class bounds are read from
    ``GEN_CLASS_PERM_LOG_MIN_MAX`` and a value drawn with ``random_perm_2``
    is stored under ``'perm'``. The dictionary is modified in place and also
    returned.
    '''
    # The per-rock 'log_perm_mean_sigma' statistics in ROCKS are not used here;
    # sampling is done from the genetic-class bounds only.
    for rock_props in some_rock_dict.values():
        genetic_class = ROCKS[rock_props['type']]['genetic_class']
        log_min, log_max = GEN_CLASS_PERM_LOG_MIN_MAX[genetic_class]
        rock_props['perm'] = random_perm_2(log_min, log_max)

    return some_rock_dict

def map_labels_with_some_prop(labels, rock_dict, prop):
    '''
    Build a float16 property array with the same shape as ``labels``.

    ``prop`` is a property key of the ``rock_dict`` entries ('perm' or
    'dens'). Every cell whose label equals a key of ``rock_dict`` receives
    that rock's value; cells with any other label stay zero.
    '''
    model = np.zeros_like(labels, dtype='float16')
    for label, rock_props in rock_dict.items():
        cells_of_rock = labels == label
        model[cells_of_rock] = rock_props[prop]

    return model

def write_to_h5dataset(idx, data_to_write, h5dataset):
    '''
    Store ``data_to_write`` at row ``idx`` of ``h5dataset``, growing it if needed.

    When ``idx`` lies beyond the current first-axis length, the dataset is
    resized along axis 0 to exactly ``idx + 1`` rows before the write, so the
    target row always exists.

    Parameters
    ----------
    idx : int
        Row index along the first axis.
    data_to_write : array-like
        Data assigned to ``h5dataset[idx]``.
    h5dataset : object with ``shape``, ``resize(size, axis=0)`` and item
        assignment. Presumably an h5py dataset that is resizable along
        axis 0; verify against the caller.
    '''
    required_rows = idx + 1
    # idx == shape[0] is already one past the last valid row
    if required_rows > h5dataset.shape[0]:
        h5dataset.resize(required_rows, axis=0)
    h5dataset[idx] = data_to_write

In [15]:
# All together: every TAR archive -> (g00, g12) file pairs -> downscaled
# permeability and density cubes appended to one HDF5 file.
tar_list = os.listdir(PATH_TO_MODELS)
yield_path = f'{CURR_DIR}/downscaled_models_{datetime.now().strftime("%m_%d_%Y__%H_%M_%S")}.h5'
n_written = 0 # running row index; ii+jj would reuse the same rows for different archives
with h5py.File(yield_path, 'w') as targ:
    # dataset extents are given as integers (int(1e6) instead of the float 1e6)
    perm_h5_set = targ.create_dataset("perm", (int(1e6), 20, 20, 20), dtype='float16', maxshape=(None, 20, 20, 20)) #
    dens_h5_set = targ.create_dataset("dens", (int(1e6), 20, 20, 20), dtype='float16', maxshape=(None, 20, 20, 20))
    # NOTE(review): rows beyond n_written are never filled; consider resizing both datasets to n_written at the end.

    # tqdm wraps the list itself so the progress bar knows the total
    for ii, tar in enumerate(tqdm(tar_list)):
        tar_path = f'{PATH_TO_MODELS}{tar}'
        gz_root_dirname = extract_tar_to_temp_folder(tar_path) # extracting
        # os.listdir returns entries in arbitrary order; sorting keeps g00 and g12 files paired
        gz_list = sorted(os.listdir(gz_root_dirname))
        g00_path_list = [f'{gz_root_dirname}/{gz}' for gz in gz_list if 'g00' in gz] # paths to all g00 files with rock properties
        g12_path_list = [f'{gz_root_dirname}/{gz}' for gz in gz_list if 'g12' in gz] # paths to all g12 files with labels
        model_pairs = list(zip(g00_path_list, g12_path_list))
        for jj, (g00_path, g12_path) in enumerate(tqdm(model_pairs)):
            rock_dict = get_rocks_from_g00(g00_path)
            labels = get_labels_from_g12(g12_path)
            rock_dict = add_random_perm_by_rock_type(rock_dict) # add permeability
            perm_model = map_labels_with_some_prop(labels, rock_dict, 'perm') # permeability model mD
            dens_model = map_labels_with_some_prop(labels, rock_dict, 'dens') # density model g/cm^3

            perm_model = downscale(perm_model, DOWNSCALE_FACTOR) # downscaling
            dens_model = downscale(dens_model, DOWNSCALE_FACTOR)

            write_to_h5dataset(n_written, perm_model, perm_h5_set)
            write_to_h5dataset(n_written, dens_model, dens_h5_set)
            n_written += 1

            name = g00_path.split('.')[0] # names of initial files just in case

            if jj == 5: # delete me after testing
                break


        del_folder(gz_root_dirname.split('/')[0]) #clear it
        break # delete me after testing

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [52]:
# Display the directory name returned by the most recent extract_tar_to_temp_folder call.
gz_root_dirname

'models_by_code/models/DYKE_DYKE_DYKE'

In [48]:
# Scratch check: astype(<own dtype>) keeps float16, which is the pattern
# downscale() uses to restore the input dtype.
aa = np.zeros(shape=(3, 3), dtype=np.float16)
bb = aa.astype(int)
bb = aa.astype(aa.dtype)
bb

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]], dtype=float16)

In [None]:
# and here is our file with permeabilities
# NOTE(review): `perm_path` is not defined anywhere in the visible notebook, and
# the dataset names read here ('/perms', '/poissons', '/youngs', '/density')
# differ from the "perm"/"dens" datasets created by the processing cell;
# confirm which file this cell is meant to open.
with h5py.File(perm_path, 'r') as f:
    # take the first entry of each dataset
    perm = f['/perms'][0]
    poisson = f['/poissons'][0]
    youngs = f['/youngs'][0]
    dens = f['/density'][0]
# print(perm)
# print(poisson)
# print(youngs)
# print(dens)

In [3]:
# How to import a helper from the `methods.io_things` module and call it.
from methods.io_things import som_foo

som_foo()

1234