In [1]:
from PIL import Image
import os
import pandas as pd
import numpy as np
from pathlib import Path

# Ingest raw data contained in raw_data.zip

In [2]:
# Locations of the raw inputs, relative to the notebook's working directory.
sgsdir = Path('raw_data/SGS')
dcgandir = Path('raw_data/256_dcgan_cp_150/256_dcgan_cp_150')
updated_gandir = Path('raw_data/updated_gan')
# NOTE: despite the "dir" suffix, this one points at a single CSV file.
diffusiondir = Path('raw_data/256_sim_cp_10_100.csv')

# Grid coordinates as a plain NumPy array (one row per grid node).
coord = pd.read_csv(Path('raw_data/coords_256.csv')).to_numpy()

In [3]:
# SGS - 50 realizations (legacy parquet loader, kept commented out; sgs is loaded from image files in the next cell)
# sgs = np.empty((len(os.listdir(sgsdir)), 256, 256))

# for i, name in enumerate(os.listdir(sgsdir)):
#     sgs[i] = pd.read_parquet(sgsdir+"/"+name).values.reshape(300,300)[:256, :256]

In [4]:
def ImgData(filename, size=256):
    """Load every image in a directory as a flattened grayscale array.

    Parameters
    ----------
    filename : str or os.PathLike
        Directory containing the image files (despite the parameter name,
        this is a directory path, not a single file).
    size : int, optional
        Edge length in pixels that every image is expected to have.
        Defaults to 256.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, size**2)``. Row ``i`` holds the ``i``-th
        file, in sorted filename order, converted to 8-bit grayscale and
        divided by 255 so values lie in ``[0, 1]``.

    Raises
    ------
    ValueError
        If an image does not contain exactly ``size**2`` pixels.
    """
    # List the directory once and sort it: os.listdir returns entries in
    # arbitrary order, which would make the row order vary between machines.
    names = sorted(os.listdir(filename))
    imgs = np.empty((len(names), size**2))

    for i, name in enumerate(names):
        path = os.path.join(filename, name)
        # Image.open is lazy and keeps the file handle open; the context
        # manager releases it as soon as the pixels have been converted.
        with Image.open(path) as img:
            pixels = np.array(img.convert('L'), dtype=np.float32) / 255.0

        imgs[i] = pixels.reshape(size**2)

    return imgs

# Load the SGS images and restore each flattened row to a 256 x 256 grid.
sgs = ImgData(sgsdir).reshape((-1, 256, 256))
sgs.shape

(100, 256, 256)

In [5]:
# Shift and halve the SGS values. This cell is not idempotent: every re-run
# applies the transform again to the already-transformed array.
# NOTE(review): ImgData already divides by 255, and rescale_range later applies
# a min-max rescale to sgs, so this affine step looks redundant -- confirm.
sgs = 0.5 * (sgs + 1)

In [6]:
# Sanity check: the number of coordinate rows should equal the number of
# pixels in one 256 x 256 realisation.
coord.shape, sgs.shape

((65536, 2), (100, 256, 256))

In [7]:
# Sorted unique values found in the first and second coordinate columns.
x_uniq, y_uniq = (np.unique(coord[:, col]) for col in (0, 1))

# 2-D coordinate grids spanned by those two axes.
xx, yy = np.meshgrid(x_uniq, y_uniq)

xx.shape, yy.shape

((256, 256), (256, 256))

In [8]:
# Spacing between the first two unique coordinate values along each axis.
x_uniq[1]-x_uniq[0], y_uniq[1]-y_uniq[0]

(500.0, 500.0)

## DCGAN

In [9]:
# DCGAN - 100 samples
# Reuse the ImgData loader defined earlier in this notebook instead of
# repeating its loop: it returns one flattened grayscale image per row,
# divided by 255, which is then restored to 256 x 256 grids here.
dcgan = ImgData(dcgandir).reshape((-1, 256, 256))

dcgan.shape

(100, 256, 256)

## Updated GAN

In [10]:
# Updated GAN - 100 samples
# Reuse the ImgData loader defined earlier in this notebook instead of
# repeating its loop: it returns one flattened grayscale image per row,
# divided by 255, which is then restored to 256 x 256 grids here.
gan = ImgData(updated_gandir).reshape((-1, 256, 256))

gan.shape

(100, 256, 256)

## Diffusion

In [11]:
# Diffusion samples, read from a header-less CSV and reshaped to 256 x 256 grids.
# NOTE(review): an earlier note here said "50 samples (randomly sampled from 100
# original -- too big to push to repo!)", but the reshape below only succeeds
# when the file holds exactly 100 realisations -- confirm which file is expected.
diffusion = pd.read_csv(diffusiondir, header=None).to_numpy().reshape((100, 256, 256))
diffusion.shape

(100, 256, 256)

In [12]:
# Lower and upper bounds of the common value range that every ensemble is
# rescaled to in this cell.
# NOTE(review): presumably the extrema of the reference data -- confirm and
# record their provenance/units.
dmin = -665.37
dmax = 636.33

def rescale_range(x, a, b):
    """Linearly map the values of ``x`` onto the interval ``[a, b]``.

    The smallest non-NaN value of ``x`` is mapped to ``a`` and the largest to
    ``b``; NaN entries stay NaN.

    Parameters
    ----------
    x : array_like
        Values to rescale. The minimum and maximum are taken over the whole
        array, ignoring NaNs.
    a, b : float
        Lower and upper bound of the target range.

    Returns
    -------
    numpy.ndarray
        Rescaled values with the same shape as ``x``. If all non-NaN values
        of ``x`` are identical, they are mapped to ``a`` instead of producing
        NaN through a 0/0 division.

    Raises
    ------
    ValueError
        If ``x`` is empty (raised by ``numpy.nanmin``).
    """
    x = np.asarray(x)
    # Compute the extrema once instead of scanning the array three times.
    lo = np.nanmin(x)
    hi = np.nanmax(x)
    span = hi - lo

    if span == 0:
        # Constant input: every offset from the minimum is zero, so the
        # result is ``a``. Multiplying by 0.0 keeps NaN entries as NaN.
        return a + (x - lo) * 0.0

    return a + ((x - lo) * (b - a)) / span

# Bring all four ensembles onto the same [dmin, dmax] range. Each ensemble is
# rescaled with its own overall minimum and maximum.
sgs, diffusion, dcgan, gan = (
    rescale_range(ensemble, dmin, dmax)
    for ensemble in (sgs, diffusion, dcgan, gan)
)

In [13]:
# Persist the rescaled ensembles together with the coordinate grids.
np.savez_compressed(
    'data.npz',
    sgs=sgs,
    diffusion=diffusion,
    dcgan=dcgan,
    gan=gan,
    xx=xx,
    yy=yy,
)

# Make variograms

In [14]:
import multiprocessing as mp
import skgstat as skg

In [42]:
# Variogram settings shared by every realisation of every ensemble.
maxlag = 50000
n_lags = 30
downsample = 0.1

kwargs = {
    'bin_func': 'even',
    'n_lags': n_lags,
    'maxlag': maxlag,
    'normalize': True,
    'samples': downsample,
}

# One [coordinates, flattened values, settings] entry per realisation; each
# entry is later unpacked by pool.starmap into one experimental_variogram call.
# All entries share the same coord array and the same kwargs dict.
sgs_inputs = [[coord, sgs[i].flatten(), kwargs] for i in range(100)]
diffusion_inputs = [[coord, diffusion[i].flatten(), kwargs] for i in range(100)]
dcgan_inputs = [[coord, dcgan[i].flatten(), kwargs] for i in range(100)]
gan_inputs = [[coord, gan[i].flatten(), kwargs] for i in range(100)]

In [43]:
import variogram
import importlib
# Reload so that changes made to the variogram module since its first import
# in this kernel take effect.
importlib.reload(variogram)

# Import the function only after the reload so the name is bound to the
# refreshed definition.
from variogram import experimental_variogram

In [44]:
%%time

if __name__ == '__main__':
    # A single pool of 8 worker processes is shared by all four ensembles.
    # starmap unpacks each [coord, values, kwargs] entry into the positional
    # arguments of experimental_variogram.
    with mp.Pool(processes=8) as pool:
        result_sgs = pool.starmap(experimental_variogram, sgs_inputs)
        result_diff = pool.starmap(experimental_variogram, diffusion_inputs)
        result_dcgan = pool.starmap(experimental_variogram, dcgan_inputs)
        result_gan = pool.starmap(experimental_variogram, gan_inputs)

    # Stack the per-realisation results into one array per ensemble.
    sgs_var = np.array(result_sgs)
    diffusion_var = np.array(result_diff)
    dcgan_var = np.array(result_dcgan)
    gan_var = np.array(result_gan)

CPU times: total: 1.27 s
Wall time: 13min 59s


In [49]:
# Build one Variogram from the first SGS realisation with the shared settings;
# its .bins attribute is stored alongside the variograms in the next cell.
vario = skg.Variogram(coord, sgs[0].flatten(), **kwargs)

In [50]:
# Persist the variogram arrays of all four ensembles plus the bins of `vario`.
np.savez_compressed(
    'variograms.npz',
    sgs=sgs_var,
    diffusion=diffusion_var,
    dcgan=dcgan_var,
    gan=gan_var,
    bins=vario.bins,
)

In [39]:
# `result` is not assigned until a later cell, so on a top-to-bottom run this
# cell failed with NameError. Use result_sgs, the SGS output of the pooled
# run earlier in this notebook, instead.
sgs_var = np.array(result_sgs)
sgs_var.shape

(100, 30)

### Diffusion

In [36]:
%%time

if __name__ == '__main__':
    # Compute the diffusion variograms on their own with 8 worker processes.
    # NOTE(review): the combined pool cell earlier in this notebook already
    # builds diffusion_var from the same inputs, so this re-run looks
    # redundant -- confirm before keeping it.
    with mp.Pool(processes=8) as pool:
        result = pool.starmap(experimental_variogram, diffusion_inputs)

CPU times: total: 375 ms
Wall time: 3min 37s


In [39]:
# Stack the list returned by the preceding pool cell into one array and show
# its shape. This depends on `result` still holding the diffusion output.
diffusion_var = np.array(result)
diffusion_var.shape

(100, 30)

### DCGAN

In [36]:
%%time

if __name__ == '__main__':
    # Compute the DCGAN variograms on their own with 8 worker processes.
    # NOTE(review): the combined pool cell earlier in this notebook already
    # builds dcgan_var from the same inputs, so this re-run looks
    # redundant -- confirm before keeping it.
    with mp.Pool(processes=8) as pool:
        result = pool.starmap(experimental_variogram, dcgan_inputs)

CPU times: total: 375 ms
Wall time: 3min 37s


In [39]:
# Stack the list returned by the preceding pool cell into one array and show
# its shape. This depends on `result` still holding the DCGAN output.
dcgan_var = np.array(result)
dcgan_var.shape

(100, 30)

### Updated GAN

In [36]:
%%time

if __name__ == '__main__':
    # Compute the updated-GAN variograms on their own with 8 worker processes.
    # NOTE(review): the combined pool cell earlier in this notebook already
    # builds gan_var from the same inputs, so this re-run looks
    # redundant -- confirm before keeping it.
    with mp.Pool(processes=8) as pool:
        result = pool.starmap(experimental_variogram, gan_inputs)

CPU times: total: 375 ms
Wall time: 3min 37s


In [39]:
# Stack the list returned by the preceding pool cell into one array and show
# its shape. This depends on `result` still holding the updated-GAN output.
gan_var = np.array(result)
gan_var.shape

(100, 30)

In [23]:
# Single-realisation call of the helper that the pool cells run via starmap.
# This rebinds `vario`, which an earlier cell set to an skg.Variogram.
# NOTE(review): the recorded outputs below show a variogram object, while the
# pooled results stack to (100, 30) arrays -- this cell looks like it predates
# a change to experimental_variogram; confirm it still runs on a fresh kernel.
vario = experimental_variogram(coord, sgs[0].flatten(), kwargs)

In [31]:
# Display the repr of the object bound to `vario`.
vario

< spherical Semivariogram fitted to 30 bins >

In [33]:
# Inspect the type of the object's `.experimental` attribute.
# NOTE(review): this requires `vario` to be an object with that attribute --
# verify against what experimental_variogram currently returns.
type(vario.experimental)

numpy.ndarray