# Extract data from output files
### Analyze the output from a single LBANN run
March 9, 2020 \
April 6, 2020 : Major edit to store files in order of epochs \
April 21, 2020: Major edit, added jupyter widgets to compare pixel intensity plots \
May 8, 2020: Major edit, using all images for a given batch

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import sys

import time
from scipy import fftpack
from ipywidgets import *

In [5]:
%matplotlib widget

In [6]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/LBANN/lbann_cosmogan/3_analysis/')
from modules_image_analysis import *

[NbConvertApp] Converting notebook modules_image_analysis.ipynb to script
[NbConvertApp] Writing 17167 bytes to modules_image_analysis.py


In [7]:
### Transformation functions for image pixel values
def f_transform(x):
    return 2.*x/(x + 4. + 1e-8) - 1.


def f_invtransform(s):
    return 4.*(1. + s)/(1. - s + 1e-8)

## Explore image samples

In [8]:
def f_widget_compare(sample_names,sample_dict,Fig_type='pixel',rescale=True,log_scale=True,bins=25,mode='avg',normalize=True,bkgnd=[]):
    '''
    Module to make widget plots for pixel intensity or spectrum comparison for multiple sample sets
    '''
    
    ### Crop out large pixel values
    for key in sample_names:
        print(sample_dict[key].shape)
        sample_dict[key]=np.array([arr for arr in sample_dict[key] if np.max(arr)<=0.994])
        print(sample_dict[key].shape)
    
    img_list=[sample_dict[key] for key in sample_names]
    label_list=list(sample_names)
    
    
    bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
    
    if rescale: 
        for count,img in enumerate(img_list):
            img_list[count]=f_invtransform(img)
        if len(bkgnd): bkgnd=f_invtransform(bkgnd)
#         hist_range=(0,2000)
    else:
        bins=f_transform(bins)
#         hist_range=(-1,0.996)
    assert Fig_type in ['pixel','spectrum'],"Invalid mode %s"%(mode)
    
    if Fig_type=='pixel':
#         f_compare_pixel_intensity(img_lst=img_list,label_lst=label_list,normalize=normalize,log_scale=log_scale, mode=mode,bins=bins,hist_range=hist_range)
        f_compare_pixel_intensity(img_lst=img_list,label_lst=label_list,normalize=normalize,log_scale=log_scale, mode=mode,bins=bins,hist_range=None,bkgnd_arr=bkgnd)

    elif Fig_type=='spectrum':
        f_compare_spectrum(img_lst=img_list,label_lst=label_list,log_scale=log_scale,bkgnd_arr=bkgnd)



## Compare lbann images with input and keras code images

In [9]:
### Load validation input samples
img_raw='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/dataset_2_smoothing_200k/train.npy'
a1=np.load(img_raw)[:10000]
s_raw=f_transform(a1[:,:,:,0])[:10000]

print(s_raw.shape)
# print(s_raw.shape,[a.shape for a in s_keras])

(10000, 128, 128)


In [10]:
### Load images from keras code
img_keras='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run5/models/gen_imgs.npy'
img_keras='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run6/models/gen_imgs.npy'

s_keras=[[] for i in range(2)]
for count,i in enumerate([5,6]):
    img_keras='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run{0}_fixed_cosmology/models/gen_imgs.npy'.format(str(i))
    a1=np.load(img_keras)
    s_keras[count]=a1[:,:,:]

In [11]:
# ### Extract a few images generated by Lban 
parent_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200702_071108_gen_img_065330_batchsize_512/'
# parent_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200702_074157_gen_img_070005_batchsize_256/'

f_strg=parent_dir+'dump_outs/trainer0/model0/sgd.testing.epoch.*.step.*_gen_img_instance1_activation_output0.npy'
f_list=glob.glob(f_strg)
print(f_list)

arr=[np.load(fname)[:,0,:,:] for fname in f_list]
s_lbann=np.vstack(arr)
print(s_lbann.shape,np.max(s_lbann))


['/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200702_071108_gen_img_065330_batchsize_512/dump_outs/trainer0/model0/sgd.testing.epoch.3.step.3_gen_img_instance1_activation_output0.npy', '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200702_071108_gen_img_065330_batchsize_512/dump_outs/trainer0/model0/sgd.testing.epoch.0.step.0_gen_img_instance1_activation_output0.npy', '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200702_071108_gen_img_065330_batchsize_512/dump_outs/trainer0/model0/sgd.testing.epoch.1.step.1_gen_img_instance1_activation_output0.npy', '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200702_071108_gen_img_065330_batchsize_512/dump_outs/trainer0/model0/sgd.testing.epoch.2.step.2_gen_img_instance1_activation_output0.npy']
(10023, 128, 128) 0.9956047


In [12]:
# ### Another dataset
lst=[(51,20320),(55,22080),(47,18880)]
s_new=[[] for i in range(len(lst))]
for count,run in enumerate(lst):
    epoch,step=run[0],run[1]
    print(epoch,step)
    prefix='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200701_065330_batchsize_512/dump_outs/trainer0/model0/'
    fname=prefix+'sgd.training.epoch.{0}.step.{1}_gen_img_instance1_activation_output0.npy'.format(epoch,step)
    s_new[count]=np.load(fname)[:,0,:,:]


51 20320
55 22080
47 18880


In [13]:
def f_chisqr(images,val_hist,val_err,val_spec,val_spec_err,bins,transform):
    ''' Compute chi-sqr of rows wrt to input data'''

    val_dr=val_hist.copy()
    val_dr[val_dr<=0.]=1.0    ### Avoiding division by zero for zero bins

    ### Get all images in a batch
    sample=images if not transform else f_invtransform(images)

    ### Compute pixel histogram for row   ### !!Both pixel histograms MUST have same bins and normalization!
    gen_hist,gen_err=f_pixel_intensity(sample,plot=False,normalize=True,bins=bins,mode='avg')
    spec,spec_err=f_compute_spectrum(sample,plot=False)

    ### Compute chi-sqr
    ### Used in keras code : np.sum(np.divide(np.power(valhist - samphist, 2.0), valhist))
    ###  chi_sqr :: sum((Obs-Val)^2/(Val))
    sq_diff=(gen_hist-val_hist)**2        
    chi_sqr_list=[]

    for count,(start,end) in enumerate(zip([0,22,38,0],[22,38,None,None])):  # 4 lists : small, medium, large pixel values and full 
        chi_sqr_list.append(np.sum(np.divide(sq_diff[start:end],val_dr[start:end])))

    idx=None  # Choosing the number of histograms to use. Eg : -5 to skip last 5 bins

    chi_sqr_list.append(np.sum(np.divide(sq_diff[:idx],1.0))) ## chi-sqr without denominator division
    chi_sqr_list.append(np.sum(gen_err[:idx])/np.sum(val_err[:idx])) ## measures total spread in histograms wrt to input data

    ### computing the spectral loss chi-square
    chi_sqr_list.append(np.sum((val_spec[:50]-spec[:50])**2/(spec[:50]**2)))

    return chi_sqr_list


In [14]:
dict_samples={'raw': s_raw[:2000],'lbann1':s_lbann[:5000],'keras1':s_keras[0],'keras2':s_keras[1],'snew1':s_new[0],'snew2':s_new[1],'snew3':s_new[2]}


In [15]:
df1=pd.DataFrame([])

s_input=s_raw[:]
transform=False  # If true, it computes histogram in the orignal scale of pixels ie. 0-2000 
bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
if not transform: bins=f_transform(bins)   ### scale to (-1,1)
#     bins=100
#     print(bins)

### Get pixel histogram of all input data
val_hist,val_err=f_pixel_intensity(s_input,plot=False,normalize=True,bins=bins,mode='avg')    
### Computing spectrum ###
val_spec,val_spec_err=f_compute_spectrum(s_input,plot=False)
del s_input

for name,images in dict_samples.items():
    print(name,images.shape)
    chi_sqrs=f_chisqr(images,val_hist,val_err,val_spec,val_spec_err,bins,transform)
    keys=['name','chi_sqr1a','chi_sqr1b','chi_sqr1c','chi_sqr1d','chi_sqr2','chi_img_var','chi_spec']
    dict1=dict(zip(keys,[name]+chi_sqrs))
#     print(dict1)
    df1=df1.append(dict1,ignore_index=True)
    

raw (2000, 128, 128)
lbann1 (5000, 128, 128)
keras1 (5000, 128, 128)
keras2 (5000, 128, 128)
snew1 (512, 128, 128)
snew2 (512, 128, 128)
snew3 (512, 128, 128)


In [16]:
df1

Unnamed: 0,chi_img_var,chi_spec,chi_sqr1a,chi_sqr1b,chi_sqr1c,chi_sqr1d,chi_sqr2,name
0,2.26307,0.000732,5.2e-05,4.4e-05,0.000488,0.000584,3e-06,raw
1,1.493134,0.382274,0.005164,0.001487,0.00428,0.010931,0.000482,lbann1
2,1.421143,0.0272,8.4e-05,2.9e-05,0.000198,0.000311,5.8e-05,keras1
3,1.477085,0.03267,0.000482,0.00026,0.000185,0.000928,0.000295,keras2
4,5.057884,0.067963,0.001636,0.001368,0.008776,0.01178,0.000429,snew1
5,5.047597,0.109619,0.010802,0.006443,0.035665,0.05291,0.006076,snew2
6,4.459891,0.11742,0.003403,0.000716,0.010159,0.014278,0.001548,snew3


In [None]:
# max_val=np.amax(s_lbann,axis=(1,2))
# print(np.where(max_val>0.994))
# max_val.shape

# plt.figure()
# plt.plot(max_val)

In [None]:
# f_pixel_intensity(s_lbann[330],label='',normalize=False,log_scale=True,mode='simple')
# f_pixel_intensity(f_invtransform(s_lbann[330]),label='',normalize=False,log_scale=True,mode='simple')

In [17]:
# dict_samples={'raw': s_raw[:2000],'lbann1':s_lbann[:5000],'keras1':s_keras[0],'keras2':s_keras[1]}

# dict_samples={'raw': s_raw,'keras1':s_keras[0],'keras2':s_keras[1],'s_new1':s_new1,'s_new2':s_new2,'s_new3':s_new3}

bkgnd=s_raw
bkgnd=[]
interact_manual(f_widget_compare,sample_dict=fixed(dict_samples),
                sample_names=SelectMultiple(options=dict_samples.keys()),
                Fig_type=ToggleButtons(options=['pixel','spectrum']),
                bins=SelectionSlider(options=np.arange(10,200,10),value=50),
                mode=['avg','simple'],bkgnd=fixed(bkgnd))

interactive(children=(SelectMultiple(description='sample_names', options=('raw', 'lbann1', 'keras1', 'keras2',…

<function __main__.f_widget_compare(sample_names, sample_dict, Fig_type='pixel', rescale=True, log_scale=True, bins=25, mode='avg', normalize=True, bkgnd=[])>

## Comparison for older dataset with multiple cosmologies

In [None]:
### Load images from keras code
img_keras='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run_200k_samples_peter_dataset_20_epochs/models/gen_imgs.npy'
a1=np.load(img_keras)
s_keras=a1[:,:,:]

### Load validation samples
img_raw='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/peter_dataset/raw_train.npy'

a1=np.load(img_raw)
s_raw=f_transform(a1[:,:,:,0])[:3000]

print(s_raw.shape,s_keras.shape)

In [None]:
### Extract a few images generated by Lban directly for a set of epochs
parent_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200529_111342_seed3273_80epochs/'

ff=[]

for epoch in [60,70]:
    f_strg=parent_dir+'dump_outs/trainer0/model0/sgd.validation.epoch.{}*gen_img*.npy'.format(epoch)
    lst=glob.glob(f_strg)
    ff.append(lst)
f_list=[fle for a in ff for fle in a] ## Flattening out a list of lists
print(len(f_list))

arr=[np.load(fname)[:,0,:,:] for fname in f_list]
s_lbann=np.vstack(arr)
print(s_lbann.shape,np.max(s_lbann))

In [None]:
dict_samples={'lbann':s_lbann, 'keras':s_keras,'raw': s_raw}
bkgnd=[]

interact_manual(f_widget_compare,sample_dict=fixed(dict_samples),
                sample_names=SelectMultiple(options=dict_samples.keys()),
                Fig_type=ToggleButtons(options=['pixel','spectrum']),
                bins=SelectionSlider(options=np.arange(10,200,10),value=50),
                mode=['avg','simple'],bkgnd=fixed(bkgnd))