In [3]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [4]:
import os
# Switch the working directory to a hard-coded ChempyMulti checkout before importing Chempy.
# NOTE(review): presumably Chempy is imported relative to this directory - confirm; the
# absolute path is machine-specific.
os.chdir('/home/oliverphilcox/ChempyMulti/')
from Chempy.parameter import ModelParameters
# ModelParameters instance built with no arguments
a=ModelParameters()
from Chempy.cem_function import single_timestep_chempy

## Load the data

In [6]:
# Define elements to use:

els = ['C','Fe','He','Mg','N','Ne','O','Si'] # TNG elements

# Load training data (two archives; 500000 runs each, going by the filenames):
# NOTE(review): mmap_mode is passed through unchanged; it may have no effect for .npz archives - confirm
full_input = '/mnt/store1/oliverphilcox/ChempyMultiData/TNG/Random_Training_Data_TNG_500000_0_v2.npz'
dat1=np.load(full_input,mmap_mode='r')
full_input2 = '/mnt/store1/oliverphilcox/ChempyMultiData/TNG/Random_Training_Data_TNG_500000_1_v2.npz'
dat2=np.load(full_input2,mmap_mode='r')

# Both archives must list the same elements in the same order, otherwise the column
# indices computed below would pick different elements from each file.
# Read each element list once (indexing the archive re-reads the array every time).
all_els = dat1['elements']
all_els2 = dat2['elements']
assert np.array_equal(all_els,all_els2), "Training files have different element lists"

# Stack the two training sets: rows are runs, params[i] produced big_abun[i]
params=np.concatenate([dat1['params'],dat2['params']])
big_abun=np.concatenate([dat1.f.abundances,dat2.f.abundances])

# Column index of each requested element within the stored abundance array
el_indices=np.zeros(len(els),dtype=int)
for e,el in enumerate(els):
    matches=np.where(el==all_els)[0]
    if len(matches)!=1:
        raise ValueError("Element %s found %d times in training data (expected exactly once)"%(el,len(matches)))
    # Take the scalar index explicitly rather than assigning a length-1 array to a scalar slot
    el_indices[e]=matches[0]

# Filter out unwanted elements
abun = big_abun[:,el_indices]


## Remove any bad runs
# A run is flagged bad (bitmap=0) when its first selected abundance, els[0], is exactly zero.
# Vectorised form of the per-row check; bitmap stays an int array of 0/1 flags.
bitmap=(abun[:,0]!=0).astype(int)

keep_rows=bitmap==1
cut_params=params[keep_rows]
cut_abuns=abun[keep_rows]

# Drop runs where any selected abundance is NaN or infinite
good_index=np.where(np.isfinite(cut_abuns).all(axis=1))[0] # remove infinities
cut_params2=cut_params[good_index]
cut_abuns2=cut_abuns[good_index]

# Keep only runs whose last parameter (the birth time) exceeds 0.99
good_index2=np.where(cut_params2[:,-1]>0.99)[0] # remove bad birth times
cut_params3=cut_params2[good_index2]
cut_abuns3=cut_abuns2[good_index2]

# Set standardization parameters (per-column mean and standard deviation)
par_mean=np.mean(cut_params3,axis=0)
par_std=np.std(cut_params3,axis=0)
ab_mean=np.mean(cut_abuns3,axis=0)
ab_std=np.std(cut_abuns3,axis=0)

# Change birth-time mean/std to give t in [0,1] - we use (T-mean_T)/std_T here so this works
# (np.min/np.max instead of the builtins, which iterate the array element by element in Python)
t_min=np.min(cut_params3[:,-1])
t_max=np.max(cut_params3[:,-1])
par_mean[-1]=t_min
par_std[-1]=t_max-t_min

# Now randomize the selection.
# A dedicated seeded generator makes the row order reproducible across kernel restarts
# without touching the global NumPy random state.
SHUFFLE_SEED=0
shuffle_rng=np.random.RandomState(SHUFFLE_SEED)
len_filt=shuffle_rng.permutation(len(cut_abuns3))

# Create randomized and standardized training data
trainX=(cut_params3[len_filt]-par_mean)/par_std
trainY=(cut_abuns3[len_filt]-ab_mean)/ab_std

# Add in T^2 term for accuracy: append powers 2..n_poly of the rescaled birth time
# (last column of trainX) as extra input columns
n_poly=2
n_par_cols=trainX.shape[1]
sq_trainX=np.zeros([trainX.shape[0],n_par_cols+n_poly-1])
sq_trainX[:,:n_par_cols]=trainX
for i in range(n_poly-1):
    sq_trainX[:,n_par_cols+i]=trainX[:,-1]**(i+2)

print('Using %d training data points for a %d->%d shape network'%(sq_trainX.shape[0],sq_trainX.shape[1],trainY.shape[1]))

Using 915956 training data points for a 7->8 shape network


In [7]:
# Load test data:
datT=np.load('/mnt/store1/oliverphilcox/ChempyMultiData/TNG/Random_Test_Data_TNG_50000_0_v2.npz')
elsT=datT.f.elements
# el_indices was derived from the training element list, so it only selects the right
# columns here if the test archive stores its elements in the same order.
assert np.array_equal(elsT,all_els), "Test and training files have different element lists"
abunT=datT.f.abundances[:,el_indices]
paramsT=datT.f.params

# Flag runs whose first selected abundance is exactly zero (same criterion as for training data)
bitmapT=(abunT[:,0]!=0).astype(int)

# Remove dodgy data: zero-abundance runs, non-finite abundances, then last parameter <= 0.99
keep_rowsT=bitmapT==1
cut_paramsT=paramsT[keep_rowsT]
cut_abunsT=abunT[keep_rowsT]
good_indexT=np.where((np.isfinite(cut_abunsT).all(axis=1)))
cut_params2T=cut_paramsT[good_indexT]
cut_abuns2T = cut_abunsT[good_indexT]
good_index2T=np.where(cut_params2T[:,-1]>0.99)
cut_params3T=cut_params2T[good_index2T]
cut_abuns3T = cut_abuns2T[good_index2T]

# Standardize using same standardizations as before (training-set means and widths)
testX=(cut_params3T-par_mean)/par_std
testY=(cut_abuns3T-ab_mean)/ab_std

# Append the same birth-time power columns that were added to the training inputs
n_par_colsT=testX.shape[1]
sq_testX=np.zeros([testX.shape[0],n_par_colsT+n_poly-1])
sq_testX[:,:n_par_colsT]=testX
for i in range(n_poly-1):
    sq_testX[:,n_par_colsT+i]=testX[:,-1]**(i+2)

print("Using %d test data points"%sq_testX.shape[0])

Using 45723 test data points


## Create the network in scikit-learn

In [None]:
from sklearn import cross_validation
from sklearn.neural_network import MLPRegressor

def _fit_and_score(neurons,train_targets,test_targets,epochs,verbose):
    """Fit a one-hidden-layer tanh MLP on sq_trainX and score it on sq_testX.

    Shared implementation for single_regressor and all_regressor, which differ only
    in which target columns they pass in.

    Parameters
    ----------
    neurons : int
        Width of the single hidden layer.
    train_targets, test_targets : array
        Standardized targets matching the rows of sq_trainX / sq_testX; either one
        column (1-D) or all columns (2-D).
    epochs : int
        Passed to MLPRegressor as max_iter.
    verbose : bool
        Passed to MLPRegressor.

    Returns
    -------
    (scores, diffs, [w0, w1, b0, b1])
        Mean squared test error (averaged over test rows), absolute test residuals,
        and the fitted weights and intercepts of the two layers.
    """
    # NOTE(review): learning_rate='adaptive' is documented by scikit-learn as only used
    # with solver='sgd'; it is kept so the model configuration is unchanged.
    model=MLPRegressor(solver='adam',alpha=0.001,max_iter=epochs,learning_rate='adaptive',tol=1e-13,
                       hidden_layer_sizes=(neurons,),activation='tanh',verbose=verbose,
                       shuffle=True,early_stopping=True)

    model.fit(sq_trainX,train_targets)

    model_pred=model.predict(sq_testX)
    # axis=0 averages over test rows: a scalar for 1-D targets, one value per column for 2-D
    scores=np.mean((model_pred-test_targets)**2.,axis=0)
    diffs=np.abs(test_targets-model_pred)
    # One hidden layer => exactly two weight matrices and two intercept vectors
    w0,w1=model.coefs_
    b0,b1=model.intercepts_
    return scores,diffs,[w0,w1,b0,b1]

def single_regressor(neurons,el_index,epochs=1000,verbose=True):
    """Return out-of-sample score for a given number of neurons for one element

    Trains on column el_index of trainY and returns (score, diff, [w0,w1,b0,b1]):
    the test-set MSE, the absolute test residuals and the network coefficients.
    """
    return _fit_and_score(neurons,trainY[:,el_index],testY[:,el_index],epochs,verbose)

def all_regressor(neurons,epochs=1000,verbose=True):
    """Return out-of-sample score for a given number of neurons for all elements

    Trains one network on every column of trainY and returns (scores, diffs, [w0,w1,b0,b1]):
    the per-element test-set MSE, the absolute test residuals and the network coefficients.
    """
    return _fit_and_score(neurons,trainY,testY,epochs,verbose)


## Run single-element nets for various $n_\mathrm{neuron}$

In [None]:
import multiprocessing as mp
import tqdm

def neural_run(nn):
    """Train a single-element net (element index 0) with nn hidden neurons; return its test MSE."""
    print("Running for %d neurons"%nn)
    return single_regressor(nn,0)[0]

# Hidden-layer widths to scan
all_neurons = np.arange(5,85,5)

# The pool is created after neural_run is defined and is always shut down afterwards,
# so worker processes are not left behind (including when a worker raises).
p=mp.Pool()
try:
    neuron_scores=list(tqdm.tqdm(p.imap(neural_run,all_neurons),total=len(all_neurons)))
finally:
    p.terminate()
    p.join()

In [None]:
# Store the scanned hidden-layer widths together with their test scores
np.savez('8element_net_variable_neurons_L2_scores',
         neurons=all_neurons,
         scores=neuron_scores)

In [None]:
# Reload the cached neuron scan; the context manager closes the archive once both
# arrays have been read into memory.
with np.load('8element_net_variable_neurons_L2_scores.npz') as d:
    all_neurons=d['neurons']
    neuron_scores=d['scores']

In [None]:
# Test-set score against hidden-layer width, on a logarithmic y axis
plt.scatter(all_neurons,neuron_scores)
plt.xlabel(r'$n_\mathrm{neuron}$',fontsize=14)
plt.ylabel('L2 Score',fontsize=14)
plt.yscale('log')
plt.ylim([4e-4,1e-1])

## Now train networks in parallel using $n_\mathrm{neuron}=40$:

In [None]:
neurons=40
def mp_run(el_i):
    """Train the single-element network for element index el_i (worker for the process pool).

    Returns the (score, diff, coefficients) tuple produced by single_regressor.
    """
    el_index=int(el_i)
    print("Running net %d of %d"%(el_index+1,len(els)))
    return single_regressor(neurons,el_index,epochs=3000)

In [None]:
import multiprocessing as mp
import tqdm

# Train one network per element in parallel. The pool is always shut down afterwards
# so worker processes are not left behind (including when a worker raises).
p=mp.Pool()
try:
    output=list(tqdm.tqdm(p.imap(mp_run,range(len(els))),total=len(els)))
finally:
    p.terminate()
    p.join()

In [None]:
# Unpack the per-element results: one score, one row of absolute test residuals and
# one set of network coefficients per element.
n_els=len(els)
all_scores=np.zeros(n_els)
all_diffs=np.zeros([n_els,len(testY)])
coeffs=[]
for el_i in range(n_els):
    el_score,el_diff,co=output[el_i]
    all_scores[el_i]=el_score
    all_diffs[el_i]=el_diff
    coeffs.append(co)

In [None]:
# Each entry of coeffs is [w0, w1, b0, b1] for one single-element net.
# Place the first-layer weights and biases side by side, and collect the output biases.
w0=np.hstack([net[0] for net in coeffs])
b0=np.hstack([net[2] for net in coeffs])
b1=np.hstack([net[3] for net in coeffs])

## Read in w1 vector into sparse structure
# Output column i only receives the hidden units belonging to net i; all other entries stay zero
w1=np.zeros([w0.shape[1],b1.shape[0]])
assert neurons==w0.shape[1]/len(coeffs)
for i,net in enumerate(coeffs):
    hidden_rows=slice(int(neurons*i),int(neurons*(i+1)))
    w1[hidden_rows,i]=net[1][:,0]
    
    


In [None]:
def stacked_net_output(in_par):
    """Evaluate the stacked network on in_par using the module-level w0, b0, w1, b1.

    Applies the first layer, a tanh activation, then the second layer, and returns
    the result (no destandardization is applied here).
    """
    hidden=np.tanh(np.matmul(in_par,w0)+b0)
    return np.matmul(hidden,w1)+b1

In [None]:
# Save output
# Network weights, the input/output standardization constants, and the architecture settings
np.savez('/mnt/store1/oliverphilcox/ChempyMultiData/TNG/stacked_8_element_net.npz',
         w0=w0,
         w1=w1,
         b0=b0,
         b1=b1,
         in_mean=par_mean,
         in_std=par_std,
         out_mean=ab_mean,
         out_std=ab_std,
         activation='tanh',
         neurons=neurons)

In [None]:
# Reload data
# Only the weights and biases are restored here; the standardization constants stored
# in the same file are not read back. The context manager closes the archive afterwards.
with np.load('/mnt/store1/oliverphilcox/ChempyMultiData/TNG/stacked_8_element_net.npz') as dat:
    w0=dat['w0']
    w1=dat['w1']
    b0=dat['b0']
    b1=dat['b1']

In [None]:
# Absolute test-set residuals of the stacked network, in standardized units
stacked_resid=stacked_net_output(sq_testX)-testY
l1_err=np.abs(stacked_resid)

In [None]:
# Distribution of the absolute error for output column 2 (the third entry of els)
plt.hist(l1_err[:,2],
         bins=100,
         range=[0,.4]);

## Run for all networks together:

In [None]:
# Train one network for all elements at once. It uses the same hidden-layer width as the
# single-element nets (`neurons`) so the comparison below is like for like. The coefficients
# get a real name rather than `_`, which IPython uses for the last cell output.
comb_scores,comb_diffs,comb_coeffs = all_regressor(neurons,epochs=3000)

In [None]:
# Mean and scatter (across elements) of the standardized test MSE for the two approaches
for x_pos,mse,label in [(1,all_scores,'Stacked Nets'),(2,comb_scores,'Combined Net')]:
    plt.errorbar(x_pos,np.mean(mse),yerr=np.std(mse),marker='x',label=label)
plt.legend()
plt.title('MSE Scores');

In [None]:
# Both histograms share one range: zero up to the 99th percentile of the stacked-net residuals
hist_range=[0,np.percentile(all_diffs.ravel(),99)]
plt.hist(all_diffs.ravel(),range=hist_range,alpha=0.5,bins=50,label='Stacked Single Nets')
plt.hist(comb_diffs.ravel(),range=hist_range,alpha=0.5,bins=50,label='Combined Nets')
plt.legend();

In [None]:
## Compute L1 differences for combined and stacked nets (with correct normalizations)
real_all_diffs = np.asarray([ad*ab_std for ad in all_diffs.T])
real_comb_diffs = np.asarray([cd*ab_std for cd in comb_diffs])

In [None]:
# Keep the destandardized residuals and the element list for later plotting
plot_data_path='/mnt/store1/oliverphilcox/ChempyMultiData/TNG/Training_plot_data.npz'
np.savez(plot_data_path,
         real_all_diffs=real_all_diffs,
         real_comb_diffs=real_comb_diffs,
         els=els)

In [None]:
def percs(data,axis=0):
    """Return (median, lower error, upper error) of data along axis.

    The errors are the distances from the median down to the 15.865th percentile
    and up to the 84.135th percentile.
    """
    lower,median,upper=np.percentile(data,[15.865,50.,100.-15.865],axis=axis)
    return median,median-lower,upper-median
# Median and 1-sigma percentile errors of the L1 residuals, one value per element
all_percs=percs(real_all_diffs)
comb_percs=percs(real_comb_diffs)

FS=18 # axis-label font size
x_pos=np.arange(len(els))

plt.figure(figsize=(8,6))
# The two series are offset slightly in x so their error bars do not overlap
plt.errorbar(x_pos-0.05,all_percs[0],
             yerr=[all_percs[1],all_percs[2]],label='Parallel Nets',c='b')
plt.errorbar(x_pos+0.05,comb_percs[0],
             yerr=[comb_percs[1],comb_percs[2]],label='Combined Net',c='r')
plt.legend(fontsize=14)
plt.ylabel('L1 Distance [dex]',fontsize=FS)
plt.xlabel('Abundance',fontsize=FS)

# Tick labels: [X/Fe] for every element except iron, which is labelled [Fe/H]
names = []
for el in els:
    if el!='Fe':
        names.append('[%s/Fe]'%el)
    else:
        names.append('[Fe/H]')

# One tick per element (was hard-coded to 8, which only works for this element list)
plt.xticks(range(len(names)),names,fontsize=16);
plt.savefig('Plots_New/L1_Element_Error.pdf',bbox_inches='tight')

In [None]:
# 16th / 50th / 84th percentiles of all stacked-net residuals. The upper percentile is
# 100-15.865, mirroring the lower one (it previously read 100-17.865, a typo).
p1,p2,p3=np.percentile(real_all_diffs,[15.865,50.,100.-15.865])

In [None]:
# LaTeX-formatted median with its lower and upper errors
minus_err=p2-p1
plus_err=p3-p2
print(r'$%.3f_{-%.3f}^{+%.3f}$'%(p2,minus_err,plus_err))

In [None]:
# Mean and standard deviation of the destandardized residuals over all samples and elements
stacked_mean=np.mean(real_all_diffs).round(3)
stacked_std=np.std(real_all_diffs).round(3)
print("Single Stacked Nets",stacked_mean,stacked_std)

In [None]:
# Mean and standard deviation of the destandardized residuals over all samples and elements
comb_mean=np.mean(real_comb_diffs).round(3)
comb_std=np.std(real_comb_diffs).round(3)
print("Combined Nets",comb_mean,comb_std)

### So the single nets seem to perform better here.

## Now assess the convergence across parameter space:

#### Using mean L1 error here:

In [None]:
# Unstandardized parameter values for the training and test sets
train_par = cut_params3
test_par = cut_params3T

# Per-sample error of the stacked network: mean over elements of the absolute residual
stacked_pred = stacked_net_output(sq_testX)
abs_resid = np.abs((stacked_pred-testY)*ab_std) # destandardize here
stacked_diff = np.mean(abs_resid,axis=1)

In [None]:
# Corner plot of the network error across parameter space.
#   diagonal        - histogram of each test parameter, bars coloured by the median error in the bin
#   lower triangle  - test points for each parameter pair, coloured by their error
#   upper triangle  - switched off
data_tr = train_par
data_v = test_par
param_error = stacked_diff

text_size = 12

parameter_names = [r'$\alpha_\mathrm{IMF}$',r'$\log_{10}(\mathrm{N_{Ia}})$',
               r'$\log_{10}(\mathrm{SFE})$',
               r'$\log_{10}(\mathrm{SFR_{peak}})$',r'$\mathrm{x}_\mathrm{out}$',r'$T_\mathrm{star}$']
n_par = len(parameter_names)

# Plot settings
fig,axes = plt.subplots(nrows = n_par, ncols = n_par,figsize=(14.69,8.0))#,dpi=300)
alpha = 0.5
lw=2 # Linewidth
left = 0.1 # Left side of subplots
right = 0.8 # Right side
bottom = 0.075
top = 0.97
wspace = 0.0 # blankspace width between subplots
hspace = 0.0 # blankspace height between subplots
color_max = np.percentile(param_error,99.) # colour scale saturates at the 99th percentile of the error
plt.subplots_adjust(left=left,bottom=bottom,right=right,top=top,wspace=wspace,hspace=hspace)

cmap= cm.YlGnBu

# Scatter-panel limits follow the training-set range. Compute them once per column with
# NumPy instead of calling the builtin min/max on the full training set for every panel.
tr_min = np.min(data_tr,axis=0)
tr_max = np.max(data_tr,axis=0)

# Create plot
for i in range(n_par):
    for j in range(n_par):
        ax = axes[i,j]
        ax.locator_params(nbins=4)
        if j>i:
            # Upper triangle is unused
            ax.axis('off')
            continue
        if i==j:
            values = np.asarray(data_v[:,j])
            counts,edges = np.histogram(values,bins=10)
            n_bins = len(edges)-1
            max_count = float(np.max(counts))
            counts = np.divide(counts,max_count) # tallest bar has height 1
            median = np.zeros(n_bins)
            for k in range(n_bins):
                # Same bin membership as np.histogram: half-open bins, with the last bin
                # also including its right edge, so samples on an edge are not dropped
                if k==n_bins-1:
                    choice = np.logical_and(values>=edges[k],values<=edges[k+1])
                else:
                    choice = np.logical_and(values>=edges[k],values<edges[k+1])
                error = np.extract(choice,param_error)
                if len(error) != 0:
                    median[k] = np.median(error)
            colors = cmap(median/color_max)
            # align='edge' anchors each bar at its left bin edge; the default ('center')
            # would draw the histogram shifted half a bin to the left
            ax.bar(x = edges[:-1], height=counts, width = edges[1]-edges[0], align='edge',
                   color=colors,alpha=alpha, linewidth=0,rasterized=True)
            ax.set_xlim(np.min(values),np.max(values))
            ax.set_ylim(0,1.05)
            if j !=0:
                plt.setp(ax.get_yticklabels(), visible=False)
            # Mark the 15.865th / 84.135th percentiles (dashed) and the median (solid)
            y_lo,y_hi = ax.get_ylim()
            ax.vlines(np.percentile(values,15.865),y_lo,y_hi, color = 'k',alpha=alpha,linewidth = lw,linestyle = 'dashed')
            ax.vlines(np.percentile(values,100-15.865),y_lo,y_hi, color = 'k',alpha=alpha,linewidth = lw,linestyle = 'dashed')
            ax.vlines(np.percentile(values,50),y_lo,y_hi, color = 'k',alpha=alpha,linewidth = lw)
        else:
            # i>j: lower triangle
            if j !=0:
                plt.setp(ax.get_yticklabels(), visible=False)
            P1 = ax.scatter(data_v[:,j],data_v[:,i],marker='x',alpha=0.3,
                            c=param_error,vmin=0,vmax=color_max,cmap=cmap,s=3,rasterized=True)
            ax.set_xlim(tr_min[j],tr_max[j])
            ax.set_ylim(tr_min[i],tr_max[i])
        if i == n_par-1:
            ax.set_xlabel(parameter_names[j])
        if j ==0:
            ax.set_ylabel(parameter_names[i])

# Every scatter panel shares the same colour mapping, so any of them can drive the colourbar.
# The last one drawn is used, instead of over-plotting one panel a second time just for this.
cplot = P1
cax=fig.add_axes([0.82,0.06,0.02,0.9]);
plt.colorbar(cplot,cax=cax);

plt.show();


In [None]:
# Write the parameter-space error figure to disk
fig.savefig('Plots_New/Network_Error_Param_Space.png',
            dpi=300)