# Crash emulator processing 

We want to build an emulator that predicts whether a given set of parameter values will lead to a successful or a crashed simulation, based on a previous 300-member perturbed physics ensemble run at CNRM. This notebook creates the input and output data arrays that the emulator reads. 

#### Emulator equation : Y = f(X) 
* Y is a 1x300 binary matrix, Y={0:failed,1:succeeded}, flagging each simulation as failed or successful.
* X is a 30x300 matrix containing the values of the 30 physical parameters for each of the 300 perturbations.

## Import modules

In [None]:
# Computational modules 
%matplotlib inline
import xarray as xr
import glob
import os
import numpy as np
import netCDF4
from netCDF4 import Dataset
import pandas as pd
import re
from array import array
from pylab import *

# Plotting modules 
import matplotlib.pyplot as plt
#from mpl_toolkits.basemap import Basemap
import pandas.plotting
import matplotlib.ticker as ticker
# scatter plot matrix of the quantitative variables
from pandas.plotting import scatter_matrix
import seaborn as sns; sns.set()

# Keras modules 
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, Adam
from keras.regularizers import l2
import keras.backend as K
from scipy import stats
from keras import layers
from keras.optimizers import RMSprop

# Scikit-learn
from sklearn.tree import DecisionTreeRegressor

Using TensorFlow backend.


## Read the input.csv file and create inputdata array

In [None]:
# Load the parameter table from 'input.csv' (space-separated, first row
# taken as the header), then discard the 't_IDs' column so that `inputdata`
# holds only the remaining columns.
data = pd.read_csv("./input.csv", sep=' ')
inputdata = data.drop(columns=['t_IDs'])
inputdata  # last expression of the cell: shown as the cell output

In [None]:
# Keep the column labels of `inputdata` as a plain Python list
param_names = inputdata.columns.tolist()
param_names

# Dump the underlying numpy array of `inputdata` to a .npy file
inp = inputdata.values
np.save(file='/data/home/globc/peatier/CNRMppe/crash_emulator/inputdata_file.npy', arr=inp)

In [None]:
#inputdata[param_names[0]].hist()
#plt.show()

In [None]:
#inputdata[param_names[1]].hist()
#plt.show()

In [None]:
# Scatter-plot matrices of the parameters, one figure per group of columns.
#
# Python slices are half-open, so consecutive groups must be [0:10], [10:20],
# [20:30], ...; slicing [0:9], [10:19], [20:29] would silently leave out the
# columns at positions 9, 19 and 29. Stepping through `param_names` by
# `group_size` guarantees that every column ends up in exactly one figure.
group_size = 10
for start in range(0, len(param_names), group_size):
    group = param_names[start:start + group_size]
    scatter_matrix(inputdata[group], alpha=0.2, figsize=(15, 15), diagonal='hist')
    plt.show()

## Read output.csv and create outputdata array

In [None]:
# Load 'output.csv': space-separated, with no header row (header=None, so
# the columns get default integer labels).
# Presumably this file lists the IDs of the crashed simulations -- confirm.
data_out = pd.read_csv(
    "./output.csv",
    sep=' ',
    header=None,
)

In [None]:
# Strip the 'TUNINGV631.' prefix from every cell (it is replaced by a single
# space) so that only the trailing number of each ID remains.
# NOTE(review): with regex=True the final '.' of the pattern matches ANY
# character, not only a literal dot -- confirm this is intended.
dfindex = data_out.replace(to_replace='TUNINGV631.', value=' ', regex=True)

In [None]:
# Target frame: one row per row of `inputdata`, with a single column 'Y'
# initialised to 1 everywhere.
#
# Y is built as a standalone DataFrame on the same index instead of adding a
# 'Y' column to `inputdata`: writing the label into the feature table would
# make any later re-run of the cells that derive `param_names` or save
# `inputdata.values` include the target among the inputs.
Y = pd.DataFrame({'Y': 1}, index=inputdata.index)

In [None]:
# Flag the simulations listed in `dfindex` as failed in Y.
#
# Every value of `dfindex` is cast to int, flattened to a 1-D array and
# shifted by -1 (the numbers are treated as 1-based line numbers, whereas
# .iloc positions are 0-based). A flat integer array is an unambiguous
# positional indexer for .iloc, unlike a Python list of per-row Series.
list_line = dfindex.astype(int).values.ravel() - 1

Y.iloc[list_line] = 0
Y # 1=succeeded, 0=failed

In [None]:
# Persist the values of Y as a .npy file
out = Y.values
np.save(file='/data/home/globc/peatier/CNRMppe/crash_emulator/outputdata_file.npy', arr=out)