In [28]:
from sklearn.pipeline import Pipeline
from dask_ml.cluster import KMeans
from dask_ml.decomposition import PCA
import dask.array as da
import xarray as xr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


In [29]:
# Directory that contains both HDF5 files. Kept in a single constant so the
# notebook can be pointed at another copy of the data by editing one line.
DATA_DIR = '/home/ppiper/Dropbox/local/ihtc_nozzle/data/doe_lhs_multirun_N200'

# Low-fidelity results and the variables read from them.
# NOTE(review): the names suggest pressure / temperature / Mach fields -- confirm.
low_fidelity_dataset = f'{DATA_DIR}/Q1D.hdf5'
low_fidelity_vars_to_import = ['p.txt', 'T.txt', 'M.txt']

# High-fidelity results and the variables read from them.
high_fidelity_dataset = f'{DATA_DIR}/SU2_fluid.hdf5'
high_fidelity_vars_to_import = ['Pressure', 'Temperature', 'Mach', 'Heat_Flux']

# Open the low-fidelity file with dask-backed variables (chunks=-1 requests a
# single chunk per array) and replace the anonymous HDF5 dimension names with
# the variables that label them.
lfds = xr.open_dataset(low_fidelity_dataset, chunks=-1).swap_dims(
    {'phony_dim_5': 'M', 'phony_dim_6': 'id'}
)

# Register the listed variables as coordinates on the dimension given next to
# each name. `.values` loads them into memory, which is intended here:
# coordinates are small label arrays.
lfds = lfds.assign_coords({
    name: (dim, lfds[name].values)
    for name, dim in (
        ('M', "M"),
        ('id', "id"),
        ('T0in', "id"),
        ('p0in', "id"),
        ('thickness', "id"),
    )
})

# Open the high-fidelity file with dask-backed variables (chunks=-1 requests a
# single chunk per array) and replace the anonymous HDF5 dimension names with
# named dimensions.
hfds = xr.open_dataset(high_fidelity_dataset, chunks=-1).swap_dims(
    {'phony_dim_5': 'M', 'phony_dim_6': 'id', 'phony_dim_7': 'N'}
)

# Register the listed variables as coordinates. The first three are attached
# to `id` only; the wall coordinates are two-dimensional over (M, id).
hfds = hfds.assign_coords({
    name: (dims, hfds[name].values)
    for name, dims in (
        ('T0in', "id"),
        ('p0in', "id"),
        ('thickness', "id"),
        ('wall_x', ["M", "id"]),
        ('wall_y', ["M", "id"]),
    )
})

# Stack the selected variables of each dataset along the first axis into one
# snapshot matrix per fidelity level.
#
# `.data` is used instead of `.values` on purpose: the datasets were opened
# with `chunks=-1`, so every variable is already a dask array. `.values` would
# load each one into NumPy only for `da.concatenate` to wrap it in dask again;
# `.data` hands over the existing lazy arrays unchanged.
lf_snapshots = da.concatenate(
    [lfds[var].data for var in low_fidelity_vars_to_import], axis=0)

hf_snapshots = da.concatenate(
    [hfds[var].data for var in high_fidelity_vars_to_import], axis=0)

In [30]:
# Surrogate model: reduce the inputs to 10 principal components, then fit an
# ordinary least-squares map on the reduced inputs.
steps = [
    ('pca', PCA(n_components=10)),
    ('linear', LinearRegression()),
]

pipe = Pipeline(steps=steps)

In [31]:
# Transpose both snapshot matrices so that the first axis is the one being
# split, then hold out 30% of the rows for testing. The fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    lf_snapshots.T,
    hf_snapshots.T,
    test_size=0.3,
    random_state=42,
)

In [49]:
# Handle on the pipeline's own 'pca' step. This is the same object the
# pipeline fits, not a copy, so fitting one also changes the other.
pca = pipe['pca']

In [50]:
# `pca` is the pipeline's own 'pca' step, and `pipe.fit` refits that very
# instance on X_train (a Pipeline without `memory` does not clone its steps).
# Reducing the targets with it would therefore throw away the target basis as
# soon as the pipeline is fitted, leaving no way to map predictions back to the
# high-fidelity space. A dedicated PCA with the same number of components is
# used for the targets instead; keep `target_pca` around to call
# `target_pca.inverse_transform(...)` on the pipeline's predictions.
target_pca = PCA(n_components=pca.n_components)
y_train_reduced = target_pca.fit_transform(y_train)

pipe.fit(X_train, y_train_reduced)

(55, 633)