In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
# Locate the dataset folder: <parent of the current working directory>/datasets/guass/
# Built with os.path so the separator is not hard-coded.
# NOTE(review): the folder is spelled 'guass' here — confirm it matches the directory on disk.
npath = os.path.join(os.path.dirname(os.getcwd()), 'datasets', 'guass', '')  # swap 'guass' for any other dataset folder
npath

In [None]:
# Read the two text files (values separated by whitespace) and pair them up
# so that each row of `data` is one sample (x_i, y_i).
x_data, y_data = (
    np.fromfile(npath + fname, sep=' ') for fname in ("x_b2.txt", "y_b2.txt")
)
data = np.array([x_data, y_data]).T

In [None]:
# Scatter the raw samples: first column on the horizontal axis, second on the vertical.
plt.scatter(data[:, 0], data[:, 1])
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")

In [None]:
# Covariance between the two coordinates. `data` holds one sample per row and one
# variable per column, so rowvar=False is required; with the default (rowvar=True)
# np.cov treats every row as a variable and returns an N x N matrix instead of 2 x 2.
cm = np.cov(data, rowvar=False)  # covariance matrix
cm.shape

In [None]:
# np.linalg.svd returns (u, s, vh). They are bound here as V, S, U, so `U` holds the
# right singular vectors as rows; later cells take U[0] as the first principal direction.
# NOTE(review): `data` is not mean-centred before the SVD — confirm that is intended.
V,S,U = np.linalg.svd(data,full_matrices=False)

In [None]:
U.shape

In [None]:
def colorgen(colora="rgbk"):
    """Yield the items of `colora` one at a time (by default the characters r, g, b, k)."""
    yield from colora

# Module-level colour generator; its only use in plotVec is currently commented out.
cgen = colorgen("rbgk")
def plotVec(V,ax,namer="None",a=0,c='r'):
    """Draw the first two rows of `V` as line segments starting at the point (a, a).

    Parameters
    ----------
    V : array-like, at least 2 x 2
        Row 0 is drawn as a solid line, row 1 as a dashed line.
    ax : object with `plot` and `legend` methods (e.g. a matplotlib Axes)
    namer : unused; kept so existing calls keep working.
    a : scalar offset added to every entry of `V` and used as the segment origin.
    c : single-character matplotlib colour code appended to the line style.
    """
    # Work on a shifted copy: the original `V += a` modified the caller's array in place.
    shifted = np.asarray(V) + a
    ax.plot([a, shifted[0, 0]], [a, shifted[0, 1]], '-' + c, linewidth=3, label=r"$1^{st} PC$")
    ax.plot([a, shifted[1, 0]], [a, shifted[1, 1]], '--' + c, linewidth=2, label=r"$2^{nd} PC$")
    ax.legend()

In [None]:
# Raw samples with the two singular directions (rows of U, scaled by 2) drawn on top.
fig, ax = plt.subplots(figsize=(6, 4.5))
ax.plot(data[:, 0], data[:, 1], 'o')

plotVec(2 * U, ax, "Variance")
ax.set_xlabel(r"$x_1$")
ax.set_ylabel(r"$x_2$")

## Low dim

In [None]:
# Keep only the first principal direction (row 0 of U).
pc = U[0]

xr = data @ pc                # reduced: one scalar coordinate per sample along pc
xp = xr[:, np.newaxis] * pc   # predicted: rank-1 reconstruction in the original 2-D space

In [None]:
print(xr.shape)
print(pc.shape)

In [None]:
# PC1 reconstruction (red) drawn first, raw samples (blue) on top; only the
# reconstruction carries a legend label.
plt.plot(xp[:, 0], xp[:, 1], '.r', label='Data Reconstructed from PC1')
plt.plot(data[:, 0], data[:, 1], '.b')
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.legend()

## Make up a source term

In [None]:
# Split the sample matrix into its two coordinate columns and print every shape.
x1, y1 = data.T
for arr in (x1, y1, data):
    print(arr.shape)

In [None]:
# Made-up "source term": exponential of a fixed linear combination of the two coordinates,
# evaluated per sample. The coefficients are hard-coded.
z1 = np.exp(0.62*(3.1*x1-2.08*y1))
z1.shape

In [None]:
n = 610

In [None]:
# Source term against x1: the first n samples in the default colour, the rest in red.
head, tail = slice(None, n), slice(n, None)
plt.plot(x1[head], z1[head], 'o')
plt.plot(x1[tail], z1[tail], 'ro')
plt.xlabel(r"$x_i$")
plt.ylabel(r"$f(x_1,x_2)$")


In [None]:
# First principal direction and the samples' scalar coordinates along it.
pc1 = U[0]
xr = data @ pc1

In [None]:
# Rank-1 reconstruction: each scalar coordinate in xr times the direction pc1.
xp = xr[:, np.newaxis] * pc1

**Recalculate the source term from the predicted data: xp**

In [None]:
from sklearn.ensemble import RandomForestRegressor

## Random Forest (or Bootstrap Aggregation) should perform better than
## a generalized additive model (MARS, for example). Ref: Tibshirani, ESL page 322

In [None]:
# 500-tree random forest with a fixed seed. oob_score=True requests the out-of-bag score,
# which is not read in the cells visible here.
model = RandomForestRegressor(n_estimators=500, oob_score=True, random_state=100)
# Fit on the PC1 reconstruction `xp` with the source term `z1` as target.
# NOTE(review): the author left an open question on this line: "should be zp? Check 1#".
# NOTE(review): the model is later evaluated on the same `xp` it was fitted on — confirm
# whether a split at `n` was intended.
model.fit(xp,z1)

In [None]:
zp = model.predict(xp)

In [None]:
# Source term for every sample (blue) against the forest prediction for samples n onward (red).
tail = slice(n, None)
plt.plot(x1, z1, "b.", label="Source")
plt.plot(x1[tail], zp[tail], "r.", label="Predicted")
plt.xlabel(r"$x_i$")
plt.ylabel(r"$f(x_1,x_2)$")

plt.legend()

In [None]:
# The quantity computed is the mean ABSOLUTE error (no squaring), so it is labelled MAE;
# the previous label called it "MSE".
print("MAE is ",np.mean(np.abs(zp-z1)))

## Combinator

In [None]:
from ipywidgets import interact
import os,sys
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.svm import SVR,LinearSVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

In [None]:
from utils import co_kurtosis,co_variance

In [None]:
# Regressors that can play the "decoder" role, keyed by a short name.
# Every estimator that accepts a seed gets one, so a Restart & Run All reproduces
# the same error table (the MLP was previously unseeded).
modeldict = {
    'linear' : LinearRegression(),
    'svm' : SVR(),
    'forest': RandomForestRegressor(n_estimators=500, oob_score=True, random_state=100),
    'knn' : KNeighborsRegressor(),
    'neural' : MLPRegressor(random_state=100),
}

In [None]:
def encode(x,method=co_variance):
    """Project `x` onto one direction and map it back into the original space.

    `method(x)` produces a matrix whose SVD is taken; row 0 of the third SVD factor
    is used as the direction. Returns the outer product of the per-sample
    projections with that direction (the "encoded set").
    """
    moment = method(x)
    _, _, right_vecs = np.linalg.svd(moment, full_matrices=False)
    direction = right_vecs[0]  # only the leading vector is kept
    projections = np.dot(x, direction)
    return np.outer(projections, direction)

def decode(z,xp,model):
    """Fit `model` with inputs `xp` and targets `z`, then return its predictions on `xp`.

    The model object passed in is fitted in place; prediction uses the same
    inputs it was trained on.
    """
    inputs = xp
    model.fit(inputs, z)
    return model.predict(inputs)


In [None]:
def true_mse(a, ap):
    """Mean squared error between `a` and `ap`."""
    return np.mean((a - ap) ** 2)


def mae(a, ap):
    """Mean absolute error between `a` and `ap`."""
    return np.mean(np.abs(a - ap))


def tse(x,y):
    """Total absolute error between two collections of coordinate arrays.

    `x` and `y` are converted to arrays and transposed, the L1 norm of the
    difference is taken along axis 1, and the per-row norms are summed.
    """
    x = np.array(x).T
    y = np.array(y).T
    return np.linalg.norm(x - y, ord=1, axis=1).sum()


# `mse` used to be defined twice in this cell: a squared-error `def` that was immediately
# shadowed by an absolute-error lambda. The effective behaviour (absolute error) is kept
# under the existing name so callers are unaffected; prefer `mae` / `true_mse` in new code.
mse = mae

In [None]:
# x1,y1 = x.T
# z1 = np.exp(-0.62*(3.1*x1+1.08*y1))
dat = {'x':x1,'y':y1,'s':z1}

In [None]:
# result.setdefault(?)

In [None]:
pcdict = {'variance':co_variance,
         'kurtosis':co_kurtosis}
def comma(verbose=True,fi='x',si='s',enc=co_variance,dec=modeldict['linear']):
    """Encode the samples, reconstruct two chosen quantities, and report the errors.

    Parameters
    ----------
    verbose : if True, plot original vs. predicted values and print the errors.
    fi, si : keys into `dat` selecting the first and second quantity to reconstruct.
    enc : callable passed to `encode` as `method`.
    dec : regressor passed to `decode`; it is re-fitted by each call.

    Returns
    -------
    (e1, e2, e3) : `mse` of the first quantity, `mse` of the second quantity, and
    `tse` of the pair, each comparing the selected originals with their predictions.
    """
    xa = dat[fi]
    ya = dat[si]
    # Encode the sample matrix `data`. The previous code passed the global `x`, which is
    # not defined before this cell in a top-to-bottom run (and is later bound to an
    # unrelated random 4x2 array).
    xen = encode(data,method=enc)
    xp = decode(xa,xen,model=dec)
    yp = decode(ya,xen,model=dec)

    if verbose:
        plt.plot(xa,ya,".",label="Original")
        plt.plot(xp,yp,"r.",label="Predicted")
        plt.xlabel(fi)
        plt.ylabel(si)
        plt.title(str(dec)[:16])
        plt.legend()
        plt.show()
    # Compare each prediction with the quantity it was fitted to (xa / ya). The previous
    # code always compared against x1 / y1, whatever `fi` / `si` selected.
    e1,e2 = mse(xa,xp),mse(ya,yp)
    e3 = tse([xa,ya],[xp,yp])

    if verbose:
        print(f""" method {enc},
        Reconstruction
        x error {e1} 
        y error {e2}
        total err {e3}
        """)
    return e1,e2,e3
    
interact(comma,verbose=[True,False],fi=['x','y'],si=['y','s'],enc=pcdict,dec=modeldict)

In [None]:
# Run every encoder / regressor combination silently and collect the error triples as
# results[encoder_name][model_name] = [e1, e2, e3].
# The loop variables have descriptive names; the inner one used to be `n`, which
# overwrote the global sample-split index `n`.
results = {}
for enc_name, enc_fn in pcdict.items():
    results[enc_name] = {}
    for model_name, regressor in modeldict.items():
        e1, e2, e3 = comma(enc=enc_fn, dec=regressor, verbose=False)
        results[enc_name][model_name] = [e1, e2, e3]
    
        
        

In [None]:
import pandas as pd


In [None]:
pd.DataFrame(results)

In [None]:
# Regroup the results by error component: errs[i][encoder][model] is entry i of the
# [e1, e2, e3] list stored in results[encoder][model].
errs = [
    {
        enc_name: {model_name: triple[idx] for model_name, triple in per_model.items()}
        for enc_name, per_model in results.items()
    }
    for idx in range(3)
]

In [None]:
# NOTE(review): an unfinished `plt.` expression stood here; it was a SyntaxError and has been removed.

In [None]:
# Horizontal bar chart of the first error component (entry 0 of every error triple).
df1 = pd.DataFrame(errs[0])
err_ax = df1.plot.barh()
err_ax.set_title("Species reconstruction error")

In [None]:
x = np.random.rand(4,2)
x

In [None]:
import pandas as pd
df = pd.DataFrame(x)
df.index = ["First","second","Thired","Finiss"]
df

In [None]:
# Index at which the samples are split for plotting.
# (An unfinished `plt.` expression that followed was a SyntaxError and has been removed.)
n = 610

In [None]:
# Create the axes first and pass them to pandas. `df1.plot.barh()` without `ax=` opens
# its own figure, so the previous `plt.figure(figsize=(3,2))` stayed empty and the
# requested size was never applied to the chart.
fig, ax = plt.subplots(figsize=(3, 2))
df1.plot.barh(ax=ax)
ax.set_xlabel("Reconstruction Errors")
ax.set_ylabel("Reconstruction Method")


In [None]:
# Original --------->  LowDim ======> Reconstruction

In [None]:
# Chem species xi --------> eta_i (trim_few pcs) ========>

In [None]:
# ------> is Variance/Kurtosis

In [None]:
# Small scratch mapping; print each key with its value in insertion order.
new = dict(data=0, fifty=50, Four=4)
for k, v in new.items():
    print(k, v)