In [12]:
import LOM
from sklearn.cross_validation import train_test_split
from sklearn import metrics




## Steps:
- Generate three types of process through kernels:
    - Gaussian 1
    - Increasing/decreasing linear trend
    - Gaussian 2
- Generate the latent processes and a random phi, then add noise
- Label processes coming from the increasing trend with G1 as 1's, and those coming from the decreasing trend with G1/G2 as -1's
- Increase n and check the classification accuracy
- Stretch goal: compare with logistic regression and LDA


In [1]:
## Linear increasing kernel
import GPy
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
# Covariance kernels for the latent processes; all act on a 1-D input.
#
# `kenrel` (sic -- the misspelling is kept because generatelatent() reads this
# exact name) is the trend kernel: Linear x Brownian plus a Matern-5/2 term.
# An earlier assignment, Linear(variances=3) * Brownian(variance=2) with no
# Matern term, was overwritten on the very next line and has been dropped as
# dead code.
kenrel = GPy.kern.Linear(1,variances=3) * GPy.kern.Brownian(1,variance=3) + GPy.kern.Matern52(1,variance=2,lengthscale=3)
# Periodic Matern-3/2, cosine and exponentiated-quadratic kernels, all with
# lengthscale 3 and variance 2.
Matern1 = GPy.kern.PeriodicMatern32(1,lengthscale=3,variance=2)
Cosine1 = GPy.kern.Cosine(1,lengthscale=3,variance=2)
gaussian = GPy.kern.ExpQuad(1,lengthscale=3,variance=2)

In [2]:
def scale(d):
    """Min-max scale ``d`` so that its smallest value maps to 0 and its largest to 1.

    Parameters
    ----------
    d : array-like of numbers (must be non-empty).

    Returns
    -------
    Float array with the same shape as ``d``. When every entry of ``d`` is
    identical the range is zero; an all-zero array is returned in that case
    instead of the NaNs produced by dividing by zero.
    """
    # Cast to float so integer input is not floor-divided under Python 2.
    d = np.asanyarray(d, dtype=float)
    low = d.min()
    span = d.max() - low
    if span == 0:
        # Constant input: (d - low) is already all zeros, so avoid 0/0.
        return np.zeros_like(d)
    return (d - low)/span

def generatelatent(N,noiselevel=0.5):
    """Sample four noisy latent processes on the grid 0..N-1 and min-max scale each.

    Every process is one multivariate-normal draw whose covariance is a
    module-level kernel evaluated on the grid, plus ``noiselevel`` times
    standard-normal white noise. The kernels and means are, in order:
    ``kenrel`` (zero mean), ``gaussian`` (zero mean), ``Matern1`` (mean one)
    and ``Cosine1`` (mean one).

    Parameters
    ----------
    N : number of grid points, i.e. the length of each returned process.
    noiselevel : multiplier applied to the additive white noise.

    Returns
    -------
    Tuple ``(u1, u2, g1, g2)`` of length-N arrays, each passed through scale().
    """
    grid = np.linspace(0,N-1,N).reshape(N,1)

    def noisyDraw(meanVector, kernel):
        # GP draw first, white noise second: this keeps the order in which
        # the global RNG is consumed the same for every process.
        gpSample = np.random.multivariate_normal(mean=meanVector,cov=kernel.K(grid))
        whiteNoise = np.random.normal(0,1,N)
        return gpSample + noiselevel*whiteNoise

    zeroMean = np.zeros(N)
    unitMean = np.ones(N)
    recipes = [
        (zeroMean, kenrel),
        (zeroMean, gaussian),
        (unitMean, Matern1),
        (unitMean, Cosine1),
    ]
    return tuple(scale(noisyDraw(meanVector, kernel)) for meanVector, kernel in recipes)


Now we know our latent processes: g1, g2, increasingTrend, decreasingTrend.
We sample phi and then combine these to generate data such that:
- With probability 0.5:
    - take g1 and the increasing trend, and set the label to 1
- With probability 0.5:
    - take g1 or g2 with equal probability, combine it with the decreasing trend, and set the label to -1
        

In [3]:
def generateData(W,phi,latents,S=100,C=3,P=2,N=100):
    """Simulate S labelled observations by mixing latent processes through phi.

    Each sample is labelled 1 with probability 0.4 and -1 otherwise. The
    latent vector of a sample is the concatenation of all entries of
    ``latents`` except one: label 1 drops index 1, label -1 drops index 0.
    Observations are ``uBar * kron(phi, eye(N)).T`` plus independent
    standard-normal noise on every entry.

    Fixes relative to the previous version:
    * Both label branches dropped index 1, so the two classes had identical
      features and the labels carried no information about the data.
      NOTE(review): dropping index 0 for label -1 assumes ``latents`` is
      ordered (increasing trend, decreasing trend, g1, g2), as the tuple from
      generatelatent() appears to be -- confirm.
    * The noise was a single scalar added to the whole matrix (a constant
      offset); it is now drawn independently per entry.
    * The linear score built from ``W`` was computed but never used and has
      been removed.

    Parameters
    ----------
    W : unused; kept so existing calls keep working.
    phi : array of shape (channels, P) with the mixing weights.
    latents : sequence of 1-D arrays of length N; all but one are stacked, so
        ``len(latents) - 1`` must equal P.
    S : number of samples to generate.
    C : unused; the number of channels is taken from ``phi.shape[0]``.
    P : number of latent processes per sample.
    N : number of points per process.

    Returns
    -------
    YBar : np.matrix of shape (S, phi.shape[0] * N).
    L : float array of shape (S,) holding the labels 1 / -1.

    Raises
    ------
    ValueError : if the stacked latents do not have length N * P.
    """
    phiBar = np.kron(phi,np.eye(N))

    # The latent rows depend only on the label, so build each one once.
    positiveRow = np.hstack([u for i,u in enumerate(latents) if i != 1])
    negativeRow = np.hstack([u for i,u in enumerate(latents) if i != 0])
    for row in (positiveRow, negativeRow):
        if row.shape[0] != N*P:
            raise ValueError("stacked latents have length %d, expected N*P = %d"
                             % (row.shape[0], N*P))

    # One uniform draw per sample decides its label.
    isPositive = np.random.rand(S) < 0.4
    L = np.where(isPositive, 1.0, -1.0)
    uBar = np.where(isPositive[:, np.newaxis], positiveRow, negativeRow)

    clean = np.dot(uBar, phiBar.T)
    # Independent N(0, 1) noise per entry; the result stays an np.matrix as before.
    YBar = np.matrix(clean + np.random.normal(0,1,size=clean.shape))
    return YBar,L

### testing Infra:

## Testing the effect of inducing Points

In [None]:
# Experiment: sweep the value c (1..9) that is passed as C to both the data
# generator and the model, and record the mean F1 score and mean accuracy over
# 5 repetitions for each value.
# NOTE: the names assigned here (N, ind, ...) are module-level and are read
# again by a later cell.
Cs = range(1,10)
# Number of points per latent process.
N = 100
# Number of latent processes used to generate the data (passed as P to generateData).
actualP = 3
# Number of latent processes the model is asked to fit (passed as P to fit).
guessedP = 5
# Samples per generated data set; with test_size=0.4 this leaves 80 test samples.
S= 200

# Fraction of N passed to fit() as n=int(N*ind).
# NOTE(review): presumably the share of inducing points -- confirm against LOM.fit.
ind = 0.8
# Per-c results: c -> mean accuracy / mean F1 over the repetitions.
accuracies = {}
f1Scores = {}
for c in Cs:
    f1=[]
    accu=[]
    print "\n---\n C:",c
    # W and phi are drawn once per c and shared by all 5 repetitions.
    W = np.random.rand(actualP*N)
    phi = np.random.normal(loc=0,scale=1,size=(c,actualP))
    for times in range(5):
        # Generate the latent processes:
        latent = generatelatent(N=N)
        # Generate the data
        YBar,L = generateData(W,phi,latents=latent,C=c,N=N,P=actualP,S=S)
        # Create the train/test split (random_state=0 fixes the split itself)
        Y_train, Y_test, L_train, L_test = train_test_split(YBar, L, test_size=0.4, random_state=0)
        # Fit the model
        myModel = LOM.LOM(Y=Y_train,L=L_train,N=N,C=c,S=L_train.shape[0])
        myModel.fit(n=int(N*ind),iters=200,P=guessedP)
        # Predict for the test set
        predictions,Expresults, _ = myModel.predict(Y_test)
        # Show predicted vs. true label counts, then compute F1 and accuracy
        print "Result:",np.unique(predictions,return_counts=True),np.unique(L_test,return_counts=True)
        f1.append(metrics.f1_score(y_pred=predictions,y_true=L_test))
        accu.append(metrics.accuracy_score(y_pred=predictions,y_true=L_test))
    print f1,"\n",accu
    f1Scores[c] = np.mean(f1)
    accuracies[c] = np.mean(accu)
#plt.plot(induction,results)


---
 C: 1
Concatenated Latent Gaussian Processes:
(500, 400) (400, 400) 0



 20 40 60 80 100 120 140 160 180 Result: (array([-1.]), array([80])) (array([-1.,  1.]), array([56, 24]))
Concatenated Latent Gaussian Processes:
(500, 400) (400, 400) 0 20 40 60 80 100 120 140 160 180 Result: (array([-1.]), array([80])) (array([-1.,  1.]), array([41, 39]))
Concatenated Latent Gaussian Processes:
(500, 400) (400, 400) 0 20 40 60 80 100 120 140 160 180 Result: (array([-1.]), array([80])) (array([-1.,  1.]), array([53, 27]))
Concatenated Latent Gaussian Processes:
(500, 400) (400, 400) 0 20 40 60 80 100 120 140 160 180 Result: (array([-1.]), array([80])) (array([-1.,  1.]), array([41, 39]))
Concatenated Latent Gaussian Processes:
(500, 400) (400, 400) 0 20 40 60 80 100 120 140 160 180 Result: (array([-1.]), array([80])) (array([-1.,  1.]), array([40, 40]))
[0.0, 0.0, 0.0, 0.0, 0.0] 
[0.69999999999999996, 0.51249999999999996, 0.66249999999999998, 0.51249999999999996, 0.5]

---
 C: 2
Concatenated Latent Gaussian Processes:
(500, 400) (400, 400) 0 20 40 60 80 100 120 140 16

In [4]:
import pickle

In [8]:
# Load a previously saved data matrix and its labels.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# use this on files you created yourself or otherwise trust.
# The `with` blocks close the file handles, which the bare open() calls never did.
with open("YBar.data","rb") as ybarFile:
    YBar = pickle.load(ybarFile)
with open("Labels.data","rb") as labelsFile:
    L = pickle.load(labelsFile)

In [9]:
# Display the dimensions of the loaded data matrix.
YBar.shape

(100, 300)

In [10]:
# Fit the model on the data loaded from disk.
# The loaded YBar has 300 columns, i.e. C*N = 5*60. fit() previously used the
# global N (100, set by the C sweep), which asked for int(100*0.8) = 80 points
# although the model is built with N=60. The dimensions are now named once and
# used consistently.
# Requires train_test_split, LOM and `ind` from the earlier cells to be defined.
loadedN = 60
loadedC = 5
Y_train, Y_test, L_train, L_test = train_test_split(YBar, L, test_size=0.4, random_state=0)
myModel = LOM.LOM(Y=Y_train,L=L_train,N=loadedN,C=loadedC,S=L_train.shape[0])
myModel.fit(n=int(loadedN*ind),iters=200,P=3)

NameError: name 'train_test_split' is not defined