In [423]:
import numpy as np
from sklearn.datasets import make_hastie_10_2

### Important Parameters:

In [424]:
N_SAMPLE = 200
TV_RATIO = 0.8
N_TRAIN = int(N_SAMPLE*TV_RATIO)

### Get Data:

In [425]:

X,y = sklearn.datasets.make_hastie_10_2(n_samples=N_SAMPLE, random_state=None)

In [421]:
print(X[:5,:])

[[-0.89285345  0.24279028 -1.1444126  -0.33471867  0.58528437  1.08140706
   0.5519119   0.47022415 -0.8429417   0.37413704]
 [-1.73162857 -0.26329552  0.51869942 -0.26950049  2.00294608  2.45408667
  -1.23503318  1.40683048  1.19969165  0.92684624]
 [-0.37972927  0.95199815 -0.47972362 -0.45041395 -0.52770192 -0.86058965
  -0.78772956  2.07316509 -0.09069499  0.29210754]
 [ 0.65714564  0.82801008  0.96381419  1.4408851  -0.66706405  0.67435262
   2.35106398 -0.61607145  0.76966316 -0.9344723 ]
 [-0.3331528  -0.13646056 -0.14160308  0.9842972  -1.44209571 -0.66258911
  -0.11309678  0.07824391  0.68342435 -0.19026776]]


In [422]:
print(y[:5])

[-1.  1. -1.  1. -1.]


In [383]:
y[y==1] = 1
y[y==-1] = 0

In [385]:
X,X_test = (X[:N_TRAIN,:],X[N_TRAIN:,:])
y,y_test = (y[:N_TRAIN],y[N_TRAIN:])

In [386]:
print(X.shape,X_test.shape)

(160, 10) (40, 10)


In [387]:
print(y.shape,y_test.shape)

(160,) (40,)


In [388]:
class NB_Binary_Classifier(object):
    
    def fit(self, X,y):
        
        def col_stats(dat):
            '''helper fn calc mean and var'''
            res = []
            for i in range(0,dat.shape[1]):
                res.append((np.mean(dat[:,i]),np.var(dat[:,i],ddof=1)))
            return res
    
        y = y.reshape(-1,1)
        dat = np.hstack([y,X])
        self.category = np.unique([i[0] for i in dat]).tolist()
        X0 = X[np.ravel((y==self.category[0])),:]
        X1 = X[np.ravel((y==self.category[1])),:]
        # Output
        self.dat0_stats = col_stats(X0)
        self.dat1_stats = col_stats(X1)
        self.P0 = len(X0)/len(y)
        self.P1 = len(X1)/len(y)

    
    def predict(self,X_test):
        def prob_distribution(mu,var,x):
            '''helper fn calc distribution'''
            return (1/np.sqrt(2*np.pi*var)) * np.exp( (-(x-mu)**2)/(2*var) )
        
        
        n = X_test.shape[1]

        self.PP0s = []
        self.PP1s = []
        self.evidences = []
        predictions = []
        
        
        for row in range(X_test.shape[0]):
            post_p = []
            cat0_pp = []
            cat1_pp = []
            #Cat0
            for col in range(X_test.shape[1]):
                #cat 0 pp:
                cat0_pp.append(prob_distribution(self.dat0_stats[col][0],self.dat0_stats[col][1],X_test[row][col]))
                #cat 1 pp:
                cat1_pp.append(prob_distribution(self.dat1_stats[col][0],self.dat1_stats[col][1],X_test[row][col]))


            PP0 = self.P0 * np.prod(cat0_pp)
            PP1 = self.P1 * np.prod(cat1_pp)
            evidence = PP0+PP1

            
            self.cat0_pp = cat0_pp
            self.cat1_pp = cat1_pp
            self.PP0s.append(PP0)
            self.PP1s.append(PP1)
            self.evidences.append (evidence)
            
            # Output:
            
            predictions.append( ( round(PP0/evidence,3), round(PP1/evidence,3)   )  )
        
        
        return predictions

        
        
    

        

In [389]:
model = NB_Binary_Classifier()

In [390]:
model.fit(X=X,y=y)

In [391]:
prediction = model.predict(X_test)
prediction

[(0.87, 0.13),
 (0.144, 0.856),
 (0.825, 0.175),
 (0.067, 0.933),
 (0.922, 0.078),
 (0.304, 0.696),
 (0.929, 0.071),
 (0.711, 0.289),
 (0.452, 0.548),
 (0.858, 0.142),
 (0.387, 0.613),
 (0.727, 0.273),
 (0.629, 0.371),
 (0.807, 0.193),
 (0.002, 0.998),
 (0.182, 0.818),
 (0.333, 0.667),
 (0.432, 0.568),
 (0.859, 0.141),
 (0.53, 0.47),
 (0.117, 0.883),
 (0.391, 0.609),
 (0.259, 0.741),
 (0.407, 0.593),
 (0.13, 0.87),
 (0.628, 0.372),
 (0.652, 0.348),
 (0.524, 0.476),
 (0.348, 0.652),
 (0.164, 0.836),
 (0.85, 0.15),
 (0.952, 0.048),
 (0.249, 0.751),
 (0.137, 0.863),
 (0.269, 0.731),
 (0.031, 0.969),
 (0.033, 0.967),
 (0.052, 0.948),
 (0.424, 0.576),
 (0.059, 0.941)]

In [405]:
pred_res = [np.argmax(i) for i in prediction]

### Check CV Accuracy:

In [412]:
def accuracy(prediction,label):
    return float(np.sum(np.ravel(prediction)==np.ravel(label)))*100/len(prediction)
    

In [415]:
acc = accuracy(pred_res,y_test)

In [417]:
print('The cross validation result is {}%'.format(acc))

The cross validation result is 80.0%
