# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [249]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [250]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data() # get data

In [24]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
    n, m = x.shape
    permutation = np.random.permutation(n)
    x = x[permutation]
    y=y[permutation]
    split_position = int(n*train_size)
    return x[:split_position], x[split_position:], y[:split_position], y[split_position:]

In [31]:
X_train, X_test, y_train, y_test= split_data(x, y) # split your data into x_train, x_test, y_train, y_test
# print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
# print(len(X_train.T[0])

800


In [27]:
def covariance(x, mu):  # mu = 1x3 and x= nx3  for a single class | eg. y=0
    return 
    # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
    pass

In [51]:
# def covariance(x, mu):  # mu = 1x3 and x= nx3  for a single class | eg. y=0
#     n, d = x.shape
#     sigma = np.zeros((d,d))
#     for di in range(d):
#         for dx in range(d):
#             vac = np.zeros(n)
#             for i in range(n):
#                 vac[i]= (x[i][di]-mu[di])*(x[i][dx]-mu[dx])
#             sigma[di,dx]=np.mean(vac)
#     return sigma

In [52]:
covariance(x, x.mean(0))

array([[1.84310829, 0.02787855, 1.00037396],
       [0.02787855, 1.0007055 , 0.05533637],
       [1.00037396, 0.05533637, 1.74657168]])

In [50]:
np.cov(x, rowvar= 0)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [251]:
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu = None
    self.phi = None
    self.sigma = None
    
    
  def fit(self,x,y):
    k=len(np.unique(y)) # Number of class.
    m,d=x.shape  # input dim
    # m= ... # Number of examples.
    self.sigma = np.zeros((k, d, d))
    ## Initialize mu, phi and sigma
    self.mu,self.phi= self.calc_mus(x,y,k)#: kxd, i.e., each row contains an individual class mu.
    for ki in range(k):
        indexes = np.where(y==ki)
        self.sigma[ki]= self.covariance(x[indexes], self.mu[ki])
    #: kxdxd, i.e., each row contains an individual class sigma.
#     , self.= self.cal_phis(x,y)# d-dimension
    ## START THE LEARNING: estimate mu, phi and sigma.


  def predict_proba(self,x):
    n,d= x.shape
    k_class= 2 # Number of classes we have in our case it's k = 2
    vals = []
    p = np.zeros((n,k_class))
    for b in range(k_class):
        for i in range(n):
#             print(1/(2*np.pi)**(d/2)*np.sqrt(np.linalg.det(self.sigma[b])))
            p[i,b]=(1/(2*np.pi)**(d/2)*np.sqrt(np.linalg.det(self.sigma[b])))*np.exp(-((x[i]-self.mu[b]).T@np.linalg.inv(self.sigma[b])@(x[i]-self.mu[b])))
    return p
    
  def covariance(self, x, mu):  # mu = 1x3 and x= nx3  for a single class | eg. y=0
    n, d = x.shape
    sigma = np.zeros((d,d))
    for di in range(d):
        for dx in range(d):
            vac = np.zeros(n)
            for i in range(n):
                vac[i]= (x[i][di]-mu[di])*(x[i][dx]-mu[dx])
            sigma[di,dx]=np.mean(vac)
    return sigma
    # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.

  def calc_mus(self,x, y, k):
    n, d = x.shape
    self.mu = np.zeros((k, d))
    self.phi = np.zeros((k, d))
    for i in range(k):
        indexes = np.where(y==i)
        x_transposed = x[indexes].T
        for di in range(d):
            self.mu[i, di]= np.mean(x_transposed[di])
            self.phi[i,di] = np.sum(x_transposed[di])/len(x)
    return self.mu, self.phi

  def predict(self,x):
    y_pred = self.predict_proba(x)
    return y_pred.argmax(axis=1)
  
  def accuracy(self, y, ypreds):
    return np.mean(np.where(y==ypreds,1,0))

In [252]:
model= GDA()
model.calc_mus(x, y, 2)

sigma = np.zeros((2, 3, 3))

model.fit(X_train, y_train)
# x_pred=model.predict_proba(X_test)
ya= model.predict(X_test)
acc=model.accuracy(ya, y_test)
print(acc)

0.975


In [241]:
yproba= model.predict_proba(X_test)
yproba

array([[1.24388630e-030, 6.36084727e-003],
       [5.30732624e-005, 2.83002177e-015],
       [5.77797892e-006, 2.26898068e-011],
       [5.38376843e-003, 1.01836776e-004],
       [1.53192920e-016, 1.96720787e-003],
       [2.76340300e-072, 5.29588053e-003],
       [9.47484472e-003, 4.09571588e-008],
       [5.40025871e-023, 5.75293396e-005],
       [3.87316094e-062, 1.10520525e-002],
       [1.32240766e-117, 3.07336317e-003],
       [4.77100384e-005, 1.25442362e-042],
       [7.23396168e-032, 1.83479939e-003],
       [7.50864355e-027, 3.55106898e-003],
       [1.88823749e-003, 5.31616031e-005],
       [2.94124141e-006, 1.00692760e-022],
       [5.88596969e-004, 6.21835420e-016],
       [1.38570994e-003, 1.31849138e-011],
       [2.83013274e-053, 3.45970438e-003],
       [2.72895857e-005, 7.40770985e-008],
       [5.09954566e-015, 3.44595681e-004],
       [1.09947049e-003, 8.73551010e-007],
       [1.51037877e-006, 9.51027289e-040],
       [3.26419758e-024, 1.85817089e-002],
       [6.8

In [242]:
ypreds= model.predict(X_test)
ypreds

array([1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1,
       0, 1])

In [243]:
model.accuracy(y_test, ypreds)

97.5

In [244]:
a = np.array([1,0,1,1,0,1,0,0,0,0])
print(np.where(a==1))

(array([0, 2, 3, 5]),)
