In [3]:
import pandas as pd
import numpy as np
from numpy.linalg import det,inv
from math import sqrt,pi
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


In [12]:
def qdaLearn(X,y):
    '''
     Inputs
         X - a N x d matrix with each row corresponding to a training example
         y - a N x 1 column vector indicating the labels for each training example
    
     Outputs
         means - A d x k matrix containing learnt means for each of the k classes
         covmats - A list of k d x d learnt covariance matrices for each of the k classes
    '''
   
    
    # IMPLEMENT THIS METHOD
    covmats = []
    labels = np.unique(y)
    
    total_label   = labels.shape[0]
    total_feature = X.shape[1]
    
    means  = np.zeros([total_label,total_feature])
     
    r = 0
    for i in labels:
        data = X[np.where(y == i)[0],]
        m = np.mean(data,axis=0)
        means[r,] = m
        r +=1
        data_transpose = np.transpose(data)
        covmats.append(np.cov(data_transpose))
        
    return means,covmats

In [21]:
def qdaTest(means,covmats,Xtest,ytest):
    # Inputs
    # means, covmats - parameters of the QDA model
    # Xtest - a N x d matrix with each row corresponding to a test example
    # ytest - a N x 1 column vector indicating the labels for each test example
    # Outputs
    # acc - A scalar accuracy value
    # ypred - N x 1 column vector indicating the predicted labels
    
    # IMPLEMENT THIS METHOD
    r = Xtest.shape[0]
    c = means.shape[0]
    res = np.zeros((r,c))
    
    for j in range(means.shape[0]):
        f = 1/np.sqrt((2*pi)**means.shape[1]*det(covmats[j]))
        res[:,j] = f * np.exp(-0.5*np.array([np.dot(np.dot((Xtest[i,:] - means[j,:]),inv(covmats[j])),np.transpose(Xtest[i,:] - means[j,:])) for i in range(Xtest.shape[0])]))
    ypred = np.argmax(res,axis=1)
    res = (ypred - ytest.ravel())
    acc_data = np.where(res)[0]
    acc = len(acc_data)
    return float(acc)/len(ytest),ypred

In [26]:
data2 =pd.read_csv('data1n.csv',header=None)
X=data2.iloc[:,:-1]
Y=data2.iloc[:,-1]
xtrain,xtest,ytrain,ytest=train_test_split(X,Y,test_size=0.3,random_state=1)
m,cv = qdaLearn(xtrain,ytrain)

TypeError: '(array([  0,   6,   7,  12,  13,  14,  15,  17,  19,  20,  23,  24,  27,
        28,  32,  33,  34,  35,  36,  37,  38,  39,  43,  47,  49,  51,
        55,  57,  62,  63,  64,  66,  67,  68,  72,  73,  74,  76,  78,
        79,  80,  81,  82,  83,  87,  88,  96,  98,  99, 100, 102, 103,
       104, 107, 114, 115, 120, 121, 122, 123, 124, 125, 127, 129, 131,
       132, 133, 135, 136, 138], dtype=int64),)' is an invalid key

In [27]:
data2

Unnamed: 0,0,1,2
0,14,42,1
1,29,40,1
2,22,22,1
3,11,20,1
4,10,35,1
...,...,...,...
195,46,30,-1
196,32,17,-1
197,4,33,-1
198,12,4,-1
