In [0]:
import numpy as np

In [0]:
def name2feature(filename, B = 128, FIX = 3, Loadfile = True):
  
  def hashfeatures(baby):
    # Takes a name and hash its first and last "FIX" number of letters and hash them
    # After hashing, they are converted into vector indices and those vector values are assigned values 1.
    
    n = np.zeros((B,1))
    for i in range(FIX):
      name = "prefix" + baby[0:min(i+1,len(baby))]
      n[abs(hash(name)%B)] = 1
      name = "suffix" + baby[max(len(baby)-1-i,0):(len(baby))]
      n[abs(hash(name)%B)] = 1
    return n

  if Loadfile:
    file1 = open(filename,'r')
    babynames = [name for name in file1.read().splitlines()]
    file1.close()
  else:
    babynames = filename.split('\n')

  x = np.zeros((len(babynames),B))
  
  for i in range(len(babynames)):
    x[i,:] = hashfeatures(babynames[i]).reshape(-1)
  return x



In [0]:
def genTrainFeatures(dim = 128,fix = 3):
  x = name2feature('boys.train',B = dim, FIX = fix)
  y = name2feature('girls.train',B = dim, FIX = fix)
  a = np.ones((x.shape[0],1))
  b = -1*np.ones((y.shape[0],1))
  X = np.concatenate((x, y), axis = 0)
  Y = np.concatenate((a, b), axis = 0)
  np.random.seed(42)
  perm = np.random.permutation(X.shape[0])
  X = X[perm,:]
  Y = Y[perm,:]
  return X,Y


### **Naive Bayes Linear Classifier**

In [0]:
def naivebayesPY(x,y):
  pos = ((y==1).sum())/len(y)
  neg = 1 - pos
  return pos,neg

In [0]:
def naivebayesPXY(x,y):
  # x : nxd Matrix
  # y : nx1 Labels
  # Output:
  # posprob: probability vector of p(x|y=1) (dx1)
  # negprob: probability vector of p(x|y=-1) (dx1) 

  y1 = (y==1).sum()
  y2 = (y==-1).sum()
  ind = (y==1).reshape(-1).tolist()
  ind2 = (y==-1).reshape(-1).tolist()
  posprob = (((np.sum(x[ind,:], axis = 0))+1)/(y1+2)).reshape(-1,1)
  negprob = (((np.sum(x[ind2,:], axis = 0))+1)/(y2+2)).reshape(-1,1)
  return posprob,negprob

In [0]:
def naivebayes(x,y,xtest):
  # x : nxd
  # y : nx1
  # xtest : 1xd
  # Finding log-odds of p(y=1|x)/p(y=-1|x)

  posprob,negprob = naivebayesPXY(x,y)
  pos,neg = naivebayesPY(x,y)
  poscond = np.matmul(np.log(posprob),xtest.T) + np.log(pos)
  negcond = np.matmul(np.log(negprob),xtest.T) + np.log(neg)
  logodds = poscond - negcond
  return logodds



In [0]:
def naivebayesCL(x,y):
  posprob,negprob = naivebayesPXY(x,y)
  pos,neg = naivebayesPY(x,y)
  w = np.log(posprob) - np.log(negprob)
  b = np.log(pos) - np.log(neg)
  return w,b


In [0]:
def classifyLinear(x,w,b = 0):
  # x : nxd
  # w : dx1
  # b : scaler
  pred = np.matmul(x,w) + b
  pred = np.where(pred>0,+1,-1)
  return pred

In [0]:
def error(pred,true):
  # pred : nx1
  # true : nx1
  acc = (pred == true).sum()/len(true) 
  return (1-acc)
  

In [11]:
x,y = genTrainFeatures(dim = 128)
w,b = naivebayesCL(x,y)
pred = classifyLinear(x,w,b)
print("The error on the training data is",error(pred,y)*100,"%")

The error on the training data is 23.166666666666668 %


In [12]:
x,y = genTrainFeatures(dim = 1200)
w,b = naivebayesCL(x,y)
pred = classifyLinear(x,w,b)
print("The error on the training data is",error(pred,y)*100,"%")

The error on the training data is 12.250000000000005 %


In [13]:
dimentions = 128
x,y = genTrainFeatures(dim = dimentions)
w,b = naivebayesCL(x,y)
pred = classifyLinear(x,w,b)
print("Training error:",error(pred,y)*100,"%")
check = True
while check:
  name = input("Please enter your name : ")
  if len(name) == 0:
    break
  xtest = name2feature(name, B=dimentions, Loadfile= False)
  pred = classifyLinear(xtest, w, b)
  if pred == +1:
    print(name,",I am sure you are a nice boy!!!")
  else:
    print(name,",I am sure you are a nice girl!!!")

  


Training error: 23.166666666666668 %
Please enter your name : ramsha
ramsha ,I am sure you are a nice girl!!!
Please enter your name : angelina
angelina ,I am sure you are a nice girl!!!
Please enter your name : julie
julie ,I am sure you are a nice girl!!!
Please enter your name : adam
adam ,I am sure you are a nice girl!!!
Please enter your name : eve
eve ,I am sure you are a nice boy!!!
Please enter your name : christoph
christoph ,I am sure you are a nice girl!!!
Please enter your name : marina
marina ,I am sure you are a nice girl!!!
Please enter your name : melina
melina ,I am sure you are a nice girl!!!
Please enter your name : frietag
frietag ,I am sure you are a nice boy!!!
Please enter your name : stephen
stephen ,I am sure you are a nice boy!!!
Please enter your name : 
