# BioIDFace Notebook

### Notebook to implement a regression algorithm using the BioID Face database
http://www.bioid.com/support/downloads/software/bioid-face-database.html
#### It has 1521 files in pgm format. The dataset consists of 1521 gray level images with a resolution of
#### 384x286 pixel. Every picture shows the frontal view of a face of one out of
#### 23 different test persons. They are labeled with the coordinates of the left eye and of the right eye

In [12]:
import pandas as pd
import numpy as np

## Formatting the dataframe

In [13]:
#function that returns a string with the integer passed as argument with four characters, with as many leading '0' as necessary
def nome(i):
    nz=3
    s=i
    for j in range(3):
        s//=10
        if s!=0:
            nz-=1
    sr='0'*nz+str(i)
    return sr

In [14]:
#number of pictures in the data file
npictures=1521

In [15]:
# reading the data base to put the pixels of the pictures in a matrix denoted as Pic
nbytes=384*286+15     #number of bytes of every file: 15 bytes of header + 384*286 pixels
Pic=np.zeros((npictures,nbytes)).astype("uint8") 
for i in range(npictures):
 Pic[i]=np.fromfile('BioID-FaceDatabase-V1.2/BioID_'+nome(i)+'.pgm', dtype=np.uint8, count=-1, sep='')

#deleting the header of each file
Pic=Pic[:,15:]

In [16]:
Pic.shape

(1521, 109824)

In [17]:
#reading the data base with the positions of the eyes to put the coordinates in a matrix denoted as Pos
Pos=np.zeros((npictures,4)).astype("uint16")
for i in range(npictures):
    f=open('BioID-FaceDatabase-V1.2/BioID_'+nome(i)+'.eye')
    f.readline()            #read the first line with #LX LY RX RY
    s=f.readline()          #read the second line with the four coordinates
    k=''
    #num=[0]*4
    j=0
    for l in range(len(s)):
        if s[l]!='\t' and s[l]!='\n':
            k+=s[l]
        else:
            Pos[i,j]=int(k)
            k=''
            j+=1

In [18]:
print(Pos[356])

[182  72 132  72]


### Dataframe

In [12]:
namecolumn=pd.Series(range(len(Pic.T))).astype("str")
for i in range(len(Pic.T)):
    namecolumn[i]="pixel"+str(i)

In [13]:
namecolumn.values

array(['pixel0', 'pixel1', 'pixel2', ..., 'pixel109821', 'pixel109822',
       'pixel109823'], dtype=object)

In [14]:
df_BioIdFace=pd.DataFrame(Pic,columns=namecolumn)
df_BioIdFace['LX']=Pos[:,0]
df_BioIdFace['LY']=Pos[:,1]
df_BioIdFace['RX']=Pos[:,2]
df_BioIdFace['RY']=Pos[:,3]

In [15]:
df_BioIdFace.sample(10)

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel109818,pixel109819,pixel109820,pixel109821,pixel109822,pixel109823,LX,LY,RX,RY
30,116,162,162,160,161,159,161,162,161,162,...,115,116,117,119,96,55,209,117,152,121
668,96,152,154,155,156,157,158,161,162,163,...,120,120,119,122,89,34,211,120,154,116
98,64,74,62,59,57,58,59,59,57,56,...,200,197,192,190,142,58,233,114,170,115
280,67,80,68,66,64,63,66,67,65,61,...,151,158,156,151,120,46,242,122,196,117
1518,122,184,185,186,190,186,186,174,153,139,...,88,84,87,91,69,36,228,141,157,135
710,73,109,106,107,107,111,112,110,111,110,...,7,7,9,13,9,1,259,96,184,90
77,118,156,152,152,149,148,151,148,150,152,...,104,104,106,107,89,53,235,143,150,147
241,65,94,88,86,81,83,81,83,88,82,...,200,207,205,196,156,55,251,98,202,98
1450,128,218,234,234,236,242,234,238,236,234,...,23,17,72,112,81,29,217,29,139,28
470,79,112,110,108,107,109,110,112,112,117,...,147,147,146,147,110,41,186,134,140,130


In [121]:
#gera arquivo csv
df_BioIdFace.to_csv('BioIdFace.csv',index=False)

# Regression analysis

Splitting training set (1300 pictures) and test set (221 pictures)

In [23]:
X_train=Pic[:1300,:]
Y_train=Pos[:1300,:]
X_test=Pic[1300:,:]
Y_test=Pos[1300:,:]

Using decision Tree Regressor

In [24]:
from sklearn.tree import DecisionTreeRegressor

In [25]:
regressor_1=DecisionTreeRegressor(max_depth=5)
regressor_1.fit(X_train,Y_train)

DecisionTreeRegressor(max_depth=5)

In [26]:
#Predicting the test set
Y_predict=regressor_1.predict(X_test)

In [27]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

754.7457492275274

In [28]:
print(Y_predict[125,:].astype("uint16"))
print(Y_test[125,:])


[226  72 173  74]
[235  95 184  95]


Using support vector regressors

In [113]:
from sklearn import svm
from sklearn.multioutput import MultiOutputRegressor

In [114]:
regressor_2=MultiOutputRegressor(svm.SVR())
regressor_2.fit(X_train,Y_train)

MultiOutputRegressor(estimator=SVR())

In [115]:
Y_predict=regressor_2.predict(X_test)

In [116]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

616.8711764791814

In [117]:
print(Y_predict[125,:].astype('uint16'))
print(Y_test[125,:])

[223  83 169  84]
[235  95 184  95]


Using extra tree regressor

In [46]:
from sklearn.ensemble import ExtraTreesRegressor

In [47]:
regressor_3=ExtraTreesRegressor(n_estimators=10, max_features=32, random_state=0)
regressor_3.fit(X_train,Y_train)

ExtraTreesRegressor(max_features=32, n_estimators=10, random_state=0)

In [48]:
Y_predict=regressor_3.predict(X_test)

In [49]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

620.4999355358548

In [51]:
print(Y_predict[125,:].astype('uint16'))
print(Y_test[125,:])

[227  73 169  75]
[235  95 184  95]


Using K neighbor regressor

In [52]:
from sklearn.neighbors import KNeighborsRegressor

In [53]:
regressor_4=KNeighborsRegressor()
regressor_4.fit(X_train,Y_train)

KNeighborsRegressor()

In [54]:
Y_predict=regressor_4.predict(X_test)

In [55]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

671.5872541970998

In [56]:
print(Y_predict[125,:].astype('uint16'))
print(Y_test[125,:])

[229  66 181  67]
[235  95 184  95]


Using linear regression

In [57]:
from sklearn.linear_model import LinearRegression

In [58]:
regressor_5=LinearRegression()
regressor_5.fit(X_train,Y_train)

LinearRegression()

In [59]:
Y_predict=regressor_5.predict(X_test)

In [60]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

97731597533.9582

In [61]:
print(Y_predict[125,:].astype('uint16'))
print(Y_test[125,:])

[227   0   0   0]
[235  95 184  95]


In [63]:
Y_predict[125,:]

array([ 2.27474249e+02,  1.06958139e+10,  6.48618227e+09, -6.48618203e+09])

Using Ridge CV

In [64]:
from sklearn.linear_model import RidgeCV

In [65]:
regressor_6=RidgeCV()
regressor_6.fit(X_train, Y_train)

RidgeCV(alphas=array([ 0.1,  1. , 10. ]))

In [66]:
Y_predict=regressor_6.predict(X_test)

In [67]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

739.9141407738517

In [68]:
print(Y_predict[125,:].astype('uint16'))
print(Y_test[125,:])

[227  83 164  79]
[235  95 184  95]


Using Extra Trees one more time

In [78]:
regressor_7=ExtraTreesRegressor(n_estimators=80, max_features=500, random_state=0)
regressor_7.fit(X_train,Y_train)

ExtraTreesRegressor(max_features=500, n_estimators=80, random_state=0)

In [79]:
Y_predict=regressor_7.predict(X_test)

In [80]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

584.7125041085897

In [81]:
print(Y_predict[125,:].astype('uint16'))
print(Y_test[125,:])

[228  76 172  76]
[235  95 184  95]


Using PLS regression

In [84]:
from sklearn.cross_decomposition import PLSRegression

In [105]:
regressor_8=PLSRegression(n_components=8)
regressor_8.fit(X_train,Y_train)

PLSRegression(n_components=8)

In [106]:
Y_predict=regressor_8.predict(X_test)

In [107]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

592.2286695393541

In [108]:
print(Y_predict[125,:].astype('uint16'))
print(Y_test[125,:])

[220  70 157  71]
[235  95 184  95]


Using Multi-layer Perceptron regressor.

In [118]:
from sklearn.neural_network import MLPRegressor

In [121]:
regressor_9= MLPRegressor(random_state=1, max_iter=100)
regressor_9.fit(X_train, Y_train)



MLPRegressor(max_iter=100, random_state=1)

In [122]:
Y_predict=regressor_9.predict(X_test)

In [123]:
#comparing the prediction with the results
np.linalg.norm(Y_predict-Y_test)

4475.921213917365

In [124]:
print(Y_predict[125,:].astype('uint16'))
print(Y_test[125,:])

[0 0 0 0]
[235  95 184  95]
