In [18]:
import pandas as pd
import numpy as np
import os
import glob
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
import xgboost as XGB

In [19]:
ROOT = 'E:/skia_projects/3d_facial_landmark/implementation_1/data_new/temp'

In [20]:
train_data = pd.read_csv(os.path.join(ROOT, 'train_balanced.csv'), delimiter=',', 
                         index_col=False, names=['X', 'Y', 'Z', 'label'])

In [21]:
# Separate input features (X) and target variable (y)
y = train_data.pop('label').values
X = train_data.values

In [22]:
# Train model
clf = XGB.XGBRegressor()
clf.fit(X, y, verbose=True)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=16, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [24]:
# Predict on training set
#pred_y = clf.predict(X)
pred_y = np.floor(np.expm1(clf.predict(X)))

In [25]:
# Is our model still predicting just one class?
print(np.unique(pred_y) )

[-1.  0.  1.]


In [26]:
# How's our accuracy?
print(accuracy_score(y, pred_y))

0.7583445034668785


In [27]:
# What about AUROC?
prob_y = clf.predict_proba(X)
prob_y = [p[1] for p in prob_y]
print( roc_auc_score(y, prob_y) )

AttributeError: 'XGBRegressor' object has no attribute 'predict_proba'

In [40]:
# Predict on test data
test_data = pd.read_csv(os.path.join(ROOT, 'test.csv'), delimiter=',', index_col=False, usecols=[0,1,2])

In [41]:
print(test_data)

      -0.14728101  0.00526800  0.01012900
0       -0.079014    0.005291    0.000879
1       -0.095757    0.005285    0.000967
2       -0.115664    0.005279    0.001055
3       -0.055800    0.005268    0.001758
4       -0.148287    0.005269    0.019786
...           ...         ...         ...
9594    -0.034393    0.182729    0.123804
9595    -0.062175    0.182714    0.144613
9596    -0.048653    0.182730    0.137966
9597    -0.056554    0.182730    0.142383
9598    -0.075801    0.182728    0.146889

[9599 rows x 3 columns]


In [42]:
pred = np.floor(np.expm1(clf.predict(test_data)))



In [43]:
print(pred)

[ 0. -1. -1. ... -1. -1.  0.]


In [44]:
class_names = ['background', 'landmark']

In [45]:
f = lambda x: class_names[int(x)]
vf = np.vectorize(f)

In [46]:
predicted_class = vf(pred)

In [47]:
print(predicted_class)

['background' 'landmark' 'landmark' ... 'landmark' 'landmark' 'background']


In [48]:
# Saving predictions
data = np.column_stack((test_data, predicted_class))
print(data.shape)

(9599, 4)


In [49]:
df = pd.DataFrame({'X': data[:,0], 'Y': data[:,1], 'Z':data[:,2], 'Predictions':data[:,3]})
results = df.to_csv(os.path.join(ROOT, 'results.csv'), index=False)

In [50]:
df = pd.DataFrame({'X': data[:,0], 'Y': data[:,1], 'Z':data[:,2], 'Predictions':data[:,3]})
results = df[df.Predictions != 'background']
results = results.to_csv(os.path.join(ROOT, 'results_landmarks.csv'), index=False)

In [51]:
# Write only points
df = pd.DataFrame({'X': data[:,0], 'Y': data[:,1], 'Z':data[:,2], 'Predictions':data[:,3]})
results = df[df.Predictions != 'background']
results = results.to_csv(os.path.join(ROOT, 'results_landmarks_points.csv'), index=False, 
                         columns =('X', 'Y', 'Z'), header=False)