In [1]:
import pandas as pd
import numpy as np
import os
import glob
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

In [2]:
ROOT = 'E:/skia_projects/3d_facial_landmark/implementation_1/data_new/temp'

In [3]:
train_data = pd.read_csv(os.path.join(ROOT, 'train_balanced.csv'), delimiter=',', 
                         index_col=False, names=['X', 'Y', 'Z', 'label'])

In [4]:
# Separate input features (X) and target variable (y)
y = train_data.pop('label').values
X = train_data.values

In [5]:
# Train model
clf = RandomForestClassifier()
clf.fit(X, y)

RandomForestClassifier(n_estimators=200)

In [6]:
# Predict on training set
pred_y = clf.predict(X)

In [7]:
# Is our model still predicting just one class?
print(np.unique(pred_y) )

[0 1]


In [8]:
# How's our accuracy?
print(accuracy_score(y, pred_y))

1.0


In [9]:
# What about AUROC?
prob_y = clf.predict_proba(X)
prob_y = [p[1] for p in prob_y]
print( roc_auc_score(y, prob_y) )

1.0


In [11]:
# Predict on test data
test_data = pd.read_csv(os.path.join(ROOT, 'test.csv'), delimiter=',', index_col=False, usecols=[0,1,2])

In [12]:
print(test_data)

      -0.14728101  0.00526800  0.01012900
0       -0.079014    0.005291    0.000879
1       -0.095757    0.005285    0.000967
2       -0.115664    0.005279    0.001055
3       -0.055800    0.005268    0.001758
4       -0.148287    0.005269    0.019786
...           ...         ...         ...
9594    -0.034393    0.182729    0.123804
9595    -0.062175    0.182714    0.144613
9596    -0.048653    0.182730    0.137966
9597    -0.056554    0.182730    0.142383
9598    -0.075801    0.182728    0.146889

[9599 rows x 3 columns]


In [13]:
pred = clf.predict(test_data)

In [14]:
print(pred)

[0 0 0 ... 0 0 0]


In [15]:
class_names = ['background', 'landmark']

In [16]:
f = lambda x: class_names[int(x)]
vf = np.vectorize(f)

In [17]:
predicted_class = vf(pred)

In [18]:
print(predicted_class)

['background' 'background' 'background' ... 'background' 'background'
 'background']


In [19]:
# Saving predictions
data = np.column_stack((test_data, predicted_class))
print(data.shape)

(9599, 4)


In [20]:
df = pd.DataFrame({'X': data[:,0], 'Y': data[:,1], 'Z':data[:,2], 'Predictions':data[:,3]})
results = df.to_csv(os.path.join(ROOT, 'results.csv'), index=False)

In [21]:
df = pd.DataFrame({'X': data[:,0], 'Y': data[:,1], 'Z':data[:,2], 'Predictions':data[:,3]})
results = df[df.Predictions != 'background']
results = results.to_csv(os.path.join(ROOT, 'results_landmarks.csv'), index=False)

In [22]:
# Write only points
df = pd.DataFrame({'X': data[:,0], 'Y': data[:,1], 'Z':data[:,2], 'Predictions':data[:,3]})
results = df[df.Predictions != 'background']
results = results.to_csv(os.path.join(ROOT, 'results_landmarks_points.csv'), index=False, 
                         columns =('X', 'Y', 'Z'), header=False)