In [1]:
import numpy as np
import csv
import matplotlib.pyplot as plt
import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import neighbors
import sklearn
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix

sklearn.__version__

'0.20.4'

In [69]:
def get_cloth_dir_names(p1, p2, p3):
    """List the entries of three parent directories as "/"-joined paths.

    Args:
        p1, p2, p3: Paths of the parent directories (one per cloth class in
            this notebook).

    Returns:
        A 3-tuple of lists. Each list holds ``parent + "/" + entry`` for every
        entry returned by ``os.listdir(parent)``, in sorted order.

    Raises:
        OSError: Propagated from ``os.listdir`` if a parent cannot be listed.
    """
    def _sorted_children(parent):
        # Sorting keeps the ordering (and hence the later train/test split)
        # stable, since os.listdir makes no ordering guarantee.
        return [parent + "/" + entry for entry in sorted(os.listdir(parent))]

    return _sorted_children(p1), _sorted_children(p2), _sorted_children(p3)
def get_dataset_dir_names(cloth0_dir_names, cloth1_dir_names, cloth2_dir_names, num_train):
    """Split three per-class directory lists into train and test lists.

    The first ``num_train`` entries of each class go to the training list and
    the remaining entries go to the test list. Within each output the classes
    appear in the order cloth0, cloth1, cloth2.

    Args:
        cloth0_dir_names, cloth1_dir_names, cloth2_dir_names: Sequences of
            directory names, one sequence per class.
        num_train: Number of leading entries per class used for training.

    Returns:
        (train_dir_names, test_dir_names)
    """
    per_class = (cloth0_dir_names, cloth1_dir_names, cloth2_dir_names)
    head0, head1, head2 = [names[:num_train] for names in per_class]
    tail0, tail1, tail2 = [names[num_train:] for names in per_class]
    return head0 + head1 + head2, tail0 + tail1 + tail2

def get_raw_data(train_dir_names, test_dir_names, new_collect = True):
    """Load and row-stack the samples of every train and test directory.

    Args:
        train_dir_names: Directories whose data forms the training set.
        test_dir_names: Directories whose data forms the test set.
        new_collect: When truthy each directory is read with
            ``get_data_XY_new``; otherwise with ``get_data_XY``.
            NOTE(review): ``get_data_XY`` is not defined in the visible part
            of this notebook - confirm it exists before passing False.

    Returns:
        (X, Y, x_test, y_test): the per-directory arrays concatenated with
        ``np.vstack``, training arrays first.

    Raises:
        ValueError: From ``np.vstack`` when either directory list is empty.
    """
    def _load_stacked(dir_names):
        features, labels = [], []
        for dir_name in dir_names:
            # The loader is chosen per directory so the name lookup only
            # happens when there is something to load.
            if new_collect:
                x, y = get_data_XY_new(dir_name)
            else:
                x, y = get_data_XY(dir_name)
            features.append(x)
            labels.append(y)
        return np.vstack(features), np.vstack(labels)

    X, Y = _load_stacked(train_dir_names)
    x_test, y_test = _load_stacked(test_dir_names)
    return X, Y, x_test, y_test

def normalize_data(X, x_test):
    """Standardize train and test features with statistics from the train set.

    A ``preprocessing.StandardScaler`` is fitted on ``X`` only and then applied
    to both arrays, so no test-set statistics enter the scaling.

    Returns:
        (X_scaled, x_test_scaled)
    """
    standardizer = preprocessing.StandardScaler()
    standardizer.fit(X)
    scaled_train = standardizer.transform(X)
    scaled_test = standardizer.transform(x_test)
    return scaled_train, scaled_test

def minmax_scale_data(X, x_test):
    """Scale train and test features with a scaler fitted on the train set.

    NOTE(review): despite the function name, the scaler used here is
    ``preprocessing.RobustScaler``, not ``MinMaxScaler``. Confirm which one is
    intended; behavior is left unchanged.

    Returns:
        (X_scaled, x_test_scaled)
    """
    robust = preprocessing.RobustScaler()
    robust.fit(X)
    scaled_train = robust.transform(X)
    scaled_test = robust.transform(x_test)
    return scaled_train, scaled_test

def get_dataset_from_dir_names(cloth0_dir_names, cloth1_dir_names, cloth2_dir_names, num_train, new_collect = True, minMax=True, standard=True):
    """Build scaled train/test arrays from three per-class directory lists.

    Args:
        cloth0_dir_names, cloth1_dir_names, cloth2_dir_names: Per-class
            directory lists.
        num_train: Number of leading directories per class used for training.
        new_collect: Forwarded to ``get_raw_data`` to select the file loader.
        minMax: When truthy, features are scaled with ``minmax_scale_data``.
            This takes precedence over ``standard``.
        standard: When truthy (and ``minMax`` is falsy), features are scaled
            with ``normalize_data``.

    Returns:
        (X, Y, x, y): train features, train labels, test features, test
        labels. Features are returned unscaled when both flags are falsy.
    """
    train_dirs, test_dirs = get_dataset_dir_names(
        cloth0_dir_names, cloth1_dir_names, cloth2_dir_names, num_train)
    X, Y, x, y = get_raw_data(train_dirs, test_dirs, new_collect)

    if minMax:
        print("inside minMax")
        X, x = minmax_scale_data(X, x)
        return X, Y, x, y

    if standard:
        X, x = normalize_data(X, x)
    return X, Y, x, y
    
def create_knn_clf_from_dir(p1,p2,p3, num_train=10, nn = 10, new_collect = True):
    """Train a distance-weighted k-NN classifier and report test metrics.

    The dataset is built with ``get_dataset_from_dir_names`` using that
    function's default scaling flags (``minMax=True``).

    Args:
        p1, p2, p3: Parent directories, one per class.
        num_train: Number of leading directories per class used for training.
        nn: Number of neighbours for ``KNeighborsClassifier``.
        new_collect: Forwarded to the data loader selection.

    Returns:
        The fitted ``KNeighborsClassifier``. The array shapes, the balanced
        accuracy and the confusion matrix on the test split are printed.
    """
    cloth0_dir_names, cloth1_dir_names, cloth2_dir_names = get_cloth_dir_names(p1, p2, p3)
    X,Y,x,y = get_dataset_from_dir_names(cloth0_dir_names, cloth1_dir_names, cloth2_dir_names, num_train, new_collect)
    print(X.shape, Y.shape, x.shape, y.shape)
    clf = neighbors.KNeighborsClassifier(nn, weights="distance")
    # Labels are loaded as a column vector; fit expects a 1-D target.
    clf.fit(X, Y.ravel())
    # Predict once and reuse the result for both metrics: k-NN inference is
    # the expensive step and the prediction does not change between calls.
    y_pred = clf.predict(x)
    score = balanced_accuracy_score(y, y_pred)
    print("balanced accuracy: ",score)
    best_cf = confusion_matrix(y, y_pred)
    print("confusion_matrix: ", best_cf)
    return clf

def get_data_XY_new(dir_name):
    """Load features and labels from ``<dir_name>/reskin_data.csv``.

    The file is read as comma-separated numeric data. All columns except the
    last two are returned as features, the second-to-last column is returned
    as the label, and the last column is not used.

    Args:
        dir_name: Directory containing ``reskin_data.csv``.

    Returns:
        (X, Y): ``X`` has shape (rows, columns - 2) and ``Y`` has shape
        (rows, 1).
    """
    file_name = dir_name+"/reskin_data.csv"
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column slicing below raises IndexError.
    data = np.loadtxt(file_name, delimiter = ",", ndmin=2)
    X = data[:,:-2]
    Y = np.reshape(data[:,-2],(-1,1))
    return X,Y

Nearest Neighbours algorithm taking a sequence of points, not just a single point

In [44]:
# Module-level window length; the functions below take their own
# ``num_of_points`` argument, which shadows this value inside them.
num_of_points = 10
def create_new_set(x,y, num_of_points):
    """Turn per-sample data into overlapping windows of consecutive samples.

    Window ``i`` covers rows ``i`` to ``i + num_of_points - 1`` of ``x``. Its
    label is the mean of the corresponding entries of ``y`` rounded with
    ``np.around`` (so an exact .5 rounds to the nearest even value).

    Args:
        x: 2-D array of shape (rows, features).
        y: Per-row labels, indexable by row slice.
        num_of_points: Number of consecutive rows per window.

    Returns:
        (new_x, new_y): ``new_x`` has shape
        (rows - num_of_points + 1, num_of_points, features) and ``new_y`` has
        shape (rows - num_of_points + 1,). Both are float arrays.
    """
    window_count = x.shape[0] - num_of_points + 1
    new_x = np.zeros((window_count, num_of_points, x.shape[1]))
    new_y = np.zeros(window_count)
    for start in range(window_count):
        stop = start + num_of_points
        new_x[start] = x[start:stop]
        window_mean = np.sum(y[start:stop]) / float(num_of_points)
        new_y[start] = np.around(window_mean)
    return new_x, new_y

def create_new_set2D(x,y, num_of_points):
    """Build overlapping windows of consecutive samples, flattened per window.

    Window ``i`` concatenates rows ``i`` to ``i + num_of_points - 1`` of ``x``
    into one feature vector (row after row). Its label is the mean of the
    corresponding entries of ``y`` rounded with ``np.around`` (so an exact .5
    rounds to the nearest even value).

    Args:
        x: 2-D array of shape (rows, features).
        y: Per-row labels, indexable by row slice.
        num_of_points: Number of consecutive rows per window.

    Returns:
        (new_x, new_y): ``new_x`` has shape
        (rows - num_of_points + 1, num_of_points * features) and ``new_y`` has
        shape (rows - num_of_points + 1,). Both are float arrays.
    """
    window_count = x.shape[0] - num_of_points + 1
    new_x = np.zeros((window_count, num_of_points * x.shape[1]))
    new_y = np.zeros(window_count)
    for start in range(window_count):
        stop = start + num_of_points
        new_x[start] = x[start:stop].reshape(-1)
        window_mean = np.sum(y[start:stop]) / float(num_of_points)
        new_y[start] = np.around(window_mean)
    return new_x, new_y
  
    

In [46]:
# Sanity check of create_new_set2D on a tiny example: 10 rows, 2 features.
# The features are unseeded random numbers, so they differ on every run.
x = np.random.rand(10,2)
y = np.array([0,0,0,1,1,1,1,0,0,0])
print(x,y)
# Windows of 2 consecutive rows give 9 windows of 4 features each.
# A window whose two labels differ has mean 0.5, which np.around rounds to 0.
create_new_set2D(x,y,2)

(array([[0.95543233, 0.78004979],
       [0.41198375, 0.98176987],
       [0.19560602, 0.54153093],
       [0.03420482, 0.07474429],
       [0.00493153, 0.21823705],
       [0.48290199, 0.55021938],
       [0.62271498, 0.63754244],
       [0.57604012, 0.79791766],
       [0.82829616, 0.40432693],
       [0.0953026 , 0.1953231 ]]), array([0, 0, 0, 1, 1, 1, 1, 0, 0, 0]))


(array([[0.95543233, 0.78004979, 0.41198375, 0.98176987],
        [0.41198375, 0.98176987, 0.19560602, 0.54153093],
        [0.19560602, 0.54153093, 0.03420482, 0.07474429],
        [0.03420482, 0.07474429, 0.00493153, 0.21823705],
        [0.00493153, 0.21823705, 0.48290199, 0.55021938],
        [0.48290199, 0.55021938, 0.62271498, 0.63754244],
        [0.62271498, 0.63754244, 0.57604012, 0.79791766],
        [0.57604012, 0.79791766, 0.82829616, 0.40432693],
        [0.82829616, 0.40432693, 0.0953026 , 0.1953231 ]]),
 array([0., 0., 0., 1., 1., 1., 0., 0., 0.]))

In [91]:
# Parent directories of the recordings, one per cloth class.
# NOTE(review): these are absolute, machine-specific paths; the cell only runs
# where this drive is mounted.
p1 = "/media/tweng/ExtraDrive2/fabric_touch/bagfiles/franka_norub_folded_random/0cloth_norub_auto"
p2 = "/media/tweng/ExtraDrive2/fabric_touch/bagfiles/franka_norub_folded_random/1cloth_norub_auto"
p3 = "/media/tweng/ExtraDrive2/fabric_touch/bagfiles/franka_norub_folded_random/2cloth_norub_auto"
# n: window length (consecutive samples per k-NN input vector).
n=10
# nn: number of neighbours; consumed by the classifier cell that follows.
nn=60
cloth0_dir_names, cloth1_dir_names, cloth2_dir_names = get_cloth_dir_names(p1, p2, p3)
# Positional arguments: num_train=10 directories per class, new_collect=True.
# minMax=False with the default standard=True selects normalize_data
# (StandardScaler fitted on the training rows).
X,Y,x,y = get_dataset_from_dir_names(cloth0_dir_names, cloth1_dir_names, cloth2_dir_names, 10,True, minMax=False)
# NOTE(review): the windows slide over the row-stacked data of all
# directories, so windows at a directory boundary mix samples (and labels)
# from two different recordings - confirm this is acceptable.
# These two lines rebind X, Y, x, y; re-running them without re-running the
# line above would window the already-windowed arrays.
X,Y = create_new_set2D(X,Y,n)
x,y = create_new_set2D(x,y,n)


In [92]:
# Fit a distance-weighted k-NN classifier on the windowed training set
# (X, Y, x, y and nn come from the data-loading cell).
clf = neighbors.KNeighborsClassifier(nn, weights="distance")
clf.fit(X, Y.ravel())
# Predict once on the windowed test set and reuse the result for both metrics.
y_pred = clf.predict(x)
score = balanced_accuracy_score(y, y_pred)
print("balanced accuracy: ",score)
best_cf = confusion_matrix(y, y_pred)
print("confusion_matrix: ", best_cf)


('balanced accuracy: ', 0.9268217686444462)
('confusion_matrix: ', array([[15978,   261,    19,     7],
       [  212,  8279,    68,    24],
       [   85,   610,  6903,   599],
       [   48,     1,   625,  7567]]))


In [58]:
# Classify one windowed test sample; wrapping it in a list gives predict the
# 2-D (samples, features) input it expects.
# NOTE(review): this cell's execution count (58) is lower than that of the
# cells defining clf and x (91, 92) - re-run it after them to refresh the output.
clf.predict([x[150]])

array([1.])