Insert the following packages before running the code:

1/ cv2
2/ tqdm
3/ keras

In [1]:
''' Import key libraries'''
import cv2 # using opencv to process image
from tqdm import tqdm  # use tqdm to know the process of the iteration
import numpy as np
import pandas as pd
import os         
import random
import matplotlib.pyplot as plt
import time

''' Preprocessing and Cross validation libraries'''

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import StratifiedKFold

''' P libraries'''
from sklearn.metrics import accuracy_score 
from sklearn.preprocessing import label_binarize
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import np_utils


Using Theano backend.


In [2]:
%matplotlib inline

train_path = r'C:\Users\phuph\Desktop\Spiderdatabase\spidertrainset'
csv_path = r'C:\Users\phuph\Desktop\Spiderdatabase\spider_csv.csv'
#test_path = 'test'

size_img = 50

In [3]:
""" From the image, record the label either dangerous or non"""
def record_label(image_name):
    label = image_name.split('.')[0]
    if label == 'dangerous':
        return 1
    elif label == 'non':
        return 0

In [4]:
""" From the image, create the train data after resizing the image"""

def create_train():
    training_data = []
    for img in tqdm(os.listdir(train_path)):
        path = os.path.join(train_path, img)
        img_data = cv2.imread(path, cv2.IMREAD_COLOR)
        img_data = cv2.resize(img_data, (size_img, size_img))
        training_data.append([img_data, record_label(img) ])
    #np.save('train_data.npy', training_data)
    return training_data


In [5]:
train = create_train()
#train = create_train()

100%|██████████████████████████████████████████████████████████████████████████████| 1005/1005 [00:11<00:00, 89.66it/s]


In [6]:
A_train = []
for i in train:
    a = i[0].reshape(-1)
    A_train.append(a)
A_train = np.asarray(A_train)
B_train = np.asarray([i[1] for i in train])

In [7]:

X_train0, X_test0, Y_train, Y_test = train_test_split(A_train, B_train, test_size=0.2, random_state=0)



In [8]:
scaler = StandardScaler()
X_train1 = scaler.fit_transform(X_train0)
X_test1 = scaler.fit_transform(X_test0)

pca = PCA(n_components=20)
X_train = pca.fit_transform(X_train1)
X_test= pca.transform(X_test1)



In [9]:
''' Induce the CCN flip rate'''
import random
rho_po_list = [0.1,0.2,0.3,0.4,0.4]
rho_ne_list = [0.3,0.1,0.4,0.1,0.4]
Y_new_list = []
for i in range(5):
    rho_po = rho_po_list[i]
    rho_ne = rho_ne_list[i]
    Y_temp = np.copy(Y_train)
    for j in range(len(Y_temp)):
        if Y_train[j] ==1:
            temp = random.random()
            if temp < rho_po:
                Y_temp[j] = 0
        else:
            t = random.random()
            if t < rho_ne:
                Y_temp[j] = 1
    Y_new_list.append([Y_temp,rho_po,rho_ne])

In [10]:
''' Neural network model'''
dims = X_train.shape[1]
n_y = 2
print( 'Number of features: %d ' %dims)
print( 'Number of classes: %d' %n_y)
print("Building model...")

model = Sequential()
model.add(Dense(80, input_shape=(dims,)))
model.add(Dense(60, input_shape=(dims,)))
model.add(Dense(40, input_shape=(dims,)))
model.add(Dense(n_y, input_shape=(dims,)))
model.add(Activation('softmax'))
model.compile(optimizer='sgd', loss='categorical_crossentropy',sample_weight_mode=None,metrics=['accuracy'])


Number of features: 20 
Number of classes: 2
Building model...


In [11]:
''' Baseline - Accuracy'''

accuracy_list =[]
count =0
for i in Y_new_list:
    y= np_utils.to_categorical(i[0], 2)
    model.fit(X_train,y, verbose=1,epochs=1)
    pred = model.predict_classes(X_test)
    #pred = pred0.argmax(axis=-1)
    a = accuracy_score(Y_test,pred)
    accuracy_list.append([count,a,i[1],i[2]])
    count = count +1

for i in accuracy_list:
  #print("\n Round %s \n==============================\n" %i[0][0])
  print(i)


Epoch 1/1
 32/201 [===>..........................] - ETA: 0sEpoch 1/1
 32/201 [===>..........................] - ETA: 0sEpoch 1/1
 32/201 [===>..........................] - ETA: 0sEpoch 1/1
 32/201 [===>..........................] - ETA: 0sEpoch 1/1
 32/201 [===>..........................] - ETA: 0s[0, 0.58208955223880599, 0.1, 0.3]
[1, 0.54228855721393032, 0.2, 0.1]
[2, 0.56218905472636815, 0.3, 0.4]
[3, 0.46766169154228854, 0.4, 0.1]
[4, 0.51243781094527363, 0.4, 0.4]


In [12]:
''' Nat13 - Accuracy'''

accuracy_list =[]
count =0
for i in Y_new_list:
    alpha = float(1 - i[1] + i[2]) / 2
    y = i[0]
    sample_weight = (1-alpha)*np.ones(np.shape(y)) 
    sample_weight[y==0] = alpha
    y= np_utils.to_categorical(i[0], 2)
    model.fit(X_train,y,sample_weight=sample_weight, verbose=1,epochs=1)
    pred = model.predict_classes(X_test)
    #pred = pred0.argmax(axis=-1)
    a = accuracy_score(Y_test,pred)
    accuracy_list.append([count,a,i[1],i[2]])
    count = count +1
for i in accuracy_list:
    print(i)

Epoch 1/1
 32/201 [===>..........................] - ETA: 0sEpoch 1/1
 32/201 [===>..........................] - ETA: 0sEpoch 1/1
 32/201 [===>..........................] - ETA: 0sEpoch 1/1
 32/201 [===>..........................] - ETA: 0sEpoch 1/1
 32/201 [===>..........................] - ETA: 0s[0, 0.57213930348258701, 0.1, 0.3]
[1, 0.58706467661691542, 0.2, 0.1]
[2, 0.57213930348258701, 0.3, 0.4]
[3, 0.57213930348258701, 0.4, 0.1]
[4, 0.55223880597014929, 0.4, 0.4]


In [14]:
''' ILN - Baseline '''


accuracy_list =[]
df_noise = pd.read_csv(csv_path)
C_train = np.asarray(df_noise["Rater label"])

skf = StratifiedKFold(n_splits=10)
for train_index,test_index in skf.split(A_train,C_train):
    x_train0, x_test0 = A_train[train_index], A_train[test_index]
    y_train, y_test = C_train[train_index], C_train[test_index]
    x_train1 = scaler.fit_transform(x_train0)
    x_test1 = scaler.fit_transform(x_test0)

    x_train = pca.fit_transform(x_train1)
    x_test= pca.transform(x_test1)
    y= np_utils.to_categorical(y_train, 2)
    model.fit(x_train, y,verbose=1, epochs=1)
    pred = model.predict_classes(x_test)
    a = accuracy_score(y_test,pred)
    accuracy_list.append([a])
    
print(np.mean(accuracy_list,axis=0),np.std(accuracy_list,axis=0))



Epoch 1/1


In [15]:
''' ILN - Nat13 Accuracy '''
from sklearn.model_selection import StratifiedKFold

accuracy_list =[]
df_noise = pd.read_csv(csv_path)
C_train = np.asarray(df_noise["Rater label"])

rho_po_list = [0.2,0.3,0.4,0.1]
rho_ne_list = [0.2,0.1,0.4,0.3]
skf = StratifiedKFold(n_splits=10)
for train_index,test_index in skf.split(A_train,C_train):
    x_train0, x_test0 = A_train[train_index], A_train[test_index]
    y_train, y_test = C_train[train_index], C_train[test_index]
    x_train1 = scaler.fit_transform(x_train0)
    x_test1 = scaler.fit_transform(x_test0)

    x_train = pca.fit_transform(x_train1)
    x_test= pca.transform(x_test1)
    rho_po = random.choice(rho_po_list)
    rho_ne = random.choice(rho_ne_list)
    
    alpha = float(1 - rho_po + rho_ne) / 2
    y = y_train
    sample_weight = (1-alpha)*np.ones(np.shape(y)) 
    sample_weight[y==0] = alpha
    
    y= np_utils.to_categorical(y_train, 2)
    model.fit(x_train, y,sample_weight = sample_weight,verbose=1, epochs=1)
    pred = model.predict_classes(x_test)
    a = accuracy_score(y_test,pred)
    accuracy_list.append([a])
    
print(np.mean(accuracy_list,axis=0),np.std(accuracy_list,axis=0))




Epoch 1/1
