# Nome: Rafael Silva Del Lama
# Email: rafael.lama@usp.br

# Desafio: 02-FraudDetection

A abordagem adotada foi treinar um modelo de Rede Neural Convolucional que recebe como entrada uma imagem de assinatura de referência e uma imagem de assinatura questionada, e classifica a assinatura questionada como fraude, genuína ou disfarçada.

Durante o treinamento, cada assinatura questionada foi apresentada à rede com todas as assinaturas de referência. Já no teste, cada assinatura questionada será avaliada com cada uma das assinaturas de referência, e a classe atribuída à assinatura questionada será definida por voto majoritário.

### Test set

The signature collection for testing contains 125 signatures. The signatures comprise:
    - 25 reference signatures by the same writer “B”;
    - 100 questioned signatures
        - 3 genuine signatures written by the reference writer in his/her normal signature style
        - 90 simulated signatures (written by 34 forgers freehand copying the signature characteristics of the reference writer); 
        - 7 disguised signatures written by the reference writer.
        
        
The signatures of the test set are arranged according to the following folder structure:

- **Reference:** Contains the reference signatures of another specimen author ‘B’. These signatures are used to train the classifier for verifying authorship of the specimen author ‘B’.
- **Questioned:** Contains all the signatures for which the task is to verify authorship of the specimen author ‘B’.


## Definição de Funções

### Função responsável por fazer a leitura das imagens.

Parâmetros:
    directory: diretório contendo as pastas de cada classe com as respectivas imagens
    input_shape: (img_width, img_height, n_channels)
    
Retorna 3 numpy arrays: 
    1º array contendo as informações das imagens
    2º array contendo as classes
    3º array contendo os labels das imagens

In [None]:
def _load_resized_image(path, input_shape):
    """Load one image file and resize it to ``input_shape[0]`` x ``input_shape[1]``.

    Parameters:
        path: path of the image file to read.
        input_shape: 3-tuple; entries 0 and 1 give the number of rows and
            columns of the returned array, entry 2 the number of channels.

    Returns:
        The resized image as a numpy array. When the resized image still has
        a channel axis and ``input_shape[2] == 1`` it is reduced to a single
        grayscale plane.
    """
    # The imaging libraries are imported here so they are only required when
    # an image is actually read.
    import cv2
    import numpy as np
    from PIL import Image

    # The context manager releases the file handle as soon as the pixels
    # have been copied into the array.
    with Image.open(path) as im:
        image = np.array(im)

    # Replacement for scipy.misc.imresize, which was removed in SciPy 1.3.
    # imresize read the size tuple as (rows, columns) and interpolated
    # bilinearly by default, whereas PIL's resize expects (columns, rows).
    # NOTE(review): assumes 8-bit image data, as imresize rescaled other
    # dtypes to 0-255 first -- confirm for the data set in use.
    target_size = (input_shape[1], input_shape[0])
    image_resized = np.array(
        Image.fromarray(image).resize(target_size, resample=Image.BILINEAR)
    )

    if image_resized.ndim == 3 and input_shape[2] == 1:
        # NOTE(review): PIL delivers RGB data, so COLOR_BGR2GRAY applies the
        # red and blue weights swapped. Left unchanged because the saved
        # network was presumably trained on images converted this way --
        # confirm before switching to COLOR_RGB2GRAY.
        image_resized = cv2.cvtColor(image_resized, cv2.COLOR_BGR2GRAY)

    return image_resized


def read_images_from_directory(directory, input_shape) :
    """Read every image stored in the class sub-folders of ``directory``.

    Parameters:
        directory: folder containing one sub-folder per class, each holding
            the images of that class.
        input_shape: 3-tuple describing the target image size and channel
            count (see ``_load_resized_image``).

    Returns:
        Three numpy arrays of equal length:
            1. the resized image data,
            2. the class of each image (name of its sub-folder),
            3. the label of each image (file name up to the first dot).
        All three arrays are empty when no image is found.
    """
    import glob
    import os
    import numpy as np

    # Sorted so the order of the returned arrays does not depend on the
    # operating system's directory listing order.
    class_dirs = sorted(glob.glob(directory + "/*"))
    print(class_dirs)

    X = []
    y = []
    labels = []
    for class_dir in class_dirs:
        # os.path.basename works with both "/" and "\\" separators, unlike
        # splitting on a hard-coded backslash at a fixed position.
        class_name = os.path.basename(class_dir)
        for file_path in sorted(glob.glob(class_dir + "/*")):
            X.append(_load_resized_image(file_path, input_shape))
            y.append(class_name)
            labels.append(os.path.basename(file_path).split('.')[0])

    return np.asarray(X), np.asarray(y), np.asarray(labels)

## Definição de Funções

### Função responsável por fazer a leitura das imagens.

Parâmetros:
    directory: diretório contendo as pastas de cada classe com as respectivas imagens
    input_shape: (img_width, img_height, n_channels)
    
Retorna 3 numpy arrays: 
    1º array contendo as informações das imagens
    2º array contendo as classes
    3º array contendo os labels das imagens

In [None]:
def _load_resized_image(path, input_shape):
    """Load one image file and resize it to ``input_shape[0]`` x ``input_shape[1]``.

    Parameters:
        path: path of the image file to read.
        input_shape: 3-tuple; entries 0 and 1 give the number of rows and
            columns of the returned array, entry 2 the number of channels.

    Returns:
        The resized image as a numpy array. When the resized image still has
        a channel axis and ``input_shape[2] == 1`` it is reduced to a single
        grayscale plane.
    """
    # The imaging libraries are imported here so they are only required when
    # an image is actually read.
    import cv2
    import numpy as np
    from PIL import Image

    # The context manager releases the file handle as soon as the pixels
    # have been copied into the array.
    with Image.open(path) as im:
        image = np.array(im)

    # Replacement for scipy.misc.imresize, which was removed in SciPy 1.3.
    # imresize read the size tuple as (rows, columns) and interpolated
    # bilinearly by default, whereas PIL's resize expects (columns, rows).
    # NOTE(review): assumes 8-bit image data, as imresize rescaled other
    # dtypes to 0-255 first -- confirm for the data set in use.
    target_size = (input_shape[1], input_shape[0])
    image_resized = np.array(
        Image.fromarray(image).resize(target_size, resample=Image.BILINEAR)
    )

    if image_resized.ndim == 3 and input_shape[2] == 1:
        # NOTE(review): PIL delivers RGB data, so COLOR_BGR2GRAY applies the
        # red and blue weights swapped. Left unchanged because the saved
        # network was presumably trained on images converted this way --
        # confirm before switching to COLOR_RGB2GRAY.
        image_resized = cv2.cvtColor(image_resized, cv2.COLOR_BGR2GRAY)

    return image_resized


def read_images_from_directory(directory, input_shape) :
    """Read every image stored in the class sub-folders of ``directory``.

    Parameters:
        directory: folder containing one sub-folder per class, each holding
            the images of that class.
        input_shape: 3-tuple describing the target image size and channel
            count (see ``_load_resized_image``).

    Returns:
        Three numpy arrays of equal length:
            1. the resized image data,
            2. the class of each image (name of its sub-folder),
            3. the label of each image (file name up to the first dot).
        All three arrays are empty when no image is found.
    """
    import glob
    import os
    import numpy as np

    # Sorted so the order of the returned arrays does not depend on the
    # operating system's directory listing order.
    class_dirs = sorted(glob.glob(directory + "/*"))
    print(class_dirs)

    X = []
    y = []
    labels = []
    for class_dir in class_dirs:
        # os.path.basename works with both "/" and "\\" separators, unlike
        # splitting on a hard-coded backslash at a fixed position.
        class_name = os.path.basename(class_dir)
        for file_path in sorted(glob.glob(class_dir + "/*")):
            X.append(_load_resized_image(file_path, input_shape))
            y.append(class_name)
            labels.append(os.path.basename(file_path).split('.')[0])

    return np.asarray(X), np.asarray(y), np.asarray(labels)

Leitura da rede e dos pesos 

In [None]:
import numpy as np
import pandas as pd
from keras.models import model_from_json

# Read the serialized network architecture (JSON text). The context manager
# closes the file even if reading raises.
with open('nuveo.json', 'r') as arquivo:
    estrutura_rede = arquivo.read()

# Rebuild the network from its JSON description, then load the weights
# stored in nuveo.h5 into it.
classificador = model_from_json(estrutura_rede)
classificador.load_weights('nuveo.h5')

In [None]:
# Size and channel count of the images fed to the network.
img_width, img_height, img_chanels = 200, 200, 1
input_shape = (img_width, img_height, img_chanels)

# Root folder of the test set.
test_dir = 'candidate-data/02-FraudDetection/TestSet'

# Class names in the order of the class indices used by the voting loop:
# index 0 -> 'Disguise', 1 -> 'Genuine', 2 -> 'Simulated'.
classes = [
    'Disguise',
    'Genuine',
    'Simulated',
]

In [None]:
# Load every image below the test folder together with the name of the
# folder it came from and its file label.
X_imagens_test, y_imagens_test, labels_imagens_test = read_images_from_directory(
    test_dir, input_shape
)

# Give each image an explicit channel axis and divide the pixel values by 255.
n_images = X_imagens_test.shape[0]
X_imagens_test = X_imagens_test.reshape((n_images,) + input_shape) / 255

# Images from the 'Reference' folder are the known signatures; everything
# else is treated as a questioned signature.
reference_mask = y_imagens_test == 'Reference'
questioned_mask = y_imagens_test != 'Reference'

X_reference_test, y_reference_test = (
    X_imagens_test[reference_mask],
    y_imagens_test[reference_mask],
)
X_questioned_test, y_questioned_test = (
    X_imagens_test[questioned_mask],
    y_imagens_test[questioned_mask],
)

print('Images Reference test: {}'.format(X_reference_test.shape))
print('Images questioned test: {}'.format(X_questioned_test.shape))

Cada assinatura questionada será avaliada com todas as assinaturas de referência. 
A classe atribuída à assinatura será a classe majoritária.

In [None]:
# Labels of the questioned signatures only. labels_imagens_test also holds
# the labels of the reference images, so indexing it with a position taken
# from X_questioned_test would name the wrong file whenever the reference
# images are not listed after the questioned ones.
labels_questioned_test = labels_imagens_test[y_imagens_test != 'Reference']

n_references = len(X_reference_test)
if n_references == 0:
    # Without references there is nothing to vote with, and the vote
    # fractions below would divide by zero.
    raise ValueError('No reference signatures found; cannot classify the questioned signatures.')

predictions = []

for questioned, signature in zip(X_questioned_test, labels_questioned_test):
    # Pair this questioned signature with every reference signature and
    # classify all pairs in a single predict call instead of one call per pair.
    # NOTE(review): assumes the model returns one row of class scores per
    # input pair -- confirm against the saved network.
    questioned_batch = np.repeat(questioned[np.newaxis], n_references, axis=0)
    scores = classificador.predict([X_reference_test, questioned_batch])
    votes = np.argmax(scores, axis=1)

    # Votes per class: index 0 counts as Disguise, 1 as Genuine and every
    # other index as Simulated (forgery).
    D = int(np.sum(votes == 0))
    G = int(np.sum(votes == 1))
    F = n_references - D - G

    ## Majoritarian predict
    # argmax returns the first maximum, so ties are resolved in the order
    # Disguise, Genuine, Simulated.
    predict = int(np.argmax([D, G, F]))

    predictions.append({"signature": signature, "class": classes[predict],
                       'Disguise': D / n_references, 'Genuine': G / n_references,
                       'Simulated': F / n_references})

In [None]:
import pandas as pd

# Turn the list of per-signature result dicts into a table with a fixed
# column order.
result_columns = ['signature', 'class', 'Disguise', 'Genuine', 'Simulated']
predictions = pd.DataFrame(predictions, columns=result_columns)

In [None]:
# Bare expression: as the last statement of a notebook cell it displays the
# predictions table.
predictions

In [None]:
# Number of questioned signatures assigned to each class.
predicted_class = predictions['class']
n_simulated = len(predictions[predicted_class == 'Simulated'])
n_disguise = len(predictions[predicted_class == 'Disguise'])
n_genuine = len(predictions[predicted_class == 'Genuine'])

print("Simulated: {}".format(n_simulated))
print("Disguise: {}".format(n_disguise))
print("Genuine: {}".format(n_genuine))

In [None]:
# Write the results table to predictions.csv; index=False leaves out the
# DataFrame's row-index column.
predictions.to_csv('predictions.csv', index=False)