In [1]:
import pandas as pd
import numpy as np
import cv2
import os
import wisardpkg as wp
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
#descompactar o zip
if len(os.listdir('./dataset')) <= 2: 
    !unzip -qq ./dataset/HAR.zip -d ./dataset/HAR

In [3]:
def load_images(filenames, labels, folder, img_dim=28):
    images = []
    for filename in filenames:
        img = cv2.imread(os.path.join(folder, filename), cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (img_dim, img_dim))  # Reescalona a imagem para 28 x 28. Experimentem com outros tamanhos
        images.append(img.flatten())  # Achata a imagem
    return images

In [4]:
def apply_fixed_threshold(imgs, threshold = 255):
    return [list(map(int, np.round(img/threshold))) for img in imgs]

def apply_mean_threshold(imgs):
    result = []
    for img in imgs:
        img_threshold = img.mean()
        binarized_img = list(map(lambda x: 1 if x>0 else 0, np.round(img/img_threshold)))
        result.append(binarized_img)
    return result

In [5]:
def group_by(all_train_images, all_labels, groups):
    groups_images, groups_labels  = [], []
    for group in groups:
        group_images = []
        group_labels = []
            
        for image, label in zip(all_train_images, all_labels):
            if label in group:
                group_images.append(image)
                group_labels.append(label)
        groups_images.append(group_images)
        groups_labels.append(group_labels)
    return groups_images, groups_labels

In [6]:
def train(train_images, labels, num_bits_addr, multiple_cluswisard=False):
    # train_images 2d, labels 1d
    #
    # train_images 3d , labels 2d
    # [['hugging', 'fighting'], ['running', 'texting']]
    # [ Cluswisard1, Cluswisard2 ]
    
    minScore, threshold, discriminatorLimit = 0.1, 10, 5

    if multiple_cluswisard:
        tests_split = []
        models = []
        for train_image_group, labels_group in zip(train_images, labels):
            X_train, X_test, y_train, y_test = train_test_split(train_image_group, labels_group, test_size=0.2, random_state=42)
            tests_split.append((X_test, y_test))
            
            model = wp.ClusWisard(
                num_bits_addr, minScore, threshold, discriminatorLimit,
                bleachingActivated=True, ignoreZero=False, base=2,     # optional
                completeAddressing=True, verbose=False, indexes=[],     # optional
                ## types of return of classify
                returnActivationDegree=False, returnConfidence=True,   # optional
                returnClassesDegrees=False                             # optional
            )
            
            model.train(X_train, y_train)
            models.append(model)

        y_preds = []
        concat_y_test = []
        for (X_test, y_test) in tests_split:
            concat_y_test += y_test
            models_classify = []
            for model in models:
                y_pred = model.classify(X_test)
                models_classify.append(y_pred)
                
            for i in range(len(models_classify[0])):
                best_confidence = 0.0
                best_class = ''
                for model_classify in models_classify:
                    if model_classify[i]['confidence'] > best_confidence:
                        best_class = model_classify[i]['class']
                y_preds.append(best_class)

        acc = accuracy_score(concat_y_test, y_preds)
        return (models, acc)
    
    else:
        X_train, X_test, y_train, y_test = train_test_split(train_images, labels, test_size=0.2, random_state=42)
    
    
        model = wp.ClusWisard(
            num_bits_addr, minScore, threshold, discriminatorLimit,
            bleachingActivated=True, ignoreZero=False, base=2,     # optional
            completeAddressing=True, verbose=True, indexes=[],     # optional
            ## types of return of classify
            returnActivationDegree=False, returnConfidence=False,  # optional
            returnClassesDegrees=False                             # optional
        )

        model.train(X_train, y_train)

        y_pred = model.classify(X_test)

        acc = accuracy_score(y_test, y_pred)
        return (model, acc) # ((model1, model2), acc)

In [7]:
root_path = './dataset/HAR/Human_Action_Recognition/'
classes_of_interest = ['hugging', 'running', 'texting', 'fighting']
train_folder = root_path + 'train'

In [8]:
train_df = pd.read_csv(root_path + 'Training_set.csv')
filtered_df = train_df[train_df['label'].isin(classes_of_interest)]

In [9]:
hipotesis = {
    'fixed_threshold':{
        'image_shape': 100,
        'treatments': apply_fixed_threshold,
        'group': True
    },
    'mean_threshold':{
        'image_shape': 100,
        'treatments': apply_mean_threshold,
        'group': True
    }
}

In [None]:
best_model, best_accuracy, results = None, 0.0, []

groups = [['hugging', 'texting'], ['fighting','running']]

for case_name in tqdm(hipotesis):

    params = hipotesis[case_name]

    # get params
    image_shape, treatment, group = params['image_shape'], params['treatments'], params['group']
    
    train_images_binary = load_images(filtered_df['filename'].tolist(), filtered_df['label'].tolist(), train_folder, image_shape)
    train_labels = filtered_df['label'].tolist()
    
    # call pre-processing here
    train_images = treatment(train_images_binary)
    
    if group:
        train_images, train_labels = group_by(train_images, train_labels, groups)

    # train
    for num_bits_addr in range(2, 65):
        (model, accuracy) = train(
            train_images=train_images,
            labels=train_labels,
            num_bits_addr=num_bits_addr,
            multiple_cluswisard=group
        )
        result = {
            case_name: params,
            'accuracy': accuracy,
            'num_bits_addr':num_bits_addr,
        }

        results.append(result)

        if accuracy > best_accuracy:
            print('='*15)            
            print(f'best acc by now {accuracy} with {num_bits_addr}')
            best_result, best_model, best_accuracy = result, model, accuracy
print(f'current champion is {best_model}')

  0%|                                                                                                                          | 0/2 [00:00<?, ?it/s]

best acc by now 0.2619047619047619 with 2
best acc by now 0.34077380952380953 with 7
best acc by now 0.3556547619047619 with 8
best acc by now 0.36755952380952384 with 11
best acc by now 0.36904761904761907 with 12
best acc by now 0.37202380952380953 with 13


 50%|████████████████████████████████████████████████████████▌                                                        | 1/2 [09:21<09:21, 561.95s/it]

In [None]:
num_bits_addr = 10
model = wp.Wisard(num_bits_addr, returnConfidence=True)
imgs_t = apply_mean_threshold(train_images_binary)
X_train, X_test, y_train, y_test = train_test_split(imgs_t, train_labels, test_size=0.2, random_state=42)
model.train(X_train, y_train)
y_pred = model.classify(X_test)
y_pred[0]
#acc = accuracy_score(y_test, y_pred)
#print('acc: ', acc)
#ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
#plt.show()