In [5]:
import pandas as pd
import numpy as np
import os
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
import joblib
import cv2
from sklearn.preprocessing import StandardScaler

In [23]:
def data_loader(sampling, image_num):
    """Load one image's per-pixel table and return a class-balanced sample.

    Reads ``../data/processed/{sampling}/max/STD/Image_{image_num}_max_STD.csv``,
    keeps the ``max_STD``, ``depth_value``, ``x``, ``y`` and ``avg_value``
    columns and adds a ``label`` column copied from ``depth_value``. Rows with
    label 0 and rows with label 1 are then balanced by randomly downsampling
    the larger class (without replacement, seed 42) to the size of the smaller
    one. Rows whose label is neither 0 nor 1 are dropped.

    Args:
        sampling (str): sampling-scheme sub-folder under ``../data/processed``
        image_num (int): image number to load the corresponding csv file

    Returns:
        tuple: ``(X, y)`` where ``X`` is the balanced DataFrame and ``y`` is
        its ``label`` column.

    NOTE(review): ``X`` still contains ``depth_value`` and ``label`` (callers
    read ``x``, ``y`` and ``label`` from it). Drop those columns before using
    ``X`` as model input, otherwise the target leaks into the features.
    """
    csv_path = f'../data/processed/{sampling}/max/STD/Image_{image_num}_max_STD.csv'

    # Every column used below lives in the STD file, so it is read only once.
    # To add the entropy feature, also read
    # ../data/processed/{sampling}/max/Entropy/Image_{image_num}_max_Entropy.csv
    # and copy its 'max_Entropy' column.
    raw = pd.read_csv(csv_path)

    new_df = raw[['max_STD', 'depth_value', 'x', 'y', 'avg_value']].copy()
    new_df['label'] = new_df['depth_value']

    zeros = new_df[new_df.label == 0]
    ones = new_df[new_df.label == 1]

    # Downsample whichever class is larger. Sampling without replacement
    # raises ValueError if asked for more rows than exist, so the direction
    # must follow the actual class sizes rather than assume label 0 dominates.
    if len(zeros) >= len(ones):
        zeros = resample(zeros,
                         replace=False,
                         n_samples=len(ones),     # match minority
                         random_state=42)
    else:
        ones = resample(ones,
                        replace=False,
                        n_samples=len(zeros),    # match minority
                        random_state=42)

    df_balanced = pd.concat([zeros, ones])

    X = df_balanced
    y = df_balanced['label']

    return X, y

In [None]:
# Render, for every image, the labels of its held-out test pixels into a
# 1024x1024 grayscale PNG (label 1 -> 255, everything else -> 0).
DIR = "out"
SAMPLING = "TopDown"

# TopDown (Done): 
#          3CM , 5CM, 0.009CM
#          STD, Entropy, (Std, Entropy)

# Circular : 
#          3CM , 5CM, 0.009 CM
#          STD, Entropy, (Std, Entropy)

# Alternative feature sets:
# FEATURES = ['max_STD']
# FEATURES = ['max_Entropy']
FEATURES = ['max_STD', 'avg_value', 'GT']

DISTANCE = 0.03
IMAGE_SIZE = (1024, 1024)

# The experiment name is built from the last "_"-separated token of each feature.
feats = "_".join([i.split("_")[-1].upper() for i in FEATURES])
EXP_NAME = feats+"_"+str(DISTANCE)+"_CM"

# exist_ok=True replaces the former bare try/except, which also hid real
# failures such as permission errors.
exp_dir = os.path.join("../", DIR, SAMPLING, EXP_NAME)
os.makedirs(exp_dir, exist_ok=True)

accuracy_curve = []

for img_num in range(1, 101):
    if img_num == 100:
        # NOTE(review): this has no effect inside this cell (EXP_NAME is already
        # built and data_loader takes no distance); kept so the global keeps
        # the same final value as before.
        DISTANCE = 0.029
    X, y  = data_loader(SAMPLING, img_num)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    image = np.zeros(IMAGE_SIZE)
    # Iterate the columns directly instead of X_test.iloc[i][...] per row; the
    # pixels are still written in row order, so a repeated (x, y) keeps the
    # last label. Loop names no longer overwrite the DataFrame `X`.
    # NOTE(review): `x` is used as the first array index - confirm this matches
    # the coordinate convention of the CSV.
    for px, py, lab in zip(X_test['x'], X_test['y'], X_test['label']):
        image[int(px), int(py)] = int(lab)

    out_path = os.path.join(exp_dir, f"{img_num}.png")
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(out_path, image * 255):
        raise OSError(f"Could not write image: {out_path}")

## Compute accuracy over the pixels used for testing only (the rest of the 1024x1024 image grid is not included)

In [56]:
import cv2

# Collect, over all images, the label and the stored prediction of every
# held-out test pixel into `all_labels` / `all_preds`.
DIR = "out"
SAMPLING = "TopDown"
FEATURES = ['max_STD']

DISTANCE = 0.03

# The experiment name is built from the last "_"-separated token of each feature.
feats = "_".join([i.split("_")[-1].upper() for i in FEATURES])
EXP_NAME = feats+"_"+str(DISTANCE)+"_CM"

# exist_ok=True replaces the former bare try/except, which also hid real
# failures such as permission errors.
exp_dir = os.path.join("../", DIR, SAMPLING, EXP_NAME)
os.makedirs(exp_dir, exist_ok=True)

all_labels = []
all_preds = []

for img_num in range(1, 101):
    IMAGE_NAME = str(img_num)+".png"
    if img_num == 100:
        # NOTE(review): this has no effect inside this cell (EXP_NAME is already
        # built and data_loader takes no distance); kept so the global keeps
        # the same final value as before.
        DISTANCE = 0.029
    X, y  = data_loader(SAMPLING, img_num)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    image_path = os.path.join(exp_dir, IMAGE_NAME)
    raw_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail with a clear message rather than a TypeError on `/ 255`.
    if raw_image is None:
        raise FileNotFoundError(f"Could not read prediction image: {image_path}")
    pred_image = raw_image / 255

    # Gather all test pixels in one indexing operation instead of one
    # X_test.iloc[i][...] lookup per row. astype(int) truncates toward zero,
    # like the int() calls it replaces, and tolist() yields plain Python ints.
    # NOTE(review): `x` is used as the first array index - confirm this matches
    # the coordinate convention used when the images were written.
    xs = X_test['x'].to_numpy().astype(int)
    ys = X_test['y'].to_numpy().astype(int)
    all_labels.extend(X_test['label'].astype(int).tolist())
    all_preds.extend(pred_image[xs, ys].astype(int).tolist())



In [57]:
    
# Share of evaluated test pixels whose stored prediction equals the label.
accuracy = np.mean(np.asarray(all_labels) == np.asarray(all_preds))
print(accuracy)

0.7283542079995153


In [None]:
# STD + AVG = 73.2%
# STD + Entropy = 73.6%
# STD = 72.8%

792348


In [50]:
# Side-by-side peek at the first 20 labels (line 1) and predictions (line 2).
print(all_labels[:20], all_preds[:20], sep="\n")

[0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0]
