In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
import joblib
import cv2
from sklearn.preprocessing import StandardScaler


In [9]:
def data_loader(sampling, image_num, distance, features):
    """Load the per-pixel feature table of one image and return it class-balanced.

    Reads the STD and Entropy CSV files for ``image_num`` from
    ``../data/processed/{sampling}/max/``, labels every row by comparing its
    ``depth_value`` with ``distance``, and randomly downsamples the larger
    class (without replacement, ``random_state=42``) so that both labels end
    up with the same number of rows.

    Args:
        sampling (str): sampling folder name inserted into the CSV path
        image_num (int): image number to load the corresponding csv file
        distance (float): depth threshold; rows with ``depth_value <= distance``
            get label 1, every other row gets label 0
        features (list): list of features. Not used by the loader itself; the
            returned frame always carries both feature columns. Kept so
            existing callers keep working.

    Returns:
        pandas.DataFrame: columns ``max_STD``, ``max_Entropy``, ``depth_value``,
        ``x``, ``y`` and ``label``; label-0 rows come first, then label-1 rows.

    Raises:
        FileNotFoundError: if one of the CSV files does not exist.
        KeyError: if an expected column is missing from a CSV file.
    """
    base_dir = f'../data/processed/{sampling}/max'
    # The STD file also provides depth_value and the x/y coordinates, so it is
    # read once instead of being parsed a second time for those columns.
    std_df = pd.read_csv(f'{base_dir}/STD/Image_{image_num}_max_STD.csv')
    entropy_df = pd.read_csv(f'{base_dir}/Entropy/Image_{image_num}_max_Entropy.csv')

    # Column-by-column assignment aligns every column on the STD file's index.
    new_df = pd.DataFrame()
    new_df['max_STD'] = std_df['max_STD']
    new_df['max_Entropy'] = entropy_df['max_Entropy']
    for column in ('depth_value', 'x', 'y'):
        new_df[column] = std_df[column]

    new_df["label"] = np.where(new_df['depth_value'] <= distance, 1, 0)

    label0_rows = new_df[new_df.label == 0]
    label1_rows = new_df[new_df.label == 1]

    # Downsample whichever class is larger. Sampling without replacement
    # cannot draw more rows than a class has, so always shrinking label 0
    # fails as soon as label 0 happens to be the smaller class.
    if len(label0_rows) >= len(label1_rows):
        label0_rows = resample(label0_rows,
                               replace=False,
                               n_samples=len(label1_rows),
                               random_state=42)
    else:
        label1_rows = resample(label1_rows,
                               replace=False,
                               n_samples=len(label0_rows),
                               random_state=42)

    df_balanced = pd.concat([label0_rows, label1_rows])

    return df_balanced

In [None]:
# Export, for every image, a PNG mask that marks the label of each pixel
# belonging to the held-out test split.
DIR = "out"
SAMPLING = "TopDown"
# IMAGE_NUM = 5

# TopDown (Done): 
#          3CM , 5CM, 0.009CM
#          STD, Entropy, (Std, Entropy)

# Circular : 
#          3CM , 5CM, 0.009 CM
#          STD, Entropy, (Std, Entropy)

# FEATURES = ['max_STD']
# FEATURES = ['max_Entropy']
FEATURES = ['max_STD', 'max_Entropy']

DISTANCE = 0.03
IMAGE_SHAPE = (1024, 1024)  # size of the output mask, indexed as [x, y]

# Experiment name, e.g. "STD_ENTROPY_0.03_CM"; it selects the output folder.
feats = "_".join([i.split("_")[-1].upper() for i in FEATURES])
EXP_NAME = feats+"_"+str(DISTANCE)+"_CM"

# exist_ok makes re-runs harmless while real failures (permissions, bad
# path) still surface instead of being reported as "Folder Exist".
OUT_DIR = os.path.join("../", DIR, SAMPLING, EXP_NAME)
os.makedirs(OUT_DIR, exist_ok=True)

accuracy_curve = []

for img_num in range(1, 101):
    # Image 100 uses a slightly smaller threshold. A per-iteration value is
    # used so the DISTANCE constant is not silently changed by the loop.
    distance = 0.029 if img_num == 100 else DISTANCE
    data = data_loader(SAMPLING, img_num, distance, FEATURES)
    # Only the test rows are drawn; the seeded split depends on the number of
    # rows and the seed, so it matches the split used by the other cells.
    X_train, X_test = train_test_split(data, test_size=0.2, random_state=42)

    # Write every test row's label at its (x, y) position in one indexed
    # assignment: 0 stays black, label 1 becomes 255.
    # NOTE(review): assumes each (x, y) pair occurs at most once per image.
    rows = X_test['x'].to_numpy().astype(int)
    cols = X_test['y'].to_numpy().astype(int)
    image = np.zeros(IMAGE_SHAPE, dtype=np.uint8)
    image[rows, cols] = X_test['label'].to_numpy().astype(np.uint8) * 255

    out_path = os.path.join(OUT_DIR, f"{img_num}.png")
    # cv2.imwrite reports failure through its return value only.
    if not cv2.imwrite(out_path, image):
        raise OSError(f"Could not write {out_path}")

# Test the model using the test dataset from the CSV files

In [None]:
# Load the saved per-image model, predict the label of every pixel in the
# held-out test split and export the predictions as a PNG mask per image.
DIR = "out"
MODEL_DIR = "logs"
SAMPLING = "TopDown"
# IMAGE_NUM = 5

# TopDown (Done): 
#          3CM , 5CM, 0.009CM
#          STD, Entropy, (Std, Entropy)

# Circular : 
#          3CM , 5CM, 0.009 CM
#          STD, Entropy, (Std, Entropy)

# FEATURES = ['max_STD']
FEATURES = ['max_Entropy']
# FEATURES = ['max_STD', 'max_Entropy']

DISTANCE = 0.03
IMAGE_SHAPE = (1024, 1024)  # size of the output mask, indexed as [x, y]

# Experiment name, e.g. "ENTROPY_0.03_CM"; it selects both the model folder
# and the output folder.
feats = "_".join([i.split("_")[-1].upper() for i in FEATURES])
EXP_NAME = feats+"_"+str(DISTANCE)+"_CM"

# exist_ok makes re-runs harmless while real failures (permissions, bad
# path) still surface instead of being reported as "Folder Exist".
OUT_DIR = os.path.join("../", DIR, SAMPLING, EXP_NAME)
os.makedirs(OUT_DIR, exist_ok=True)

accuracy_curve = []

for img_num in range(1, 101):
    FOLDER_NAME = "IMAGE_NUM_"+str(img_num)
    # Image 100 uses a slightly smaller threshold. A per-iteration value is
    # used so the DISTANCE constant is not silently changed by the loop.
    distance = 0.029 if img_num == 100 else DISTANCE
    data = data_loader(SAMPLING, img_num, distance, FEATURES)
    X_train, X_test = train_test_split(data, test_size=0.2, random_state=42)
    # NOTE(review): joblib.load unpickles the file and can execute code
    # embedded in it; only load model files produced by this project.
    model = joblib.load(os.path.join("../", MODEL_DIR, SAMPLING, EXP_NAME, FOLDER_NAME, "model.pkl"))

    # The scaler is re-fitted on the training rows of the same seeded split.
    # NOTE(review): presumably this reproduces the scaler used when the model
    # was trained - confirm against the training notebook.
    scaler = StandardScaler()
    scaler.fit(X_train[FEATURES].to_numpy())

    # Predict from the columns named in FEATURES so the model and the scaler
    # receive the features they were fitted on, whichever experiment is
    # selected above. One batch call replaces a predict call per row.
    preds = np.asarray(model.predict(scaler.transform(X_test[FEATURES].to_numpy())))

    # NOTE(review): assumes each (x, y) pair occurs at most once per image.
    rows = X_test['x'].to_numpy().astype(int)
    cols = X_test['y'].to_numpy().astype(int)
    image = np.zeros(IMAGE_SHAPE, dtype=np.uint8)
    # Clip before the cast so values outside 0..255 saturate rather than wrap.
    image[rows, cols] = np.clip(preds.astype(int) * 255, 0, 255).astype(np.uint8)

    out_path = os.path.join(OUT_DIR, f"{img_num}.png")
    # cv2.imwrite reports failure through its return value only.
    if not cv2.imwrite(out_path, image):
        raise OSError(f"Could not write {out_path}")

In [None]:
# Mean of the saved accuracy-curve array of the TopDown STD+Entropy 0.03 run.
# The path is assembled with os.path.join so it resolves on every platform and
# contains no backslashes that Python would parse as escape sequences.
accuracy_avg = np.load(os.path.join(
    "..", "logs", "TopDown", "STD_ENTROPY_0.03_CM",
    "STD_ENTROPY_0.03_CM_accuracy_curve.npy"))
np.mean(accuracy_avg)

0.792016603124342

## Testing on the real dataset

In [3]:
import re
import glob
from PIL import Image
# Matches one run of decimal digits; the capturing group makes re.split keep
# the digit runs in its result.
numbers = re.compile(r'(\d+)')


def numericalSort(value):
    """Return a sort key that orders embedded numbers by value, not by text.

    The string is split around its digit runs. Because the pattern has a
    capturing group, the split result alternates text, digits, text, ... so
    every odd position holds a digit run, which is converted to ``int``.

    Args:
        value (str): string to build the key for, e.g. a file path.

    Returns:
        list: text pieces (``str``) interleaved with the numbers (``int``).
    """
    tokens = numbers.split(value)
    return [int(token) if position % 2 else token
            for position, token in enumerate(tokens)]

In [22]:
# Run the saved per-image models on the real STD / entropy image pairs and
# export one predicted mask per image.
DIR = "data/damage_crop"
MODEL_DIR = "logs"
SAMPLING = "TopDown"
FEATURES = ['max_STD', 'max_Entropy']
DISTANCE = 0.03

# STD images in natural numeric order (..., 2, 10, ... instead of 10, 2).
IMAGES = sorted(glob.glob(os.path.join('../data/damage_crop/STD', '*.tiff')), key=numericalSort)

# Experiment name, e.g. "STD_ENTROPY_0.03_CM"; it selects both the model
# folder and the output folder.
feats = "_".join([i.split("_")[-1].upper() for i in FEATURES])
EXP_NAME = feats+"_"+str(DISTANCE)+"_CM"

# exist_ok makes re-runs harmless while real failures (permissions, bad
# path) still surface instead of being reported as "Folder Exist".
OUT_DIR = os.path.join("../", DIR, SAMPLING, EXP_NAME)
os.makedirs(OUT_DIR, exist_ok=True)

accuracy_curve = []

for img_num, path in enumerate(IMAGES):
    image_id = img_num + 1  # files, models and outputs are numbered from 1
    FOLDER_NAME = "IMAGE_NUM_"+str(image_id)
    # Image 100 uses a slightly smaller threshold. A per-iteration value is
    # used so the DISTANCE constant is not silently changed by the loop.
    distance = 0.029 if image_id == 100 else DISTANCE
    data = data_loader(SAMPLING, image_id, distance, FEATURES)
    X_train, X_test = train_test_split(data, test_size=0.2, random_state=42)
    # NOTE(review): joblib.load unpickles the file and can execute code
    # embedded in it; only load model files produced by this project.
    model = joblib.load(os.path.join("../", MODEL_DIR, SAMPLING, EXP_NAME, FOLDER_NAME, "model.pkl"))

    # The scaler is re-fitted on the training rows of the same seeded split.
    # The column order here must match the order of the stacked pixel
    # features below: STD first, entropy second.
    # NOTE(review): presumably this reproduces the scaler used when the model
    # was trained - confirm against the training notebook.
    scaler = StandardScaler()
    scaler.fit(X_train[['max_STD', 'max_Entropy']].to_numpy())

    # Derive the entropy image path from the STD image path.
    # NOTE(review): str.replace rewrites every 'STD' in the path, not only
    # the directory name - verify against the dataset's naming.
    entropy_path = path.replace('STD', 'entropy').replace('max_variance', 'max_entropy')

    # Read both images inside 'with' blocks so the file handles are released.
    with Image.open(path) as std_file:
        std_img = np.array(std_file)
    with Image.open(entropy_path) as entropy_file:
        entropy_img = np.array(entropy_file)

    H, W = std_img.shape

    # Each pixel gets a feature vector: [std, entropy]
    pixel_feats = np.stack([std_img.flatten(), entropy_img.flatten()], axis=1)  # shape: (H*W, 2)

    # Transform and predict in batch
    preds = np.asarray(model.predict(scaler.transform(pixel_feats)))

    # Reshape predictions back to image shape. The explicit uint8 conversion
    # matters because OpenCV cannot write int64 arrays, which is what
    # astype(int) yields on most platforms; clipping makes values outside
    # 0..255 saturate rather than wrap.
    results = np.clip(preds.reshape(H, W).astype(int) * 255, 0, 255).astype(np.uint8)

    out_path = os.path.join(OUT_DIR, f"{image_id}.png")
    # cv2.imwrite reports failure through its return value only.
    if not cv2.imwrite(out_path, results):
        raise OSError(f"Could not write {out_path}")