In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import Normalize
from scipy.special import softmax
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# --- Run configuration ---
# project   : project folder whose dataset and model logs are read below.
# models    : run identifiers whose logs are analysed.
# per_image : True  -> analyse individual images and skip the aggregated "total" entry;
#             False -> analyse only the aggregated "total" entry.
project = 'poreseg'
models = ['4f85f65d82694051b2cab822']
per_image = True

# Leave `sections` as None to take every image found for each model (except
# "total"); otherwise list individual images as '<section>_<depth>',
# e.g. ['Dx_6295', 'Dx_6295'].
sections = None

# Captured once here so later cells can still tell "everything" apart from an
# explicit list even after `sections` has been reassigned.
all_sections = sections is None

# Tell the reader which of the two modes is active.
print(
    'Individual images only. General (total) per-model results ignored.'
    if per_image
    else 'General (total) per-model results only. Individual images ignored.'
)

In [None]:
# Unified label table of the selected project, read from the shared datasets tree.
unified_labels_path = os.path.join(
    os.sep, 'petrobr', 'parceirosbr', 'smartseg', 'datasets', project, 'unified_labels.csv'
)
element_data = pd.read_csv(unified_labels_path)

In [None]:
# Columns collected from every selected log.csv. Each list grows in lockstep
# with `combinations`, so position k in any of them belongs to combinations[k].
models_log = {
    'elements': [],
    'val_proportions': [],
    'out_proportions': [],
    'color_hex': [],
    'confusion_matrix': []
}

# (model, section) pairs actually loaded, in load order.
combinations = []
for model in models:
    # Run identifiers may carry a 'Run: ' prefix; drop it to get the folder name.
    model = model.replace('Run: ', '')
    model_dir = os.path.join(project, 'models_log', model)

    if all_sections:
        # os.listdir returns entries in arbitrary order; sort them so the row
        # order (and the subplot order downstream) is the same on every run.
        sections = sorted(os.listdir(model_dir))

    for section in sections:
        # per_image=True keeps individual images and drops 'total';
        # per_image=False keeps only 'total'.
        if (section == 'total') == per_image:
            continue

        # Skip entries that carry no log (missing folder, stray file, incomplete
        # run) instead of crashing inside read_csv.
        log_file = os.path.join(model_dir, section, 'log.csv')
        if not os.path.isfile(log_file):
            print('Skipping ' + section + ': no log.csv found at ' + log_file)
            continue

        log = pd.read_csv(log_file)

        # Register the pair only once its log has been read successfully, so
        # `combinations` never gets ahead of the models_log lists.
        combinations.append((model, section))
        models_log['elements'].append(log['elements'])
        models_log['val_proportions'].append(log['val_proportions'])
        models_log['out_proportions'].append(log['out_proportions'])
        models_log['color_hex'].append(log['color_hex'])
        # Confusion-matrix columns are the ones whose header contains 'Pred.'.
        models_log['confusion_matrix'].append(log[[c for c in log.columns if 'Pred.' in c]])

# Stack the per-log columns into arrays indexed as [log, element].
# NOTE(review): this assumes every log has the same number of rows — confirm.
for key in models_log:
    models_log[key] = np.array(models_log[key])

In [None]:
# Scatter of true (validation) vs. predicted proportions for every element,
# with one least-squares line per element plus one line over all points.
plt.figure(figsize = (8, 8))

# Title suffix: switches to 'fases minerais' as soon as any element other than
# 'Desconhecido' / 'Poros' is present in the first log.
experiment = 'poro x não-poro'
for i, element in enumerate(models_log['elements'][0]):
    if element not in ['Desconhecido', 'Poros']:
        experiment = 'fases minerais'

    # Column i across all loaded logs: one point per (model, section).
    elem_val_props = models_log['val_proportions'][:, i]
    elem_out_props = models_log['out_proportions'][:, i]

    # White markers and lines would be invisible: outline the markers in black
    # and draw the fitted line in black dash-dot instead.
    scatter_edgecolor = None
    plot_color = models_log['color_hex'][0, i]
    linestyle = '-'
    if plot_color == '#ffffff':
        scatter_edgecolor = 'black'
        plot_color = '#000000'
        linestyle = '-.'

    plt.scatter(100*elem_val_props, 100*elem_out_props, color = models_log['color_hex'][:, i], label = element, edgecolors = scatter_edgecolor)

    # Per-element fit: predicted proportion as a function of true proportion.
    lin_reg = LinearRegression().fit(elem_val_props.reshape(-1, 1), elem_out_props)
    a, b = lin_reg.coef_[0], lin_reg.intercept_
    x = np.arange(elem_val_props.min(), elem_val_props.max() + 0.01, 0.01)
    # Only the scatter carries the label, so each element appears once in the legend.
    plt.plot(100*x, 100*(a*x + b), color = plot_color, linestyle = linestyle)

# Overall fit across every element and every log.
elem_val_props = models_log['val_proportions'].flatten()
elem_out_props = models_log['out_proportions'].flatten()

# Fit in the same orientation as the axes and the per-element lines
# (x = true, y = predicted). Fitting the other way round and then drawing
# a*x + b over the true-value range would plot the wrong line.
lin_reg = LinearRegression().fit(elem_val_props.reshape(-1, 1), elem_out_props)
a, b = lin_reg.coef_[0], lin_reg.intercept_
x = np.arange(elem_val_props.min(), elem_val_props.max() + 0.01, 0.01)
plt.plot(100*x, 100*(a*x + b), color = 'red', label = 'Total', linestyle = '--')

# Goodness of fit of the line that is actually drawn. RMSE is in proportion
# units (0-1), not percent. np.sqrt(MSE) is used instead of `squared = False`,
# which newer scikit-learn releases no longer accept.
R2 = lin_reg.score(elem_val_props.reshape(-1, 1), elem_out_props)
RMSE = np.sqrt(mean_squared_error(elem_out_props, a * elem_val_props + b))

plt.title('Correlação - ' + experiment + '\nR² = ' + '{:.4f}'.format(R2) + '\nRMSE = ' + '{:.4f}'.format(RMSE))
plt.xlabel('Ocorrências verdadeiras (%)')
plt.ylabel('Ocorrências no(s) modelo(s) preditivo(s) (%)')
plt.legend()
plt.show()

In [None]:
# One confusion-matrix heatmap per loaded (model, section) pair, laid out on a
# grid of at most three columns.
n_combinations = len(combinations)

if n_combinations == 0:
    # With nothing loaded the grid would have zero columns and the row count
    # below would divide by zero.
    print('No (model, section) combinations loaded; nothing to plot.')
else:
    subplot_cols = min(n_combinations, 3)
    subplot_rows = int(np.ceil(n_combinations/subplot_cols))

    # Square figure that grows with the number of panels, never below 16 in.
    fig_side = max(16, int(1.5*n_combinations))
    plt.figure(figsize = (fig_side, fig_side))
    plt.suptitle('Matrizes de confusão')
    for mi, (model, key) in enumerate(combinations):
        # Subplot indices are 1-based.
        plt.subplot(subplot_rows, subplot_cols, mi + 1)

        # The element names of this log label both axes of its matrix.
        tick_labels = list(models_log['elements'][mi])
        sns.heatmap(models_log['confusion_matrix'][mi], annot = True, cmap = 'gray',
                    xticklabels = tick_labels, yticklabels = tick_labels)

        # Abbreviate the run id to its first and last three characters.
        plt.title('Model: ' + model[:3] + '...' + model[-3:] + '\n' + key)
    plt.show()

In [None]:
# Bar chart of one Dice score per image with the mean drawn as a horizontal line.
# NOTE(review): the scores below are random draws, not values loaded from any
# validation run — presumably placeholder data; replace before reporting results.
N_IMAGES = 53   # single source of truth for both the scores and the tick labels
DICE_SEED = 0   # fixed seed so Restart & Run All reproduces the same figure

rng = np.random.default_rng(DICE_SEED)
dice = rng.uniform(size = N_IMAGES)
images = [str(i) for i in range(N_IMAGES)]

plt.figure(figsize = (12, 12))
plt.bar(range(len(dice)), dice)
plt.axhline(y = np.mean(dice), color = 'orange', label = 'Média')
plt.xticks(range(len(dice)), labels = images, rotation = 'vertical')
plt.title('Validação Leave-One-Out')
plt.xlabel('Imagem')
plt.ylabel('Dice')
plt.legend()
plt.show()