In [1]:
import tensorflow as tf
import keras
import os
import numpy as np
import pandas as pd
from scipy.stats import ranksums,ttest_rel,wilcoxon
import pathlib
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline

In [2]:
from utils import process_unknown, predict_unknown, unsup_and_sup

In [3]:
# --- User configuration: fill in the paths below before running the notebook. ---

# Folder where unsupervised clustering results are stored.
# It is walked recursively; files are grouped by a parent folder named "class_<k>".
clustering_folder = r""
# Folder where testing data is stored.
# Sub-folders "real_images", "generated_images" and "DALLE imagery" are read from it.
testing_data = r""
# Folder where fine-tuned data is stored.
# Per-class models are looked up under "ft_class_<k>" / "class_<k>_ft-1" inside it.
fine_tune_folder = r""
# Desired checkpoint file from 'gan_training'; passed to predict_unknown as the baseline model.
original_model = r""
# Supervised model from 'supervised_cluster_cnn'; passed to unsup_and_sup and process_unknown.
supervised_model = r""

# Passed unchanged to predict_unknown as its third argument.
# Presumably (height, width, channels) of the model input — TODO confirm against utils.
SHAPE = [476,476,3]
number_samples = 50    # Cap on the number of files sampled from each sub-folder of clustering_folder

In [None]:
# Test-set folders. The order (real, generated, DALL-E) matters: later cells
# treat index 0 of the per-folder results as the real-image set.
real_path = os.path.join(testing_data,"real_images")
fake_path = os.path.join(testing_data,"generated_images")
dalle_path = os.path.join(testing_data,"DALLE imagery")
paths = [real_path,fake_path,dalle_path]

# Collect at most `number_samples` entries from every sub-folder found while
# walking clustering_folder.
outlist = []
for path, subdirs, files in os.walk(clustering_folder):
    for sub in subdirs:
        class_folder = os.path.join(path, sub)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # "first N" selection reproducible across runs and platforms.
        for name in sorted(os.listdir(class_folder))[:number_samples]:
            outlist.append(os.path.join(class_folder, name))

# For each unsupervised class, compute the fraction of sampled files for which
# unsup_and_sup(...) is truthy (presumably "supervised prediction agrees with
# the unsupervised label" — confirm against utils.unsup_and_sup).
output = {}
for classx in range(18):
    # Use os.path to find the parent folder name instead of splitting on a
    # hard-coded backslash, which only works with Windows-style paths.
    class_holding = [
        file for file in outlist
        if os.path.basename(os.path.dirname(file)) == f'class_{classx}'
    ]
    count = sum(unsup_and_sup(file, supervised_model) for file in class_holding)
    # Divide by the number of files actually evaluated: a class folder may hold
    # fewer than `number_samples` files. Empty classes report 0.0, as before.
    output[classx] = count / len(class_holding) if class_holding else 0.0

In [None]:
# Configure fonts BEFORE any artist is created. rcParams are only picked up by
# text objects created afterwards, so setting them at the end of the cell made
# the first run on a fresh kernel look different from a re-run.
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["font.size"] = 12

fig, ax = plt.subplots(figsize=(12,7))
# Classes are stored 0-based in `output`; display them 1-based.
class_numbers = [x+1 for x in output.keys()]
bar_container = ax.bar(class_numbers,[v*100 for v in output.values()],color=(0.2, 0.4, 0.6, 0.6))
ax.bar_label(bar_container, fmt='%2.f%%');
ax.set_xticks(class_numbers);
ax.set_xlabel("Class Number",fontsize=12);
ax.set_title("Percentage of Top-3 Supervised Predictions of Unsupervised Class Label");
# No extension given, so matplotlib falls back to its default output format.
fig.savefig('x50_unsuptest')

In [None]:
# from sklearn.inspection import plot_partial_dependence
# disp=plot_partial_dependence(lstm_model, X_train, target=1, verbose =1, features=[0,1,2,3,4],feature_names=f_columns)

In [None]:
# Gather class-specific models
def assess(folder):
    """Score every file under `folder` with the original and fine-tuned models.

    All files below `folder` are collected recursively and handed to
    `process_unknown` together with `supervised_model`. Its result is iterated
    as a mapping ``name -> {class_match: percentage}``. For each name, the
    fine-tuned model of every matched class is applied via `predict_unknown`
    and the predictions are combined as an average weighted by `percentage`.
    The baseline prediction comes from `original_model`.

    Parameters
    ----------
    folder : str
        Directory containing the images to assess.

    Returns
    -------
    dict
        ``{name: {'original': <rounded prediction>, 'fine tuned': <rounded
        weighted prediction>}}``. If no fine-tuned model could be applied for
        a name, its 'fine tuned' value is 0 (empty sums over a small epsilon).

    Notes
    -----
    A fine-tuned model that fails to load or predict is skipped with a
    warning instead of aborting the whole run.
    """
    import warnings  # local import: only needed to report skipped models

    files = []
    for r, d, f in os.walk(os.path.abspath(folder)):
        for file in f:
            files.append(os.path.join(r, file))

    output = {}
    processed = process_unknown(files,supervised_model)
    for name, class_predictions in processed.items():
        # NOTE(review): assumes `name` resolves relative to `folder` (or is
        # already absolute) — confirm against utils.process_unknown.
        image_path = os.path.join(folder,name)
        result,weight = [],[]
        for class_match, percentage in class_predictions.items():
            # Build the path component-wise; a hard-coded backslash only works
            # on Windows.
            model = os.path.join(
                fine_tune_folder,
                f"ft_class_{class_match}",
                f"class_{class_match}_ft-1",
            )
            try:
                predict = predict_unknown(image_path,model,SHAPE)
                result.append(predict*percentage)
                weight.append(percentage)
            except Exception as e:
                # Best effort: keep going, but make the skip visible. The
                # message omits the image name so repeated failures of the
                # same class model are de-duplicated by the warnings filter.
                warnings.warn(
                    f"Skipping fine-tuned model for class {class_match}: {e!r}"
                )
        original_prediction = predict_unknown(image_path,original_model,SHAPE)
        # The epsilon guards against division by zero when no model was applied.
        output[name] = {'original':np.round(original_prediction,3), "fine tuned":np.round(sum(result)/(sum(weight)+0.000001),3)}
    return output

In [None]:
# Assess every test-set folder, preserving the order of `paths`.
results = list(map(assess, paths))

In [None]:
# Per test set: print mean scores, paired Wilcoxon signed-rank tests and the
# mean difference, then stack all per-image rows into one frame.
holding = []
for i, f in enumerate(results):
    dfx = pd.DataFrame.from_dict(f, orient='index')
    dfx['difference'] = dfx['fine tuned'] - dfx['original']
    print(np.mean(dfx['original']), np.mean(dfx['fine tuned']))
    # The first result set (index 0) is tested with alternative 'less';
    # every other set is tested with 'greater'.
    one_sided = 'less' if i == 0 else 'greater'
    # The one-sided test is reported first, followed by the two-sided test.
    for alternative in (one_sided, 'two-sided'):
        print(wilcoxon(dfx['original'], dfx['fine tuned'], alternative=alternative))
    print(np.mean(dfx['difference']), '\n')
    holding.append(dfx)
df = pd.concat(holding, axis=0)
df

In [None]:
# Write the concatenated per-image results (all test sets) to a CSV file in the
# current working directory.
# NOTE(review): the file name says "x200" while the plot is saved as
# "x50_unsuptest" — confirm the name matches the sample size actually used.
df.to_csv('x200_with_2.0_wilcoxon.csv')