# In [None]:
import json
import numpy as np
from datetime import datetime


# Accumulators filled once per held-out user by the leave-one-user-out loop.
all_accuracies = []      # test accuracy per user
all_f1_scores = []       # F1 value reported by the F1 callback per user
all_test_results = []    # compact per-user test metrics
all_results = []         # detailed per-user record (metrics + training history)

# A user folder is any directory directly under data_root whose name ends
# with "_results_final"; the text before that suffix is the user's initials.
user_folders = []
for entry in os.listdir(data_root):
    if entry.endswith('_results_final') and os.path.isdir(os.path.join(data_root, entry)):
        user_folders.append(entry)

user_initials = [folder.partition('_results_final')[0] for folder in user_folders]

# NOTE(review): this path is relative to the working directory, not joined
# with data_root like the user folders above -- confirm that is intended.
validation_dir = 'DTT_results_final/DTT_inverted'

print("User folders:", user_folders)
print("User initials:", user_initials)

# One validation generator is built here and reused for every fold.
# NOTE(review): shuffle=True is harmless for Keras' own validation metrics,
# but if F1ScoreCallback pairs predictions with `generator.classes` the
# shuffled order would misalign them -- confirm against the callback.
validation_generator = datagen.flow_from_directory(
    validation_dir,
    shuffle=True,
    batch_size=BATCH_SIZE,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
)

# Leave-one-user-out loop: for every user except DTT, fine-tune a freshly
# created model on that user's train split, evaluate it on the user's test
# split, record the metrics and save the resulting weights.
for test_user in user_initials:
    # DTT's images are the shared validation set, so DTT is never held out.
    if test_user == "DTT":
        print("SKIPPING USER DTT because he is part of validation set!")
        continue

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"LEAVE-ONE-USER-OUT: Testing on user {test_user}")
    print(banner)

    train_dir, test_dir = get_user_split_paths(test_user)

    # Both split directories must exist; otherwise this user is skipped.
    if not (os.path.exists(train_dir) and os.path.exists(test_dir)):
        print(f"Split directories for {test_user} not found!!!")
        continue

    print(f"Train directory: {train_dir}")
    print(f"Test directory: {test_dir}")

    # Options shared by the train and test generators.
    flow_options = dict(target_size=(IMAGE_SIZE, IMAGE_SIZE), batch_size=BATCH_SIZE)
    train_generator = datagen.flow_from_directory(train_dir, shuffle=True, **flow_options)
    # The test generator is not shuffled so batches come in a fixed order.
    test_generator = datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

    print(f"Found {train_generator.samples} TRAINING images belonging to {train_generator.num_classes} classes.")
    print(f"Found {test_generator.samples} TEST images belonging to {test_generator.num_classes} classes.")
    print(f"Using {validation_generator.samples} VALIDATION images belonging to {validation_generator.num_classes} classes.")

    print(f"Training with {train_generator.samples} images, testing on {test_generator.samples} images.")

    # Every fold starts from the same pre-trained weights file.
    # NOTE(review): if those weights were trained on data that includes the
    # held-out user, the test metrics below are optimistic -- confirm.
    model = create_model()
    model.load_weights('proper_split_server.weights.h5')
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    f1_callback = F1ScoreCallback(validation_generator)

    print(f"\nTraining model for test user: {test_user}")

    # Stop once val_accuracy has not improved for 3 epochs.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=3,
        restore_best_weights=True,
        verbose=1,
    )
    history = model.fit(
        train_generator,
        epochs=EPOCHS,
        validation_data=validation_generator,
        callbacks=[f1_callback, early_stopping],
    )

    print(f"\nEvaluating model for test user: {test_user}")
    results = model.evaluate(test_generator, verbose=0)
    test_loss, test_accuracy = results[0], results[1]
    # Last F1 value recorded by the callback (0.0 when it recorded nothing).
    # NOTE(review): this is the last epoch's value, which may not match the
    # weights restored by early stopping -- confirm which one is wanted.
    if f1_callback.f1_scores:
        final_f1 = f1_callback.f1_scores[-1]
    else:
        final_f1 = 0.0

    print(f"Test User {test_user} - Final Accuracy: {test_accuracy:.4f}, Final F1 od callbackot: {final_f1:.4f}")

    print(f"\nEvaluating model on test data for user: {test_user}")

    # Collect true and predicted class indices batch by batch.  The batches
    # are fetched by index for exactly len(test_generator) steps.
    y_true = []
    y_pred = []
    for batch_idx in range(len(test_generator)):
        x_batch, y_batch = test_generator[batch_idx]
        preds = model.predict(x_batch, verbose=0)
        y_true.extend(np.argmax(y_batch, axis=1))
        y_pred.extend(np.argmax(preds, axis=1))

    # From here on test_accuracy is the value recomputed from the collected
    # predictions; it replaces the one returned by model.evaluate.
    test_accuracy = np.mean(np.asarray(y_true) == np.asarray(y_pred))
    test_f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
    test_f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    print(f"Test User {test_user} -> Accuracy: {test_accuracy:.4f}, Macro F1: {test_f1_macro:.4f}, Weighted F1: {test_f1_weighted:.4f}")

    hist = history.history
    epochs_trained = len(hist['loss'])

    all_test_results.append({
        'user': test_user,
        'accuracy': float(test_accuracy),
        'f1_macro': float(test_f1_macro),
        'f1_weighted': float(test_f1_weighted),
        'epochs_trained': epochs_trained,
    })

    all_accuracies.append(test_accuracy)
    all_f1_scores.append(final_f1)

    # Validation entries are None when Keras did not record them.
    training_history = {
        'epochs': epochs_trained,
        'final_train_loss': float(hist['loss'][-1]),
        'final_val_loss': float(hist['val_loss'][-1]) if 'val_loss' in hist else None,
        'final_train_accuracy': float(hist['accuracy'][-1]),
        'final_val_accuracy': float(hist['val_accuracy'][-1]) if 'val_accuracy' in hist else None,
    }
    record = {
        'user': test_user,
        'accuracy': float(test_accuracy),
        'F1 score': float(test_f1_macro),
        'f1_weighted': float(test_f1_weighted),
        'callback f1_score': float(final_f1),
        'loss': float(test_loss),
        'training_samples': int(train_generator.samples),
        'test_samples': int(test_generator.samples),
        'training_history': training_history,
    }
    all_results.append(record)

    # Per-user recap, printed from the record that was just stored.
    print(f"{'='*50}")
    print(f"user: {record['user']}")
    print(f"accuracy: {record['accuracy']:.4f}")
    print(f"f1_score: {record['callback f1_score']:.4f}")
    print(f"loss: {record['loss']:.4f}")
    print(f"training_samples: {record['training_samples']}")
    print(f"test_samples: {record['test_samples']}")
    print(f"epochs: {training_history['epochs']}")
    print(f"final_train_loss: {training_history['final_train_loss']:.4f}")
    print(f"final_val_loss: {training_history['final_val_loss']}")
    print(f"final_train_accuracy: {training_history['final_train_accuracy']:.4f}")
    print(f"final_val_accuracy: {training_history['final_val_accuracy']}")

    weights_path = f"out_user_{test_user}_result.weights.h5"
    model.save_weights(weights_path)
    print(f"Saved weights: {weights_path}")
    print(f"{'='*50}")

    # Store the class-name -> index mapping next to the weights.
    class_indices = test_generator.class_indices
    with open(f"out_user_{test_user}_class_indices.json", 'w') as f:
        json.dump(class_indices, f)

    # Drop the model and reset Keras' global state before the next fold.
    del model
    tf.keras.backend.clear_session()

# Aggregate the per-user results collected by the leave-one-user-out loop
# and print summary statistics.
#
# all_accuracies holds the test accuracy of each evaluated user.
# all_f1_scores holds, per user, the last value reported by the F1 callback
# (the callback is constructed with the validation generator, so this is
# presumably a validation F1, not a test F1 -- verify against the callback).
# The F1 values computed on each user's test split are summarised separately
# at the end from all_test_results.
all_accuracies = np.array(all_accuracies)
all_f1_scores = np.array(all_f1_scores)

print(f"\n{'='*80}")
print("CALCULATING CROSS-VALIDATION STATISTICS")
print(f"{'='*80}")

if not all_results:
    # Every user can be skipped (only DTT present, or split directories
    # missing).  np.min / np.max / np.argmax raise ValueError on empty
    # arrays, so report the situation instead of crashing.
    print("No users were evaluated; skipping cross-validation statistics.")
else:
    mean_accuracy = np.mean(all_accuracies)
    std_accuracy = np.std(all_accuracies)
    mean_f1 = np.mean(all_f1_scores)
    std_f1 = np.std(all_f1_scores)
    min_accuracy = np.min(all_accuracies)
    max_accuracy = np.max(all_accuracies)
    min_f1 = np.min(all_f1_scores)
    max_f1 = np.max(all_f1_scores)

    print("\nPer-user results:")
    for result in all_results:
        # The per-user records have no 'f1_score' key.  'callback f1_score'
        # is the value that feeds all_f1_scores, so this listing matches the
        # mean/min/max F1 printed below.
        print(f"User {result['user']}: Accuracy = {result['accuracy']:.4f}, F1 = {result['callback f1_score']:.4f}")

    print("\nOverall Statistics:")
    print(f"Mean Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")
    print(f"Mean F1 Score: {mean_f1:.4f} ± {std_f1:.4f}")
    print(f"Min Accuracy: {min_accuracy:.4f}")
    print(f"Max Accuracy: {max_accuracy:.4f}")
    print(f"Min F1 Score: {min_f1:.4f}")
    print(f"Max F1 Score: {max_f1:.4f}")
    print(f"Accuracy Range: {max_accuracy - min_accuracy:.4f}")
    print(f"F1 Score Range: {max_f1 - min_f1:.4f}")

    print(f"\n{'='*80}")
    # NOTE(review): nothing in this cell writes a JSON file with these
    # results; confirm the save happens elsewhere or correct this message.
    print("RESULTS SAVED TO JSON")

    # "\ Summary:" was an invalid escape sequence that printed a literal
    # backslash; a leading newline was intended.
    print("\nSummary:")
    print("Leave-One-User-Out Cross-Validation Results:")
    print(f"Number of users: {len(all_accuracies)}")
    print(f"Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f} (mean ± std)")
    print(f"F1-Score: {mean_f1:.4f} ± {std_f1:.4f} (mean ± std)")
    # all_results and all_accuracies are appended to in lockstep, so an index
    # into one identifies the same user in the other.
    print(f"Best performing user: {all_results[np.argmax(all_accuracies)]['user']} ({max_accuracy:.4f})")
    print(f"Worst performing user: {all_results[np.argmin(all_accuracies)]['user']} ({min_accuracy:.4f})")
    print(f"Coefficient of Variation (Accuracy): {(std_accuracy/mean_accuracy*100):.2f}%")

    # Statistics over the metrics computed on each user's test split.
    print("\nBETTER F1 SCOREE od final resultot!!!:")
    accuracies = [r['accuracy'] for r in all_test_results]
    f1_macros = [r['f1_macro'] for r in all_test_results]
    f1_weighteds = [r['f1_weighted'] for r in all_test_results]

    print(f"Mean Accuracy: {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")
    print(f"Mean Macro F1 Score: {np.mean(f1_macros):.4f} ± {np.std(f1_macros):.4f}")
    print(f"Mean Weighted F1 Score: {np.mean(f1_weighteds):.4f} ± {np.std(f1_weighteds):.4f}")
