# Resumen de Resultados - Proyecto Dark Matter

En este notebook se presentan los resultados más relevantes del modelo de detección de candidatas a materia oscura, obtenidos a partir de la fusión de:
- Probabilidad de materia oscura (`prob_ann`) generada por una red neuronal.
- Score de anomalía (`anomaly_score_norm`) obtenido mediante One-Class SVM.

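Se visualizan los Top 5 candidatos, los gráficos principales (scatter, barras e histograma del score combinado), el resumen del log de ejecución, un resumen estadístico, la tabla Top 20 y la correlación entre scores.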

In [None]:
import pandas as pd
import os
from datetime import datetime
import matplotlib.pyplot as plt
from IPython.display import Image, display, Markdown
import seaborn as sns
import numpy as np
import matplotlib.dates as mdates

In [None]:
def _newest_first(directory, prefix, suffix=''):
    """Return the file names in `directory` matching `prefix`/`suffix`, newest first.

    Args:
        directory: Directory to scan (non-recursive).
        prefix: Required start of the file name.
        suffix: Required end of the file name; the default '' matches any ending.

    Returns:
        A non-empty list of file names sorted by modification time,
        most recent first.

    Raises:
        FileNotFoundError: If no file in `directory` matches, so the notebook
            fails naming the missing pattern instead of an opaque IndexError.
    """
    matches = [
        name for name in os.listdir(directory)
        if name.startswith(prefix) and name.endswith(suffix)
    ]
    if not matches:
        raise FileNotFoundError(
            f"No file matching '{prefix}*{suffix}' found in {directory}"
        )
    matches.sort(
        key=lambda name: os.path.getmtime(os.path.join(directory, name)),
        reverse=True,
    )
    return matches


# The project root is the parent of the current working directory.
project_root = os.path.abspath(os.pardir)
results_dir = os.path.join(project_root, 'outputs', 'results')

# Most recent combined ANN + OCSVM results CSV
files = _newest_first(results_dir, 'unids_combined_ann_ocsvm_', '.csv')
latest_csv = files[0]
csv_path = os.path.join(results_dir, latest_csv)

# Most recent Top 5 table
# NOTE(review): no extension filter here, unlike the CSV lookup above —
# confirm that only CSV files use this prefix, since the path is fed to read_csv.
top5_files = _newest_first(results_dir, 'top5_unids_ann_ocsvm_')
top5_path = os.path.join(results_dir, top5_files[0])

# Most recent plots
scatter_files = _newest_first(results_dir, 'scatter_ann_vs_anomaly_')
bar_files = _newest_first(results_dir, 'top10_combined_ann_ocsvm_')
scatter_path = os.path.join(results_dir, scatter_files[0])
bar_path = os.path.join(results_dir, bar_files[0])

# Most recent execution log
log_files = _newest_first(results_dir, 'fuse_ann_ocsvm_log_')
log_path = os.path.join(results_dir, log_files[0])

In [None]:
# Load the most recent Top 5 table and render it with in-cell bars
# on the three score columns.
df_top5 = pd.read_csv(top5_path)
print("Top 5 UNIDs con mayor score combinado:")
df_top5.style.bar(
    subset=['prob_ann', 'anomaly_score_norm', 'combined_score'],
    color='red',
)

In [None]:
# Show the most recent saved scatter plot image (path resolved in the config cell).
print("Scatter Plot: Probabilidad ANN vs Score Anomalía")
display(
    Image(filename=scatter_path)
)


In [None]:
# Show the most recent saved Top 10 bar plot image (path resolved in the config cell).
print("Bar Plot Top 10: Score Combinado")
display(
    Image(filename=bar_path)
)

In [None]:
# === Combined score histogram ===
# Load the full results table here: this is the first cell that uses `df_full`,
# so it must exist on a fresh kernel (Restart & Run All) before plotting.
df_full = pd.read_csv(csv_path)

# Explicit figure/axes interface instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df_full['combined_score'], bins=30, edgecolor='black', color='#4daf4a')
ax.set_xlabel('Score Combinado')
ax.set_ylabel('Número de UNIDs')
ax.set_title('Distribución del Score Combinado ANN + OCSVM')
fig.tight_layout()
plt.show()


In [None]:
# Read the latest execution log and render it verbatim inside a fenced
# Markdown code block.
print("Resumen del Log de Ejecución:")
# NOTE(review): no explicit encoding is passed, so the platform default is
# used — confirm it matches the encoding the log was written with.
with open(log_path, mode='r') as log_file:
    log_content = log_file.read()
fenced_log = f'```\n{log_content}\n```'
display(Markdown(fenced_log))

In [None]:
# === Summary statistics ===
# Descriptive statistics of the three score columns, rounded to 4 decimals.
(
    df_full[['prob_ann', 'anomaly_score_norm', 'combined_score']]
    .describe()
    .round(4)
)


In [None]:
# Load the full combined results table and preview its first 10 rows.
df_full = pd.read_csv(filepath_or_buffer=csv_path)
df_full.head(n=10)

In [None]:
# === Top 20 UNIDs table ===
# Rank rows by combined score (highest first), keep the best 20 and
# renumber them from zero before shifting to a 1-based ranking.
top20 = (
    df_full
    .sort_values(by='combined_score', ascending=False)
    .head(20)
    .reset_index(drop=True)
)
top20.index = top20.index + 1  # 1-based ranking
top20.style.bar(
    subset=['prob_ann', 'anomaly_score_norm', 'combined_score'],
    color='magenta',
)


In [None]:
# === Correlation heatmap ===
# Pairwise correlation (pandas default method) between the three score columns.
# seaborn is already imported as `sns` in the imports cell at the top of the
# notebook, so it is not re-imported here.
corr = df_full[['prob_ann', 'anomaly_score_norm', 'combined_score']].corr()

# Explicit figure/axes interface; the heatmap is drawn on `ax`.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlación entre Scores')
fig.tight_layout()
plt.show()


Preguntas para la discusión:

- ¿Qué significa tener un score combinado alto?
- ¿Cuántas fuentes destacan?
- ¿Qué pasos seguirías después?