# Full Text Screening

Este notebook implementa el proceso de screening de texto completo (full text screening) siguiendo las guías PRISMA-ScR (extensión de PRISMA para revisiones de alcance, *scoping reviews*).

## Objetivos:
- Obtener textos completos de los artículos incluidos en el screening de título y resumen (TIAB)
- Aplicar criterios de inclusión/exclusión detallados
- Realizar screening independiente por múltiples revisores
- Resolver discrepancias y generar lista final
- Documentar razones de exclusión detalladas

In [None]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
from udfs import *

# Initial configuration: lift pandas' display limits (column count, console
# width, per-cell text width) by setting each option to None, so wide frames
# and long text fields are shown without truncation.
for _display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

## 1. Carga de Artículos para Full Text Screening

In [None]:
# Cargar resultados de TIAB screening
# tiab_included = pd.read_csv('../tiab_screening/resultados/tiab_included_records.csv')
# print(f"Artículos para full text screening: {len(tiab_included)}")

## 2. Obtención de Textos Completos

In [None]:
# Check which full-text PDFs are available.
# NOTE: 'inputs' is a relative path, so it is resolved against the kernel's
# current working directory.
fulltext_path = Path('inputs')

# A missing folder would otherwise be indistinguishable from an empty one
# (glob simply yields nothing), so flag it explicitly instead of failing.
if not fulltext_path.is_dir():
    print(f"Advertencia: no se encontró el directorio {fulltext_path}/")

# Match the extension case-insensitively ('.pdf' as well as '.PDF') and sort
# the result: glob order depends on the filesystem, and a stable order keeps
# every downstream step reproducible across runs and machines.
available_pdfs = sorted(
    pdf for pdf in fulltext_path.glob('*') if pdf.suffix.lower() == '.pdf'
)

print(f"PDFs disponibles en inputs/: {len(available_pdfs)}")

# Identify articles with no full text available
# missing_fulltext = identify_missing_fulltext(tiab_included, available_pdfs)
# print(f"Artículos sin texto completo: {len(missing_fulltext)}")

## 3. Procesamiento de Textos Completos

In [None]:
# Extraer texto de PDFs disponibles
# extracted_texts = extract_text_from_pdfs(available_pdfs)
# print(f"Textos extraídos exitosamente: {len(extracted_texts)}")

## 4. Criterios Detallados de Inclusión/Exclusión

In [None]:
# Detailed eligibility criteria for full-text screening.
# Two groups ('inclusion' and 'exclusion'), each mapping a criterion name to
# its free-text description. Keyword-argument order fixes the key order.
detailed_criteria = dict(
    inclusion=dict(
        population='Adult patients with ADPKD',
        biomarkers='Any biomarker for disease progression, diagnosis, or prognosis',
        study_design='Observational studies, clinical trials, cohort studies',
        outcomes='Biomarker performance, validation, or clinical utility',
        language='English, Spanish, Portuguese',
        time_period='No restrictions',
    ),
    exclusion=dict(
        population='Pediatric only, animals, in vitro only',
        study_type='Reviews, editorials, case reports (<10 patients)',
        biomarkers='No biomarker data or genetic testing only',
        outcomes='No relevant clinical outcomes',
        access='Full text not available',
        other='Duplicate publication, conference abstract only',
    ),
)

print("Criterios detallados definidos para full text screening")

## 5. Screening Automatizado de Textos Completos

In [None]:
# Aplicar screening automatizado inicial
# auto_screening_results = automated_fulltext_screening(extracted_texts, detailed_criteria)
# print(f"Resultados de screening automatizado: {len(auto_screening_results)}")

## 6. Preparación para Screening Manual

In [None]:
# Preparar formularios de screening para revisores
# screening_forms = prepare_fulltext_screening_forms(tiab_included, detailed_criteria)
# print("Formularios de screening preparados")

## 7. Análisis de Resultados de Screening

In [None]:
# Cargar resultados de screening manual (placeholder)
# reviewer1_fulltext = load_fulltext_screening('reviewer1_fulltext.csv')
# reviewer2_fulltext = load_fulltext_screening('reviewer2_fulltext.csv')

# Calcular concordancia
# fulltext_kappa = calculate_fulltext_agreement(reviewer1_fulltext, reviewer2_fulltext)
# print(f"Concordancia en full text screening (Kappa): {fulltext_kappa:.3f}")

## 8. Resolución de Discrepancias

In [None]:
# Identificar y resolver discrepancias
# fulltext_discrepancies = identify_fulltext_discrepancies(reviewer1_fulltext, reviewer2_fulltext)
# print(f"Discrepancias en full text: {len(fulltext_discrepancies)}")

# resolved_fulltext = resolve_fulltext_discrepancies(fulltext_discrepancies)
# print(f"Discrepancias resueltas: {len(resolved_fulltext)}")

## 9. Resultados Finales

In [None]:
# Combinar resultados finales
# final_included = combine_fulltext_results(reviewer1_fulltext, reviewer2_fulltext, resolved_fulltext)
# included_studies = final_included[final_included['decision'] == 'include']

# print(f"Estudios finalmente incluidos: {len(included_studies)}")
# print(f"Estudios excluidos en full text: {len(final_included) - len(included_studies)}")

## 10. Análisis de Razones de Exclusión

In [None]:
# Analizar razones de exclusión
# exclusion_analysis = analyze_exclusion_reasons(final_included)
# print("Razones de exclusión en full text screening:")
# print(exclusion_analysis)

## 11. Guardado de Resultados Finales

In [None]:
# Guardar estudios finalmente incluidos
# included_studies.to_csv('resultados/final_included_studies.csv', index=False)
# final_included.to_csv('resultados/fulltext_all_decisions.csv', index=False)

# print("Resultados guardados:")
# print("  - resultados/final_included_studies.csv")
# print("  - resultados/fulltext_all_decisions.csv")

## 12. Reporte Final de Screening

In [None]:
# Generar reporte final completo del proceso de screening
# final_screening_report = generate_final_screening_report(
#     tiab_included, final_included, fulltext_kappa, exclusion_analysis
# )
# print(final_screening_report)