In [1]:
import nbimporter
from docxtpl import DocxTemplate, InlineImage
from docx.shared import Cm
import csv
from PIL import Image
from io import BytesIO
from aux_calculations import convert_to_float,conv_vel_a_m,text_mass_hypertrophy,text_diam_LV


# Extract the data from the document tables
def extract_patient_info(doc)->dict:
    '''
    Extract the patient data from the table at index 1 of a Word document.

    Every cell of that table, except the cells of its first row, is read as
    a ``label: value`` pair split on the first colon.

    doc: object exposing ``tables`` -> ``rows`` -> ``cells`` -> ``text``
         (a python-docx Document in this notebook).

    returns a dictionary mapping each stripped label to its stripped value,
    with double spaces in the value collapsed to a single space. Cells
    without a colon are skipped. When a label appears more than once
    the last occurrence wins. An empty dictionary is
    returned when the document has fewer than two tables.
    '''
    data = {}
    # Slicing keeps the "no such table -> empty result" behaviour without
    # iterating over every table in the document.
    for table in doc.tables[1:2]:
        for row_index, row in enumerate(table.rows):
            # Row 0 is not read as patient data.
            if row_index == 0:
                continue
            for cell in row.cells:
                # partition keeps everything after the FIRST colon, so values
                # that contain a colon themselves (e.g. a time) are not cut.
                label, separator, content = cell.text.partition(':')
                if not separator:
                    continue
                data[label.strip()] = content.strip().replace('  ', ' ')
    return data
##########################

def read_csv(csv_path: str) -> list:
    """Load a CSV file and return its rows as a list of dictionaries.

    The first line of the file supplies the dictionary keys. The file is
    opened in text mode without an explicit encoding, so the platform
    default encoding is used.
    """
    with open(csv_path, mode='r') as handle:
        return [record for record in csv.DictReader(handle)]



def find_measurement_limit(data: list) -> int:
    """Return the index of the first row holding a section marker.

    A row matches when one of its values is exactly 'Lucho Card' or
    'card2'. ``None`` is returned when no row matches.
    """
    markers = ('Lucho Card', 'card2')
    return next(
        (
            position
            for position, record in enumerate(data)
            if any(marker in record.values() for marker in markers)
        ),
        None,
    )


def extract_row(row: dict)-> dict:
    '''
    Pull the value(s) out of one CSV row (a dictionary keyed by column name).

    Returns ``[row['1'], row['2']]`` when column '2' is present, not None,
    not empty and numeric once '-' and '.' are removed; otherwise returns
    ``row['1']`` alone.
    '''
    second = row.get('2')

    # Missing, None or empty second column: only the first value is kept.
    if second is None or second == '':
        return row['1']

    digits_only = second.replace('-', '').replace('.', '')
    if digits_only.isnumeric():
        return [row['1'], second]

    return row['1']



def extract_measurements_from_csv(data: list, measurements_list: list) -> dict:
    """Build the measurement dictionary from the rows of a worksheet CSV.

    data: rows as returned by ``read_csv`` (one dictionary per row).
    measurements_list: names whose FOLLOWING row is also stored, under the
        key ``<name>_grad``.

    For every row with a non-empty name, ``extract_row(row)`` is stored under
    that name. For rows located more than one position after the marker row
    found by ``find_measurement_limit``, a second key/value pair is read from
    two adjacent columns whose position depends on the number of columns in
    the row (5, 6 or 7). Rows with any other column count contribute no
    second pair.

    The collected dictionary is then passed through ``convert_to_float``,
    ``conv_vel_a_m``, ``text_mass_hypertrophy`` and ``text_diam_LV`` and the
    result is returned.
    """
    limite = find_measurement_limit(data)
    extracted_data = {}
    extract_next_row = False
    vel_grad = None

    # Offset (counted from the end of the row) of the column holding the
    # secondary key, by number of columns; its value is in the next column.
    key_offsets = {7: -4, 6: -3, 5: -2}

    for index, row in enumerate(data):
        # The dictionary key is the first value of the row.
        # NOTE(review): the header is looked up with the byte-order-mark
        # characters as they come out of the reader - presumably a UTF-8 BOM
        # decoded with a single-byte codec; confirm before changing encodings.
        key = row.get('ï»¿Name', '').strip().replace("'", '')

        if not key:
            continue

        # The previous row was a listed measurement: this row is stored
        # a second time under "<previous name>_grad".
        if extract_next_row:
            extracted_data[vel_grad] = extract_row(row)
            extract_next_row = False

        if key in measurements_list:
            # Flag the next named row for the "_grad" entry.
            extract_next_row = True
            vel_grad = key + '_grad'

        extracted_data[key] = extract_row(row)

        # The marker row starts the calculations; rows past it also carry a
        # second key/value pair in two adjacent columns.
        if limite is None or index <= limite + 1:
            continue

        columns = list(row.keys())
        k = key_offsets.get(len(columns))
        if k is None:
            # Unknown layout: skip instead of reusing the offsets computed for
            # an earlier row (or failing because none were ever computed).
            continue

        raw_key = row[columns[k]]
        if raw_key is None:
            continue

        key_2 = raw_key.strip().replace("'", '')
        value_2 = row[columns[k + 1]]

        # A list value is reduced to its first element so that no extra
        # symbols complicate rendering.
        if isinstance(value_2, list):
            extracted_data[key_2] = value_2[0]
        else:
            extracted_data[key_2] = value_2

    extracted_data = convert_to_float(extracted_data)
    extracted_data = conv_vel_a_m(extracted_data)
    extracted_data = text_mass_hypertrophy(extracted_data)
    extracted_data = text_diam_LV(extracted_data)
    return extracted_data


def extract_patient_measurements(path)-> dict:
    '''
    Extract the measurements from the CSV worksheet that accompanies a report.

    path: path object of the .docx report (must provide ``with_suffix`` and
          ``as_posix``). The CSV path is derived by switching the suffix to
          ``.csv`` and replacing 'Report.V3' with 'Worksheet'.

    returns the dictionary produced by ``extract_measurements_from_csv``, or
    an empty dictionary when the CSV cannot be read or processed (the error
    is printed).
    '''
    # Turn the docx path into the matching csv path.
    csv_path = path.with_suffix('.csv').as_posix().replace('Report.V3', 'Worksheet')

    # Measurements whose following row is stored as "<name>_grad"
    # (per the original note, the gradients sit in the row after the velocities).
    measurements_list = ['RVOT Vmax', 'AV Vmax', 'LVOT Vmax', 'TR Vmax', 'MV Vel E', 'MV Vel A']

    # Bound up front so the failure path returns a value instead of raising
    # UnboundLocalError after the error has already been reported.
    measurement_data = {}
    try:
        csv_data = read_csv(csv_path)
        measurement_data = extract_measurements_from_csv(csv_data, measurements_list)
    except Exception as e:
        print(f"Error: {e}, unable to process the file")

    return measurement_data

#########################

def image_extractor(doc, template, tipo, image_width=Cm(8), image_height=Cm(5.36)) -> dict:
    '''
    Collect the images embedded in a report docx as InlineImage objects.

    Parameters:
        - doc: document the images are read from
        - template: template the InlineImage objects are bound to
        - tipo: template type (for example 'stress')
        - image_width / image_height: size given to regular images
          (8 x 5.36 cm by default)

    When tipo is 'stress', images number 1 and 2 keep their original bytes
    and are sized 16.23 x 8.22 cm and 16.23 x 6.39 cm respectively. Every
    other image is saved again as JPEG (quality 85), being converted to RGB
    first when its format is not JPEG.

    Returns {'image': [{'key': <image name>, 'image': <InlineImage>}, ...]}
    ordered by image number.
    '''
    collected = {}
    relationships = doc.part.rels

    for rel_id in relationships:
        relationship = relationships[rel_id]
        if 'image' not in relationship.reltype:
            continue

        raw_bytes = relationship.target_part.blob
        picture = Image.open(BytesIO(raw_bytes))

        # "media/imageN.ext" -> "imageN" -> N
        name = relationship.target_ref.split('.')[0].replace(r'media/', '')
        number = int(name.replace('image', ''))

        if tipo == 'stress' and number in [1, 2]:
            # The first two stress images are passed through untouched and
            # get their own fixed sizes.
            stream = BytesIO(raw_bytes)
            stream.seek(0)
            special_height = Cm(8.22) if number == 1 else Cm(6.39)
            collected[name] = InlineImage(template,
                                          stream,
                                          width=Cm(16.23), height=special_height)
            continue

        # Regular image: re-encode as JPEG, converting to RGB when needed.
        if picture.format != 'JPEG':
            picture = picture.convert("RGB")
        stream = BytesIO()
        picture.save(stream, format='JPEG', quality=85)
        stream.seek(0)

        collected[name] = InlineImage(template,
                                      stream,
                                      width=image_width,
                                      height=image_height)

    # Emit the images ordered by their number.
    ordered_names = sorted(collected, key=lambda image_name: int(image_name.replace('image', '')))
    return {'image': [{'key': name, 'image': collected[name]} for name in ordered_names]}


def mot_extractor(doc)->dict:
    '''
    Extract the segment names and their scores from the motility score table.

    The table read is the first table nested in the first cell of row 1 of
    the document table at index 2. Only rows 1 to 17 of that nested table
    are used. In each row the cell at index 5 gives the segment name
    (lower-cased, spaces replaced by underscores) and the cells at indexes
    6 to 8 give its values.
    NOTE(review): the original comments describe those three values as
    baseline, peak and recovery - confirm against a real report.

    returns {'mot': [{'key': <segment>, 'motilidad': [<values>]}, ...]};
    the list is empty when no row provides both a name and values.
    '''
    table = doc.tables[2].rows[1].cells[0].tables[0]
    mot = {}
    for index_r, row in enumerate(table.rows):
        # Row 1 holds the first segment, row 17 the last one.
        if not 1 <= index_r <= 17:
            continue

        cells = row.cells
        if len(cells) <= 5:
            continue

        key = cells[5].text.lower().replace(' ', '_')
        if not key:
            continue

        values = [cell.text for cell in cells[6:9]]
        if values:
            mot[key] = values

    # Built once, after the loop, so the result exists even when no row
    # qualified.
    return {'mot': [{'key': k, 'motilidad': v} for k, v in mot.items()]}


In [2]:
# Notebook scratch cell: opens one hard-coded sample report so the extractor
# functions can be tried interactively on it.
from docx import Document
# Path of a report on the author's machine; adjust it before running elsewhere.
path=r"C:\Users\Luciano\Desktop\[testtest]-[20241023000]-[8d90b9d3-30a9-40fd-aaa2-2d94d4dd1694]\1_S1-6P_card2.Report.V3.docx"
doc=Document(path)

In [12]:
def update_dictionary(dic:dict)->dict:
    '''
    Promote calculations found inside the values of a dictionary to keys.

    Each list (or tuple) value of ``dic`` is searched for the known
    calculation names; when one is found, the element that follows it is
    stored under that name. Values that are not lists/tuples are ignored, and
    a calculation name that is the last element (nothing follows it) is
    skipped.

    The dictionary is updated in place and also returned.
    '''
    calc_list = (
        '*Dimensionless Index', '*Flow Rate AS', 'CSA(LVOT)', 'SV(LVOT)', 'CSA(AV SV)', 'Reg Vol(PISA TR)',
        'EROA(PISA TR)', 'Flow Rate(PISA TR)', 'Reg Vol(PISA MR)', 'EROA(PISA MR)', 'Flow Rate(PISA MR)',
        'AVA(VTI)', 'RWT(2D)', 'LVd Mass(2D-ASE)',
    )
    # Collected separately so the dictionary is not modified while its values
    # are being iterated.
    updates = {}
    for val in dic.values():
        if not isinstance(val, (list, tuple)):
            continue
        for c in calc_list:
            if c not in val:
                continue
            position = val.index(c)
            # Guard against a name with no following element.
            if position + 1 < len(val):
                updates[c] = val[position + 1]
    dic.update(updates)
    return dic

def extract_measurements(doc)->dict:
    '''
    Extract the measurements from the tables nested inside the document
    table at index 2.

    For every nested table, rows from index 2 onwards are read. The first
    cell of a row is the label: its first two spaces are removed, and if the
    result still starts with a space it is treated as a sub-label appended
    to the last main label, otherwise it becomes the new main label. The
    remaining cells that are non-empty and do not end with 'Last' are
    collected (stripped, with '-' removed) into a list stored under
    main label + sub-label.

    NOTE(review): a sub-label met before any main label has been seen raises
    a NameError, because no main label exists yet.

    returns the collected dictionary after it has been passed through
    ``update_dictionary``.
    '''
    def nested_rows():
        # Walk table 2 -> rows -> cells -> nested tables and yield the
        # nested rows past the first two of each nested table.
        for outer_table in list(doc.tables)[2:3]:
            for outer_row in outer_table.rows:
                for outer_cell in outer_row.cells:
                    for nested_table in outer_cell.tables:
                        yield from list(nested_table.rows)[2:]

    data = {}
    for nested_row in nested_rows():
        collected = []
        for position, cell in enumerate(nested_row.cells):
            if position == 0:
                label = cell.text.replace(' ', '', 2)
                if label.startswith(' '):
                    subkey = label
                else:
                    key, subkey = label, ''
                continue

            content = cell.text.strip()
            if content == '' or cell.text.endswith('Last'):
                continue
            collected.append(content.replace('-', ''))
            data[key + subkey] = collected

    return update_dictionary(data)
# #                 if row_index>0:
#                     for cell in row.cells:
#                         if cell.text.lower().strip not in data:  
#                             key=cell.text.split(':')[0].strip()
#                             value=cell.text.split(':')[1].strip().replace('  ',' ')
#                             data[key]=value            

In [13]:
extract_measurements(doc)


{'AV Trace  Mean Vel(Max Mode)': ['57.82', 'cm/s'],
 'AV Trace  AT': ['150.00', 'ms'],
 'AV Trace  Vmax': ['66.87', 'cm/s'],
 'AV Trace  Peak PG': ['1.79', 'mmHg'],
 'AV Trace  AV VTI': ['15.36', 'cm'],
 'AV Trace  Mean PG(Max Mode)': ['1.39', 'mmHg'],
 'AV Trace  E. Time': ['270.00', 'ms'],
 'LVOT Trace  Vmax': ['70.61', 'cm/s'],
 'LVOT Trace  Peak PG': ['1.99', 'mmHg'],
 'LVOT Trace  LVOT VTI': ['10.71', 'cm'],
 'LVOT Trace  E. Time': ['0.18', 's'],
 'AR PHT': ['505.00', 'ms'],
 'PISA MR  Als.Vel': ['20.00', 'cm/s'],
 'MR Trace  MR VTI': ['5.94', 'cm'],
 'MR Trace  Vmax': ['71.58', 'cm/s'],
 'TV Vmax': ['25.79', 'cm/s'],
 'TV Vmax  PG': ['0.27', 'mmHg'],
 'PISA TR  Als.Vel': ['60.00', 'cm/s'],
 'TR Trace  TR VTI': ['14.45', 'cm'],
 'TR Trace  Vmax': ['65.96', 'cm/s'],
 'card22D  ': ['card2 2D', 'card2 2D', 'card2 2D', 'card2 2D', 'card2 2D'],
 'LVOT Diam': ['21.05', 'mm'],
 'PISA MR Radius': ['10.52', 'mm'],
 'PISA TR Radius': ['9.31', 'mm'],
 'IVSd': ['9.20', 'mm'],
 'LVIDd': ['36.4