In [40]:
import glob
import re

In [41]:
# Collect the paths of every .txt file in the ClinicalReadings folder.
# NOTE(review): hard-coded absolute Windows path; adjust it before running on another machine.
files = glob.glob("C:/Users/paola/Desktop/ClinicalReadings/*.txt")

In [61]:
class XRayImageMetadata:
    """Plain record describing a single X-ray reading.

    The four attributes (``age``, ``gender``, ``filename``, ``report``) are
    taken from keyword arguments of the same name. Any attribute that is not
    supplied is set to ``None``; unrecognised keyword arguments are ignored.
    """

    def __init__(self, **kwargs):
        # Assign the known fields in a fixed order; missing keys become None.
        for field in ('age', 'gender', 'filename', 'report'):
            setattr(self, field, kwargs.get(field))

    def __str__(self):
        """Return a one-line summary: ``<filename>:gender-age years -report``."""
        return f"<{self.filename}>:{self.gender}-{self.age} years -{self.report}"
    
class XRayMetadataReader:
    """Base reader that resolves a glob pattern to a list of file paths.

    Subclasses provide the actual parsing; ``parse_files`` here does nothing.
    """

    def __init__(self, folder, **kwargs):
        """Store the glob pattern.

        Args:
            folder: Pattern handed unchanged to ``glob.glob`` by ``get_filenames``.
            **kwargs: Accepted but not used by this class.
        """
        self.xrays = []
        self.folder = folder
        self.filenames = None

    def get_filenames(self):
        """Expand ``self.folder`` with ``glob.glob``, cache and return the matches."""
        matches = glob.glob(self.folder)
        self.filenames = matches
        return matches

    def parse_files(self):
        """Placeholder for subclasses; does nothing and returns ``None``."""
        return None
class ChinaXRayMetadataReader(XRayMetadataReader):
    """Reader for clinical-reading text files laid out as two lines.

    The first line carries the patient's gender and age, the second line the
    report text.
    """

    def clear_firstline(self, firstline):
        """
        Extract gender and age from the first line of a reading.

        Normally the first line is something like:
        <gender> <age>yrs

        Args:
            firstline: The first line of the file, as a string.

        Returns:
            A ``(gender, age)`` tuple. ``gender`` is ``'female'``, ``'male'``
            or ``None`` when neither word is present (matching is
            case-insensitive). ``age`` is the first run of digits converted
            to ``int``, or ``None`` when the line has no digits.
        """
        lowered_case = firstline.lower()
        # 'male' is a substring of 'female', so 'female' must be tested first.
        if 'female' in lowered_case:
            gender = 'female'
        elif 'male' in lowered_case:
            gender = 'male'
        else:
            gender = None

        # Raw string avoids the invalid-escape warning that '\d' triggers.
        age_match = re.search(r'\d+', firstline)
        age = int(age_match.group()) if age_match else None
        return gender, age

    def parse_files(self):
        """Parse every file matched by the reader's glob pattern.

        Each parsed record is printed and also stored in ``self.xrays``
        (the list is rebuilt on every call, so re-running does not
        accumulate duplicates). Returns ``None``.
        """
        self.xrays = []
        for file in self.get_filenames():
            with open(file) as txtfile:
                lines = [l.strip() for l in txtfile.read().split('\n')]
            gender, age = self.clear_firstline(lines[0])
            # A file with a single line has no report; record None instead of crashing.
            report = lines[1] if len(lines) > 1 else None
            xray = XRayImageMetadata(gender=gender, age=age, filename=file, report=report)
            self.xrays.append(xray)
            print(xray)
                
    

In [62]:
# Build a reader over the .txt files matched by this glob pattern; no file is read yet.
# NOTE(review): hard-coded absolute Windows path; adjust it before running on another machine.
china = ChinaXRayMetadataReader("C:/Users/paola/Desktop/ClinicalReadings/*.txt")

In [63]:
# Parse every matched file and print one summary line per X-ray record.
china.parse_files()

<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0001_0.txt>:male-45 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0002_0.txt>:male-63 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0003_0.txt>:female-48 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0004_0.txt>:male-58 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0005_0.txt>:male-28 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0006_0.txt>:male-60 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0007_0.txt>:female-53 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0008_0.txt>:male-48 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0009_0.txt>:female-71 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0010_0.txt>:female-59 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0011_0.txt>:male-43 years -normal
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0012_0.txt>:male-39 years -normal
<C:/User

<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0562_1.txt>:male-33 years -left PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0563_1.txt>:male-69 years -left PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0564_1.txt>:female-38 years -Right PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0565_1.txt>:male-79 years -bilateral PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0566_1.txt>:male-38 years -Right PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0567_1.txt>:male-61 years -bilateral PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0568_1.txt>:male-50 years -bilateral PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0569_1.txt>:male-51 years -bilateral PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0570_1.txt>:male-28 years -bilateral PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0571_1.txt>:female-36 years -Right PTB
<C:/Users/paola/Desktop/ClinicalReadings\CHNCXR_0572_1.txt>:female-57 years -bilateral PTB
<C:/Users/paola/Desktop/ClinicalReadi

In [111]:
class MontgomeryXRayMetadataReader(XRayMetadataReader):
    """
    Reader for clinical-reading text files laid out as three lines.

    Normally the first line is something like:
    <Patient's Sex: (the first letter of the gender, like F or M)>
    the second line:
    <Patient's Age: (number with 3 digits Y)>
    the third line:
    <report>

    """

    def patient_gender(self, firstline):
        """Return ``'female'``, ``'male'`` or ``None`` for the sex line.

        The value after the last ``:`` is inspected (case-insensitively) so
        that letters inside the label itself cannot be mistaken for the
        answer. When the line has no ``:`` at all and the whole line does not
        start with f/m, the check falls back to looking for an upper-case
        ``F`` or ``M`` anywhere in the line.
        """
        _, separator, value = firstline.rpartition(':')
        candidate = value.strip().lower()
        if candidate.startswith('f'):
            return 'female'
        if candidate.startswith('m'):
            return 'male'
        if not separator:
            # No "label: value" structure; keep the simple substring check.
            if 'F' in firstline:
                return 'female'
            if 'M' in firstline:
                return 'male'
        return None

    def patient_age(self, secondline):
        """Return the first run of digits in the age line as ``int``, or ``None``."""
        # Raw string avoids the invalid-escape warning that '\d' triggers.
        age_match = re.search(r'\d+', secondline)
        return int(age_match.group()) if age_match else None

    def read_files(self):
        """Parse every file matched by the reader's glob pattern.

        Returns:
            A list of ``XRayImageMetadata`` records, one per file. The same
            list is also stored in ``self.xrays``. Lines missing from a
            short file yield ``None`` for the corresponding field.
        """
        records = []
        for file in self.get_filenames():
            with open(file) as txtfile:
                lines = [l.strip() for l in txtfile.read().split('\n')]
            gender = self.patient_gender(lines[0])
            # Guard the later lines so a truncated file does not raise IndexError.
            age = self.patient_age(lines[1]) if len(lines) > 1 else None
            report = lines[2] if len(lines) > 2 else None
            records.append(
                XRayImageMetadata(gender=gender, age=age, filename=file, report=report)
            )
        self.xrays = records
        return records

    def parse_files(self):
        """Run ``read_files`` so the base-class entry point is not a silent no-op."""
        return self.read_files()
    
    
       
    
                                            
        
        
        

In [112]:
# Build a Montgomery reader over the .txt files matched by this glob pattern; no file is read yet.
# NOTE(review): hard-coded absolute Windows path; adjust it before running on another machine.
Montgomery = MontgomeryXRayMetadataReader('C:/Users/paola/Desktop/TB/MontgomerySet/ClinicalReadings/*.txt')

In [113]:
# Read every matched file into a list of XRayImageMetadata records.
DataMontgomery = Montgomery.read_files()