In [9]:
import pandas as pd
import yaml
import glob
import re

# Directory holding the input pattern YAML files. The trailing slash matters:
# the loader concatenates this string directly with the '*.yaml' glob.
pattern_directory_path = "../curation/patterns-for-matching/"

# Directory the rewritten patterns are written to, one '<pattern_name>.yaml'
# per pattern. The trailing slash matters here too (plain string concatenation).
changed_pattern_directory_path = "../curation/changed-patterns/"


{'pattern_name': 'abnormalAnatomicalEntity', 'pattern_iri': 'http://purl.obolibrary.org/obo/upheno/patterns/abnormalAnatomicalEntity.yaml', 'description': 'Any unspecified abnormality of an anatomical entity.', 'contributors': ['https://orcid.org/0000-0002-9900-7880', 'https://orcid.org/0000-0001-9076-6015', 'https://orcid.org/0000-0003-4148-4606', 'https://orcid.org/0000-0002-3528-5267', 'https://orcid.org/0000-0002-7356-1779', 'https://orcid.org/0000-0001-5208-3432', 'https://orcid.org/0000-0001-7941-2961', 'https://orcid.org/0000-0003-4606-0597'], 'classes': {'quality': 'PATO:0000001', 'abnormal': 'PATO:0000460', 'anatomical entity': 'UBERON:0001062'}, 'relations': {'inheres_in_part_of': 'RO:0002314', 'has_modifier': 'RO:0002573', 'has_part': 'BFO:0000051'}, 'annotationProperties': {'exact_synonym': 'oio:hasExactSynonym'}, 'vars': {'anatomical_entity': "'anatomical entity'"}, 'name': {'text': 'abnormal %s', 'vars': ['anatomical_entity']}, 'annotations': [{'annotationProperty': 'exact_synonym', 'text': 'abnormality of %s', 'vars': ['anatomical_entity']}], 'def': {'text': 'Abnormality of %s.', 'vars': ['anatomical_entity']}, 'equivalentTo': {'text': "'has_part' some ('quality' and ('inheres_in_part_of' some %s) and ('has_modifier' some 'abnormal'))", 'vars': ['anatomical_entity']}}

In [12]:
# Ordered substring replacements applied to pattern names, definitions and
# exact synonyms.
#
# ORDER MATTERS: the entries are applied one after another in insertion order,
# each on the result of the previous one, so specific phrases must come before
# the generic "abnormal " / "Abnormal " catch-alls further down.
replacements = {
    # Protect the phrase "Abnormal change": swap it for a sentinel first and
    # restore it with the very last entry, so no rule in between can touch it.
    "Abnormal change": "UHAUIYHIUHIUH",

    # Specific lower-case phrases where "abnormal" is simply dropped.
    "abnormal bending": "bending",
    "abnormal closing": "closing",
    "abnormal coiling": "coiling",
    "abnormal decreased": "decreased",
    "abnormal increased": "increased",
    "abnormal duplication": "duplication",
    "abnormal fusion": "fusion",
    "abnormal incomplete": "incomplete",
    "abnormal opening": "opening",

    # Specific capitalised phrases.
    "Abnormal ability": "Ability",
    "Abnormal accumulation": "Accumulation",
    "Abnormal dilation": "Dilation",
    "Abnormal local accumulation": "Local accumulation",
    "An abnormality": "A change",
    "Abnormal morphological asymmetry": "Morphological asymmetry",
    "Abnormal proliferation": "proliferation",
    "Abnormal prominence": "prominence",
    "abnormal decrease": "decrease",
    "An abnormal development": "Changed development",
    "An abnormal reduction": "A reduction",

    # Article-aware fallbacks.
    "An abnormal ": "A changed ",
    # NOTE(review): effectively unreachable -- the shorter "An abnormality"
    # key above has already rewritten every occurrence by the time this runs.
    "An abnormality ": "A change ",
    "an abnormal ": "a changed ",
    "abnormally curled":   "curling",
    # (A second, identical "abnormal bending" entry used to sit here; duplicate
    # keys in a dict literal collapse into the first one, so it was removed.)

    # Generic catch-alls.
    "abnormal ": "changed ",
    "Abnormal ": "Changed ",
    "An abnormally": "",
    "abnormally ": "",
    "Abnormally ": "",

    # Restore the protected phrase (must stay last).
    "UHAUIYHIUHIUH": "Abnormal change"
}




def get_all_patterns_as_yml(pattern_directory_path):
    """Load every ``*.yaml`` file found under a directory prefix.

    Args:
        pattern_directory_path: String prefix that is concatenated directly
            with ``'*.yaml'`` to build the glob, so it needs its own trailing
            path separator.

    Returns:
        A list with one parsed YAML document per matching file, in the order
        ``glob.glob`` yields the paths.
    """
    def _parse(yaml_path):
        # Keep the file open only for as long as the parse takes.
        with open(yaml_path, 'r') as handle:
            return yaml.safe_load(handle)

    return [_parse(yaml_path) for yaml_path in glob.glob(pattern_directory_path + '*.yaml')]

def print_if_changed(original, new):
    """Debug hook for reporting a changed value; currently a no-op.

    The two values are compared, but the reporting ``print`` is disabled, so
    the function always returns ``None`` without producing output.
    """
    differs = original != new
    if not differs:
        return None
    # Reporting is switched off: print(f"{original}: {new}")
    return None

def update_text(original_text, replacements):
    """Apply ordered substring replacements to a text and tidy the result.

    Args:
        original_text: The string to rewrite. May be empty.
        replacements: Mapping of ``old -> new`` substrings. Entries are applied
            sequentially in iteration order, each on the output of the
            previous one, so the order of the mapping is significant.

    Returns:
        The rewritten text with every run of whitespace collapsed to a single
        space and leading/trailing whitespace stripped. If the original text
        started with an upper-case character, the first character of the
        result is upper-cased as well. An empty input, or a text that the
        replacements reduce to nothing, yields ``""``.
    """
    # Slice instead of indexing so an empty string gives False, not IndexError.
    first_capital = original_text[:1].isupper()
    new_text = original_text
    for old, new in replacements.items():
        new_text = new_text.replace(old, new)
    # Replacements that delete a word can leave doubled or dangling spaces.
    new_text = re.sub(r'\s+', ' ', new_text).strip()
    if first_capital:
        # The result may be empty here (e.g. "Abnormally " -> ""); slicing
        # keeps that case from raising.
        new_text = new_text[:1].upper() + new_text[1:]
    return new_text

def process_text(slot, pattern, changes, replacements):
    """Rewrite one ``text`` field in place and record the change.

    Args:
        slot: Key of a nested mapping inside ``pattern`` whose ``'text'`` entry
            should be rewritten (e.g. ``'name'``), or a falsy value to rewrite
            ``pattern['text']`` directly.
        pattern: Mapping that holds the text, either directly or under
            ``slot``. Updated in place when the text changes.
        changes: Dict that collects ``original_text -> new_text`` for every
            text that was actually altered. Updated in place.
        replacements: Ordered substring replacements handed to ``update_text``.

    Returns:
        None. One line is printed per call: a skip notice when there is no
        text to process, ``original<TAB>new`` when the text changed, or
        ``original<TAB>TODO`` when no replacement applied.
    """
    # The text lives either in a nested slot mapping or directly on `pattern`.
    holder = pattern.get(slot) if slot else pattern
    # A missing slot, a null/non-mapping slot and a mapping without 'text' are
    # all treated as "nothing to rewrite" instead of raising.
    if not isinstance(holder, dict) or 'text' not in holder:
        print(f"XXX: {slot} does not have a text field. Skipping.")
        return

    original_text = holder['text']
    new_text = update_text(original_text, replacements)
    if original_text == new_text:
        # Nothing matched; flag the text for manual review.
        print(f"{original_text}\tTODO")
        return

    print(f"{original_text}\t{new_text}")
    changes[original_text] = new_text
    holder['text'] = new_text

def change_pattern(pattern_yaml, replacements, changes):
    """Rewrite the textual fields of one parsed pattern in place.

    The description is blanked, an existing ``'abnormal'`` entry under
    ``classes`` is set to ``'PATO:0000460'``, and the ``name`` and ``def``
    texts plus the text of every ``exact_synonym`` annotation are passed
    through ``process_text``.

    Args:
        pattern_yaml: Parsed pattern mapping; modified in place.
        replacements: Ordered substring replacements forwarded to
            ``process_text``.
        changes: Dict collecting ``original -> new`` text pairs; filled in by
            ``process_text``.
    """
    pattern_yaml['description'] = ""

    class_map = pattern_yaml['classes']
    if 'abnormal' in class_map:
        class_map['abnormal'] = 'PATO:0000460'

    # Name first, then definition.
    for text_slot in ('name', 'def'):
        process_text(text_slot, pattern_yaml, changes, replacements)

    if 'annotations' not in pattern_yaml:
        return
    for entry in pattern_yaml['annotations']:
        if entry['annotationProperty'] != 'exact_synonym':
            continue
        # Annotations carry their text directly, hence no slot.
        process_text(None, entry, changes, replacements)
    


# Load every pattern file from the input directory.
all_configs = get_all_patterns_as_yml(pattern_directory_path)

# Rewrite each pattern. change_pattern mutates the dict it is given, so
# `updated_patterns` ends up holding the same objects as `all_configs`.
updated_patterns = []
changes = {}  # original text -> rewritten text, filled in by process_text
for pattern in all_configs:
    change_pattern(pattern, replacements=replacements, changes = changes)
    updated_patterns.append(pattern)

# Summary of every text that was actually altered.
print(changes)
# Write each rewritten pattern to '<pattern_name>.yaml' in the output directory.
# NOTE(review): open(..., 'w') does not create missing directories -- this
# assumes the output directory already exists; confirm.
for pattern in updated_patterns:
    with open(changed_pattern_directory_path + pattern['pattern_name'] + '.yaml', 'w') as pattern_file:
        yaml.dump(pattern, pattern_file, default_flow_style=False)

decreased resistance of organism to %s	TODO
Decreased resistance of organism to %s.	TODO
increased sensitivity of organism to %s	TODO
abnormal %s proliferation in %s	changed %s proliferation in %s
Abnormal proliferation of %s in %s.	Proliferation of %s in %s.
delayed %s	TODO
Delayed %s.	TODO
increased size of the %s	TODO
An increase in the size of the %s.	TODO
enlarged %s	TODO
agenesis of %s	TODO
Agenesis of %s.	TODO
abnormally lysed %s	lysed %s
%s with extensive cellular membrane destruction.	TODO
abnormally apoptotic %s in %s	apoptotic %s in %s
An abnormally apoptotic %s in %s.	An apoptotic %s in %s.
%s %s with apoptosis	TODO
decreased proportion of %s in %s	TODO
A decreased proportion of %s in the %s.	TODO
abnormality of %s physiology	TODO
Any functional anomaly of the %s.	TODO
abnormal %s	changed %s
Abnormality of %s.	TODO
abnormality of %s	TODO
inverted %s in %s	TODO
An abnormal configuration of the %s such that it is reversed in position, order, or condition in in %s.	A changed c

In [6]:
# NOTE(review): in the visible cells `y` is only assigned as a local inside
# get_all_patterns_as_yml, never at notebook scope. This cell presumably relies
# on kernel state from an earlier run -- confirm, or drop the cell.
print(y)

{'pattern_name': 'abnormalAnatomicalEntity', 'pattern_iri': 'http://purl.obolibrary.org/obo/upheno/patterns/abnormalAnatomicalEntity.yaml', 'description': 'Any unspecified abnormality of an anatomical entity.', 'contributors': ['https://orcid.org/0000-0002-9900-7880', 'https://orcid.org/0000-0001-9076-6015', 'https://orcid.org/0000-0003-4148-4606', 'https://orcid.org/0000-0002-3528-5267', 'https://orcid.org/0000-0002-7356-1779', 'https://orcid.org/0000-0001-5208-3432', 'https://orcid.org/0000-0001-7941-2961', 'https://orcid.org/0000-0003-4606-0597'], 'classes': {'quality': 'PATO:0000001', 'abnormal': 'PATO:0000460', 'anatomical entity': 'UBERON:0001062'}, 'relations': {'inheres_in_part_of': 'RO:0002314', 'has_modifier': 'RO:0002573', 'has_part': 'BFO:0000051'}, 'annotationProperties': {'exact_synonym': 'oio:hasExactSynonym'}, 'vars': {'anatomical_entity': "'anatomical entity'"}, 'name': {'text': 'abnormal %s', 'vars': ['anatomical_entity']}, 'annotations': [{'annotationProperty': 'exac