This script supports the building of a Heliophysics dictionary.

As we contribute terms to the Unified Astronomy Thesaurus, this script takes those contributed terms as input, attaches to them the existing definitions from a compendium of Heliophysics term lists and dictionaries, and outputs them to a spreadsheet for harmonization and contribution to an authoritative Helio dictionary.

Input: a Heliophysics term
Process: 
    look through the following glossaries for definitions and pull out all that exist:

    - HELIO Ontology
    - Heliophysics Event Knowledge Base
    - NASA CCMC
    - SWEET
    - SPASE
    - NASA Heliophysics Vocabulary
    - Space Weather Glossary
    - Space Weather Glossary (Second)
    - ESA Space Weather Glossary (ESA)

    (not yet applied:) identify similar terms to the input term


Output: {term: [definition1, definition2, ...]}, written as one CSV row with one definition column per glossary.



NOTE: This builds on 'add_definitions_to_terms.ipynb'.


In [33]:
import os, sys, re
import pandas as pd
import numpy as np

from glob import glob
import json


import csv


In [41]:
def update_csv(csv_filename, term, column_names, entries):
    """Append one row of glossary definitions for ``term`` to a CSV file.

    The CSV has a ``term`` column followed by one column per name in
    ``column_names``. The header row is written when the file does not
    exist yet or exists but is empty.

    Args:
        csv_filename: Path of the CSV file to create or append to.
        term: Value stored in the ``term`` column of the new row.
        column_names: Names of the definition columns. An entry is routed
            to the first column whose name is a prefix of the entry's
            source name.
        entries: Strings shaped like ``"<definition>    source: <name>"``.

    Returns:
        None. The row is appended to ``csv_filename`` as a side effect.

    Notes:
        - The text is split at the LAST ``source:`` marker, so a definition
          that itself contains ``source:`` is kept intact.
        - Several definitions coming from the same source are joined with
          ``" | "`` instead of overwriting each other.
        - Entries without a ``source:`` marker, or whose source matches no
          column, are skipped with a printed warning.
        - If the file already exists with a different header, the row is
          still written using ``['term'] + column_names`` as field order;
          the existing header is not checked.
    """
    # Always include 'term' as the first column
    full_columns = ['term'] + list(column_names)

    # A missing file and a zero-byte file both still need a header row.
    needs_header = (
        not os.path.isfile(csv_filename)
        or os.path.getsize(csv_filename) == 0
    )

    # Start from a row of empty cells so unmatched glossaries stay blank.
    new_row = {col: '' for col in full_columns}
    new_row['term'] = term

    for entry in entries:
        # The source marker is appended after the definition, so split on
        # the last occurrence rather than the first.
        description, marker, source_info = entry.rpartition('source:')
        if not marker:
            print(f"Warning: entry has no 'source:' marker and was skipped: {entry!r}")
            continue

        description = description.strip()
        source_file = source_info.strip()

        # Match the column based on the source filename
        column = next(
            (col for col in column_names if source_file.startswith(col)),
            None,
        )
        if column is None:
            print(f"Warning: No matching column found for source '{source_file}'.")
            continue

        if new_row[column]:
            # Keep earlier definitions from the same source instead of
            # silently replacing them.
            new_row[column] = f"{new_row[column]} | {description}"
        else:
            new_row[column] = description

    # Append mode creates the file when it does not exist yet.
    with open(csv_filename, mode='a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=full_columns)
        if needs_header:
            writer.writeheader()
        writer.writerow(new_row)

In [35]:
# Human-readable names of the glossaries searched for definitions.
list_glossaries = [
    'HELIO Ontology',
    'Heliophysics Event Knowledge Base',
    'NASA CCMC',
    'SWEET',
    'SPASE',
    'NASA Heliophysics Vocabulary',
    'Space Weather Glossary',
    'Space Weather Glossary (Second)',
    'ESA Space Weather Glossary (ESA)',
]

In [36]:
# JSON glossary files to load; each is read below via content["Terms"],
# whose items provide 'Term' and 'Definition'.
glossaries_list = [
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/HELIO Ontology.json',
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/Heliophysics Events Knowledgebase.json',
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/NASA CCMC.json',
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/Semantic Web for Earth and Environment Technology Ontology.json',
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/SPASE Dictionary.json',
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/NASA Heliophysics Vocabulary.json',
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/Space Weather Glossary.json',
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/Space Weather Glossary (Second).json',
     '/Users/ryanmc/Documents/NASA_FDL/2022/sdm-kg-nasa-2022/data/raw/SMDVocabulary/SupersetVocab/ESA Space Weather Glossary.json',
]


pd_compiled = pd.DataFrame(columns=['term','definition','source'])

# One CSV column per glossary, named after the glossary's file name.
column_names = []

terms_full = []
definitions_full = []
sources_full = []

# Matches the character before a CamelCase word boundary: a lowercase letter
# followed by an uppercase one, or an uppercase letter followed by an
# uppercase+lowercase pair. A space is inserted after the match.
camel_case_boundary = re.compile(r'([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))')

for g in glossaries_list:
    # Take the file name from the path itself rather than a fixed character
    # offset, so the glossaries can live in any directory.
    glossary_name = os.path.basename(g)
    column_names.append(glossary_name)
    print('working on {}...'.format(glossary_name))

    with open(g, encoding='utf-8') as json_file:
        content = json.load(json_file)

    # Terms are split on CamelCase boundaries and lower-cased so lookups
    # can use a plain lower-case string comparison.
    terms_loop = [
        camel_case_boundary.sub(r'\1 ', entry['Term']).lower()
        for entry in content["Terms"]
    ]
    definitions_loop = [entry['Definition'] for entry in content["Terms"]]
    sources_loop = [glossary_name] * len(terms_loop)

    # Add to full lists
    terms_full.extend(terms_loop)
    definitions_full.extend(definitions_loop)
    sources_full.extend(sources_loop)

# NOTE: the 'type' column holds the glossary file name each row came from.
pd_full = pd.DataFrame({'term':terms_full,'definition':definitions_full,'type':sources_full})
pd_full

working on HELIO Ontology.json...
working on Heliophysics Events Knowledgebase.json...
working on NASA CCMC.json...
working on Semantic Web for Earth and Environment Technology Ontology.json...
working on SPASE Dictionary.json...
working on NASA Heliophysics Vocabulary.json...
working on Space Weather Glossary.json...
working on Space Weather Glossary (Second).json...
working on ESA Space Weather Glossary.json...


Unnamed: 0,term,definition,type
0,thing,,HELIO Ontology.json
1,data resource,,HELIO Ontology.json
2,service,,HELIO Ontology.json
3,catalog,A tabular listing of events or observational n...,HELIO Ontology.json
4,record,,HELIO Ontology.json
...,...,...,...
14945,spacecraft effects,Effects observed as a result of the interactio...,ESA Space Weather Glossary.json
14946,space weather,Space Weather is the physical and phenomenolog...,ESA Space Weather Glossary.json
14947,space weather event,A time-limited condition of the space environm...,ESA Space Weather Glossary.json
14948,susceptibility,The response of a material or substance to a c...,ESA Space Weather Glossary.json


In [37]:
#INPUT a term
# Glossary terms are stored lower-cased, and the lookup is an exact string
# comparison, so normalise the user's input the same way (trim + lower-case).
term = input('please enter a term to get defintions for: ').strip().lower()

please enter a term to get defintions for: aurora


In [38]:
# Positions of the rows whose term equals the requested term exactly;
# np.argwhere returns one row of indices per match.
term_matches = pd_full['term'].values == term
idx = np.argwhere(term_matches)

In [39]:
# Collect "<definition>    source: <glossary file>" strings for every
# matching row that actually has a definition.
defs_loop = []
# idx comes from np.argwhere (one row of indices per match); flatten it so
# each i is a single row position.
for i in np.ravel(idx):
    row = pd_full.iloc[i]
    definition = row['definition']
    # Skip missing or empty definitions; the type check also avoids a
    # TypeError when a definition is not a string (e.g. NaN).
    if isinstance(definition, str) and definition:
        defs_loop.append(definition + '    source: ' + row['type'])


In [40]:
# Display the definitions collected for the entered term.
defs_loop

["An atmospheric phenomenon consisting of bands of light caused by charged solar particles following the earth's magnetic lines of force. (spase)    source: HELIO Ontology.json",
 'transient displays of light, often displaying as moving curtains and rays, at high latitudes associated with geomagnetic disturbances    source: NASA CCMC.json',
 'The sporadic radiant emission from the upper atmosphere over the middle and high latitudes.    source: Semantic Web for Earth and Environment Technology Ontology.json',
 "An atmospheric phenomenon consisting of bandsof light caused by charged solar particles following the earth's magneticlines of force.    source: SPASE Dictionary.json",
 "An aurora is a natural display of light in the night sky that typically occurs in far northern and southern regions. Auroras occur when incoming charged particles from the sun strike oxygen and nitrogen some 60 to 200 miles up in Earth's atmosphere and release a flash of light and heat. Electrons and protons rel

In [23]:
# NOT YET APPLIED: identify similar terms and attach their definitions with the similar term for later work in harmonizing



In [26]:
# Display the CSV column names (one glossary file name per column).
column_names

['HELIO Ontology.json',
 'Heliophysics Events Knowledgebase.json',
 'NASA CCMC.json',
 'Semantic Web for Earth and Environment Technology Ontology.json',
 'SPASE Dictionary.json',
 'NASA Heliophysics Vocabulary.json',
 'Space Weather Glossary.json',
 'Space Weather Glossary (Second).json',
 'ESA Space Weather Glossary.json']

In [42]:
# Write the collected definitions for `term` to the harmonization CSV
# as one new row.
csv_name = '/Users/ryanmc/Documents/Helio_ECIP/dev/Helio-KNOW/ADS_enrichment/Heliophysics_dictionary_harmonization.csv'

update_csv(
    csv_filename=csv_name,
    term=term,
    column_names=column_names,
    entries=defs_loop,
)
