In [None]:
# Import our Python client
from hawc_client import HawcClient
# We will need pandas for DataFrames and getpass for login
import pandas as pd
import getpass

# Login to HAWC

In [None]:
# Gather the connection details interactively.
# getpass is used for the password so it is not echoed back in the notebook.
domain = input("Domain: ")
username = input("Username: ")
password = getpass.getpass("Password: ")

# Build the client for the chosen HAWC domain and log in;
# every later cell talks to HAWC through this `client` object.
client = HawcClient(domain)
client.authenticate(username, password)

print("Authentication successful")

# Upload Excel file

In [None]:
# Ask where the spreadsheet lives and which headers hold the HERO IDs and the tags
filepath = input("Excel filepath: ")
hero_column = input("HERO column name: ")
tag_column = input("Tag column name: ")

# Load the spreadsheet into a DataFrame
excel_df = pd.read_excel(filepath)

# Both headers must be present; the HERO column is checked before the tag column
for required_column in (hero_column, tag_column):
    if required_column not in excel_df.columns:
        raise Exception(f"Column '{required_column}' not in Excel sheet")

# Confirm the load and show only the two columns this notebook works with
print("Excel file read")
print(excel_df.loc[:, [hero_column, tag_column]])

# Import HERO IDs

In [None]:
# Assessment that you will be working with.
# The ID is converted to an integer right away so that a typo (or an empty
# entry) raises ValueError in this cell instead of surfacing as a failed
# request several cells later. Surrounding whitespace is tolerated by int().
assessment_id = int(input("Assessment ID: "))

In [None]:
# Some of the HERO IDs may be missing from HAWC
# Prepare an import of missing references into HAWC
#
# Blank cells in the HERO column are read by pandas as NaN, which also turns
# the whole column into floats. Drop those rows and cast back to integers so
# the set holds only real, integer HERO IDs (no NaN entries, no "123.0").
hero_ids = set(excel_df[hero_column].dropna().astype("int64"))
print("HERO Import Parameters")
# Title and description are passed to the HERO import in a later cell
title = input("Import title: ")
description = input("Import description: ")

In [None]:
# Send the collected HERO IDs to HAWC as a new import for this assessment,
# labelled with the title and description entered above
print("Importing HERO IDs...")
client.lit.import_hero(
    assessment_id,
    title,
    description,
    hero_ids,
)

# Map Excel tags to HAWC tags

## Excel tags

In [None]:
# Distinct tag values found in the spreadsheet's tag column, in order of first appearance
excel_tags = excel_df.loc[:, tag_column].unique()

# Show them as a numbered Series for easy reference while mapping
print("Tags in Excel:")
print(pd.Series(excel_tags))

## HAWC tags

In [None]:
# Fetch the tags defined for this assessment in HAWC
assessment_tags = client.lit.tags(assessment_id)

# List each tag's ID next to its nested name so the IDs can be typed in below
print("Tags in HAWC:")
print(assessment_tags.loc[:, ["id", "nested_name"]])

## Map tags

In [None]:
# Map the excel tag with the matching HAWC tag ID
# Submitting without input will skip that tag
#
# tag_id_mapping:   excel tag -> HAWC tag ID (only for tags that were mapped)
# tag_name_mapping: excel tag -> HAWC nested tag name, or "<omitted>" if skipped
#
# Invalid entries (non-numeric text, or an ID that is not among the
# assessment's tags) no longer abort the cell and discard the mappings made
# so far; the same tag is simply asked for again.
print("Map the Excel tags with their matching HAWC tag ID.")
print("Submitting without input will skip that tag.")
tag_id_mapping = dict()
tag_name_mapping = dict()
for excel_tag in excel_tags:
    while True:
        value = input(excel_tag).strip()
        if value == "":
            tag_name_mapping[excel_tag] = "<omitted>"
            print("Tag omitted")
            break
        try:
            tag_id = int(value)
        except ValueError:
            print(f"'{value}' is not a number; enter a HAWC tag ID or leave blank to skip")
            continue
        # Rows of the HAWC tag table whose ID matches the entry
        matches = assessment_tags[assessment_tags['id'] == tag_id]
        if matches.empty:
            print(f"No HAWC tag with ID {tag_id} in this assessment; try again or leave blank to skip")
            continue
        tag_id_mapping[excel_tag] = tag_id
        tag_name_mapping[excel_tag] = matches.iloc[0]['nested_name']
        print(f"'{excel_tag}' mapped to '{tag_name_mapping[excel_tag]}'")
        break

## Result

In [None]:
# Summary table of the chosen mapping: one row per Excel tag, paired with the
# HAWC tag name it will be imported as (or "<omitted>" for skipped tags)
tag_mapping_df = pd.DataFrame(
    [(excel_name, hawc_name) for excel_name, hawc_name in tag_name_mapping.items()],
    columns=["Excel Tag", "HAWC Tag"],
)
tag_mapping_df

# Import reference tags

In [None]:
# Prepare the excel dataframe for import:
# one row per (hero_id, tag_id) pair, built from the two relevant columns.
# excel_df itself is left untouched.
hero_tag_df = pd.DataFrame(
    {
        'hero_id': excel_df[hero_column],
        # Tags that were skipped are absent from tag_id_mapping and become NaN here
        'tag_id': excel_df[tag_column].map(tag_id_mapping),
    }
).dropna()
# The NaN values introduced above force the columns to float, and they stay
# float after dropna(). Cast back to integers so the IDs are written to the
# import CSV as "5" rather than "5.0" and merge cleanly on integer HERO IDs.
hero_tag_df = hero_tag_df.astype({'hero_id': 'int64', 'tag_id': 'int64'})

In [None]:
# Fetch the assessment's references from HAWC; the frame is joined below on its "hero_id" column
ref_to_hero = client.lit.reference_ids(assessment_id)

# Join on HERO ID to attach tag IDs to HAWC reference IDs, keep only the
# (reference_id, tag_id) pairs, and discard repeated pairs
ref_tag_df = (
    pd.merge(ref_to_hero, hero_tag_df, on="hero_id")
    .loc[:, ["reference_id", "tag_id"]]
    .drop_duplicates()
)

In [None]:
# Upload the reference/tag pairs to HAWC as CSV text (index column excluded).
# `operation` may be "append" or "replace"; "append" is used here.
client.lit.import_reference_tags(
    assessment_id,
    csv=ref_tag_df.to_csv(index=False),
    operation="append",
)

The HERO IDs and their tags have now been imported into the assessment.