In [None]:
"""
{
  [assessment_name]: {
    "description": [description text],
    "url": [reproschema_url],
    "data_elements": {
       [element_name]: {
          'description': [description text],
          'datatype': <type>,
          'choices': [
             [choice name],
             [another choice name],
           ],
           'termURL': [reproschema_url]
       }
     } 
   }
}
"""

"""
Example of the current phenotype file format:
{
    "adhd_session_id": {
        "description": "Unique identifier for the ADHD session."
    }
}
"""
#dict of non-items only that match
#list of keys that don't correspond to any reproschema files
#
# def
# for each phenotype jsons
#   open as dict
#   for each key
#       recursively search reproschema for item files that match
#       if found at least one:
#           if at least one type item
#               then populate data
#           else
#              add to list of non-items only that match 
#       else:
#           add to list of keys that don't correspond to any reproschema files
#
# def populate data
#           

In [None]:
# One-sentence description for each phenotype JSON file, keyed by file name.
# main() looks a file's entry up by name to fill the top-level "description"
# field of the rewritten phenotype file.
file_descriptions = {
    "laryngealDystonia.json": "Measures symptoms and characteristics of laryngeal dystonia.",
    "demographics.json": "Measures participant background details.",
    "adhd.json": "Measures attention-related behaviors and symptoms.",
    "airwaystenosis.json": "Measures the severity and characteristics of airway stenosis.",
    "als.json": "Measures symptoms and progression of ALS.",
    "alzheimers.json": "Measures cognitive decline and related symptoms.",
    "benignLesion.json": "Measures features of benign lesions.",
    "bipolar.json": "Measures symptoms and behaviors related to bipolar disorder.",
    "confounders.json": "Measures variables that could impact study outcomes.",
    "customAffectScale.json": "Measures emotional states.",
    "depression.json": "Measures severity and impact of depressive symptoms.",
    "dsm5.json": "Measures criteria according to DSM-5 standards.",
    "dyspnea.json": "Measures the presence and severity of dyspnea.",
    "eligibility.json": "Measures participant eligibility for the study.",
    "enrollment.json": "Measures participant registration details.",
    "gad7.json": "Measures severity of generalized anxiety.",
    "laryngealCancer.json": "Measures characteristics of laryngeal cancer.",
    "leicester.json": "Measures specific health or psychological attributes.",
    "panas.json": "Measures positive and negative affect.",
    "parkinsons.json": "Measures symptoms and progression of Parkinson's disease.",
    "participant.json": "Measures general study-related information.",
    "phq9.json": "Measures severity of depressive symptoms.",
    "precancerousLesions.json": "Measures features of precancerous lesions.",
    "ptsd.json": "Measures trauma-related symptoms.",
    "random.json": "Measures variables for various study purposes.",
    "stroop.json": "Measures cognitive control and processing speed.",
    "vhi10.json": "Measures perceived impact of voice disorders.",
    "vocab.json": "Measures language and word knowledge.",
    "vocalFoldParalysis.json": "Measures characteristics of vocal fold paralysis.",
    "voicePerception.json": "Measures how participants perceive voice quality.",
    "voiceSeverity.json": "Measures the impact and seriousness of voice disorders.",
    "winograd.json": "Measures language comprehension and reasoning."
}

In [None]:
import os
import json

# Directory tree that is searched for ReproSchema files matching each phenotype key.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
b2ai_redcap2rs_activities_dir = "/Users/isaacbevers/sensein/reproschema-wrapper/b2ai-redcap2rs/activities"

def search_string_in_json_files(directory, search_string):
    """
    Find JSON files under a directory tree that mention a string.

    Every file below ``directory`` is parsed as JSON. A file is reported when
    ``search_string`` occurs either in the ``str()`` rendering of the parsed
    content or in the file's own name. Files that cannot be opened, decoded
    or parsed are skipped silently.

    Parameters:
        directory (str): Root of the tree to walk.
        search_string (str): Text to look for.

    Returns:
        list: Paths of the matching files, in ``os.walk`` order.
    """
    hits = []

    for current_dir, _subdirs, names in os.walk(directory):
        for name in names:
            candidate = os.path.join(current_dir, name)
            try:
                with open(candidate, 'r', encoding='utf-8') as handle:
                    parsed = json.load(handle)
                # The comparison stays inside the try so that any failure
                # while matching is skipped just like an unreadable file.
                found = search_string in str(parsed) or search_string in str(name)
            except Exception:
                # Best effort: anything unreadable or non-JSON is ignored.
                continue
            if found:
                hits.append(candidate)

    return hits
# search_string_in_json_files(b2ai_redcap2rs_activities_dir, "hello")
# search_string = "adhd_session_id"
# directory = "/Users/isaacbevers/sensein/reproschema-wrapper/b2ai-redcap2rs/activities/q_mood_adhd_adult/items"
# search_string_in_json_files(directory, search_string)


In [None]:
import requests

def is_url_resolvable(url, timeout=10):
    """
    Checks if the URL is resolvable.

    Parameters:
        url (str): The URL to check.
        timeout (float or tuple): Seconds to wait for the server, passed
            straight to ``requests.get``. Without a timeout a stalled
            connection would block the caller indefinitely.

    Returns:
        bool: True if a GET request to the URL answers with status 200,
        False for any other status or when the request fails (connection
        errors and timeouts included).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Timeouts are RequestException subclasses, so they land here too.
        return False
    return response.status_code == 200

def get_reproschema_raw_url(path, checksum="65734f24a32b69ed8dca2e92567cbb580cc0d492", branch="main"):
    """
    Generates a raw GitHub URL for a file in the b2ai-redcap2rs project.

    Parameters:
        path (str): Path to the file. Everything up to and including the
            first '/b2ai-redcap2rs/' segment is dropped; the remainder is
            used as the path inside the repository.
        checksum (str): Git ref placed in the URL (presumably a commit SHA
            pinning the repository state - confirm with the repo owner).
        branch (str): Branch name used as the ref only when ``checksum`` is
            empty or None (default is 'main').

    Returns:
        str or bool: The raw GitHub URL when it resolves, otherwise False.
        An empty or missing ``path`` returns False without any network call.
    """
    if not path:
        # Nothing to point at; skip building a URL for the repository root.
        return False

    relative_path = path.split('/b2ai-redcap2rs/', 1)[-1]
    ref = checksum or branch
    url = f"https://raw.githubusercontent.com/sensein/b2ai-redcap2rs/{ref}/{relative_path}"
    return url if is_url_resolvable(url) else False



In [None]:

# # def set_data_element():
# def get_reproschema_raw_url(checksum="65734f24a32b69ed8dca2e92567cbb580cc0d492"):
    


def populate_data_element(output_phenotype_dict, key, item_file_path, phenotype_file_name):
    """
    Copy metadata from a ReproSchema item file into one phenotype data element.

    The entry ``output_phenotype_dict[key]`` is created if missing and then
    updated in place with:
      * "question" - copied only when the item defines it;
      * "datatype" - the item's ``responseOptions.valueType``;
      * "choices"  - the item's ``responseOptions.choices``;
      * "termURL"  - raw GitHub URL of the item file, only when it resolves.

    "datatype" and "choices" are set to None when the item has no inline
    ``responseOptions`` object or the respective field is absent, so that a
    single incomplete item does not abort the whole run with a KeyError.

    Parameters:
        output_phenotype_dict (dict): Data elements being built; mutated.
        key (str): Name of the data element to populate.
        item_file_path (str): Path of the ReproSchema item JSON file.
        phenotype_file_name (str): Currently unused; kept for callers.

    Returns:
        None
    """
    with open(item_file_path, 'r', encoding='utf-8') as file:
        reproschema_item = json.load(file)

    data_element = output_phenotype_dict.setdefault(key, {})

    if "question" in reproschema_item:
        data_element["question"] = reproschema_item["question"]

    # responseOptions may be missing or not an inline object; treat both as empty.
    response_options = reproschema_item.get("responseOptions")
    if not isinstance(response_options, dict):
        response_options = {}
    data_element["datatype"] = response_options.get("valueType")
    data_element["choices"] = response_options.get("choices")

    reproschema_raw_url = get_reproschema_raw_url(item_file_path)
    if reproschema_raw_url:
        data_element["termURL"] = reproschema_raw_url
    #TODO add URL

    # phenotype_file_dict[phenotype_file_name][""] = # question field

        #       'description': [description text],
        #   'datatype': <type>,
        #   'choices': [
        #      [choice name],
        #      [another choice name],
        #    ],
        #    'termURL': [reproschema_url]

In [None]:
def get_all_schema_paths(directory):
    """
    Collect every file below ``directory`` whose name ends with 'schema'.

    Parameters:
        directory (str): Root of the tree to walk.

    Returns:
        list: Full paths of the matching files, in ``os.walk`` order.
    """
    return [
        os.path.join(folder, name)
        for folder, _, names in os.walk(directory)
        for name in names
        if name.endswith('schema')
    ]


def get_activity_schema_path(item_path):
    """
    Locate the single schema file of the activity an item belongs to.

    The activity directory is derived from ``item_path`` as
    ``<prefix>/activities/<first path component after '/activities/'>`` and
    searched recursively for files whose name ends with 'schema'.

    Parameters:
        item_path (str): Path of a file inside an activity directory.

    Returns:
        str: Path of the only schema file found.

    Raises:
        ValueError: If the activity directory holds zero or several schema files.
    """
    pieces = item_path.split('/activities/', 1)
    repo_prefix = pieces[0]
    activity_name = pieces[-1].split('/')[0]
    activity_dir = os.path.join(repo_prefix, 'activities', activity_name)

    found = [
        os.path.join(folder, name)
        for folder, _, names in os.walk(activity_dir)
        for name in names
        if name.endswith('schema')
    ]

    if len(found) != 1:
        raise ValueError(f"Wrong number of schema paths: {len(found)}")
    return found[0]

    

In [None]:
import copy 
            

# Module-level accumulators filled in by main().
# key -> list of matching paths, for keys whose matches include no item file
matching_non_item_files = {}
# one entry (the list of candidate paths) per key that matched several item files
multiple_item_files = []
# keys for which no matching file was found at all
non_matching = []

# Directory containing the phenotype .json files that main() reads and rewrites
phenotype_dir = "/Users/isaacbevers/sensein/b2ai-wrapper/b2aiprep/src/b2aiprep/prepare/resources/b2ai-data-bids-like-template/phenotype"
# NOTE(review): the accumulators above are never reset, so calling main() again
# in the same kernel appends to the results of the previous call.

def main():
    """
    Enrich every phenotype JSON file in ``phenotype_dir`` with ReproSchema metadata.

    For each ``*.json`` file (except "<measurement_tool_name>.json"):
      1. Load the file; if it is already in the nested output layout (a single
         top-level key whose value has "data_elements"), unwrap it first.
      2. For each key, search ``b2ai_redcap2rs_activities_dir`` for matching
         files and, when an item file is identified, copy its metadata into
         the output via ``populate_data_element``.
      3. Wrap the result as
         ``{<schema file name>: {"description", "url", "data_elements"}}`` and
         write it back over the original file.

    Side effects: overwrites the phenotype files in place, prints the schema
    path chosen for each file, and appends to the module-level accumulators
    ``matching_non_item_files``, ``multiple_item_files`` and ``non_matching``.

    Raises:
        KeyError: If a phenotype file name has no entry in ``file_descriptions``.
        ValueError: Propagated from ``get_activity_schema_path``.
    """
    # Loop through each file in the phenotype_dir
    for phenotype_file_name in os.listdir(phenotype_dir):
        # Check if the file ends with .json and is not "<measurement_tool_name>.json"
        if phenotype_file_name.endswith(".json") and phenotype_file_name != "<measurement_tool_name>.json":
            file_path = os.path.join(phenotype_dir, phenotype_file_name)


            with open(file_path, 'r', encoding='utf-8') as file:
                phenotype_file_dict = json.load(file)

            # Unnest a file that was already rewritten by a previous run, so
            # that running main() repeatedly does not nest the output deeper.
            if len(phenotype_file_dict) == 1:
                key = next(iter(phenotype_file_dict))
                if "data_elements" in phenotype_file_dict[key]:
                    phenotype_file_dict = phenotype_file_dict[key]["data_elements"]

            # Schema of the first activity for which an item file was found; "" until then.
            activity_schema_path = ""
            # Work on a copy: the loop below adds keys to the output while iterating the input.
            output_phenotype_dict = copy.deepcopy(phenotype_file_dict)
            for key in phenotype_file_dict:
                # Keys containing '___' are looked up by the part before the
                # first '___' when that base name is not itself a key in the
                # file (presumably REDCap checkbox columns - TODO confirm).
                # The original '___' key stays in the output untouched.
                if '___' in key:
                    new_key = key.split('___')[0]
                    if new_key not in phenotype_file_dict:
                        key = new_key
                        output_phenotype_dict[key] = {}

                # NOTE(review): this walks and parses the whole activities tree once per key.
                file_paths = search_string_in_json_files(b2ai_redcap2rs_activities_dir, key)
                if file_paths:
                    # NOTE(review): substring test on the full path, so any path
                    # containing "item" anywhere counts as an item file.
                    item_file_paths = [path for path in file_paths if "item" in path]
                    if item_file_paths and len(item_file_paths) == 1:
                        populate_data_element(output_phenotype_dict, key, item_file_paths[0], phenotype_file_name)
                        if not activity_schema_path:
                            activity_schema_path = get_activity_schema_path(item_file_paths[0])
                    elif item_file_paths and len(item_file_paths) > 1:
                        # Several candidates: use those whose file name equals the key exactly.
                        for path in item_file_paths:
                            if os.path.basename(path) == key:
                                if not activity_schema_path:
                                    activity_schema_path = get_activity_schema_path(path)
                                populate_data_element(output_phenotype_dict, key, path, phenotype_file_name)
                        # Recorded whether or not an exact file-name match was found.
                        multiple_item_files.append(item_file_paths)
                    else:
                        # Matches exist, but none of them is an item file.
                        matching_non_item_files[key] = file_paths
                else:
                    non_matching.append(key)
            print(activity_schema_path)

            # NOTE(review): if no item file was found for any key, activity_schema_path
            # is still "" here, so the top-level key written below is "" as well.
            activity_schema_name = os.path.basename(activity_schema_path)
            output_phenotype_dict = {"data_elements": output_phenotype_dict}
            output_phenotype_dict["description"] = file_descriptions[phenotype_file_name]
            # get_reproschema_raw_url returns False when the URL does not resolve;
            # that value is then serialized as JSON `false`.
            output_phenotype_dict["url"] = get_reproschema_raw_url(activity_schema_path)
            # Rebuild the dict to fix the key order: description, url, data_elements.
            output_phenotype_dict = {
                    "description": output_phenotype_dict["description"],
                    "url": output_phenotype_dict["url"],
                    "data_elements": output_phenotype_dict["data_elements"]
                }
            output_phenotype_dict = {activity_schema_name: output_phenotype_dict}
            # output_phenotype_dict = dict(sorted(output_phenotype_dict.items()))
            # output_phenotype_dict = dict(sorted(output_phenotype_dict.items(), key=lambda item: len(str(item[1]))))

            # TODO
            # output_phenotype_dict[phenotype_file_name]["url"] = 
            #phenotype_file_dict[phenotype_file_name][data elements] = 
            # if phenotype_file_name not in output_phenotype_dict:
            #     output_phenotype_dict = {phenotype_file_name: RS ASSESSMENT NAME}
            # print(output_phenotype_dict)
            # Overwrite the source phenotype file with the enriched structure.
            with open(file_path, 'w', encoding='utf-8') as file:
                json.dump(output_phenotype_dict, file, ensure_ascii=False, indent=4)
"""
{
  [assessment_name]: {
    "description": [description text],
    "url": [reproschema_url],
    "data_elements": {
"""
            # save data

#       if found at least one:
#           if at least one type item
#               then populate data
#           else
#              add to list of non-items only that match 
#       else:
#           add to list of keys that don't correspond to any reproschema files
        
# Run the enrichment. This overwrites the phenotype JSON files in place.
main()



In [None]:
# Lookup summary, one value per line: count and list of keys without any
# match, count and mapping of keys that matched only non-item files, and the
# number of keys that matched several item files.
print(
    len(non_matching),
    non_matching,
    len(matching_non_item_files),
    matching_non_item_files,
    len(multiple_item_files),
    sep="\n",
)


In [None]:
# Directory holding the phenotype JSON files inspected below.
# NOTE(review): same value as the phenotype_dir assigned before main() is
# defined; absolute, machine-specific path.
phenotype_dir = "/Users/isaacbevers/sensein/b2ai-wrapper/b2aiprep/src/b2aiprep/prepare/resources/b2ai-data-bids-like-template/phenotype"

def count_items_with_only_descriptions(directory=None):
    """
    Count data elements that carry fewer than two fields.

    Each ``*.json`` file in the directory (except
    "<measurement_tool_name>.json") is loaded. Both layouts are supported:
    the flat ``{element: {...}}`` layout and the nested
    ``{name: {..., "data_elements": {element: {...}}}}`` layout, which is
    unwrapped first. Without the unwrapping, a nested file exposes only its
    single wrapper key and would always be counted as 0.

    Prints a ``{file name: count}`` dict followed by the overall total.
    Files are visited in sorted order so the printed dict is deterministic.

    Parameters:
        directory (str, optional): Directory to scan. Defaults to the
            module-level ``phenotype_dir``.

    Returns:
        None
    """
    if directory is None:
        directory = phenotype_dir

    single_entry_fields = {}
    for phenotype_file_name in sorted(os.listdir(directory)):
        if not phenotype_file_name.endswith(".json") or phenotype_file_name == "<measurement_tool_name>.json":
            continue

        file_path = os.path.join(directory, phenotype_file_name)
        with open(file_path, 'r', encoding='utf-8') as file:
            phenotype_file_dict = json.load(file)

        # Unwrap the nested layout so the data elements themselves are counted.
        if len(phenotype_file_dict) == 1:
            wrapper = next(iter(phenotype_file_dict.values()))
            if isinstance(wrapper, dict) and "data_elements" in wrapper:
                phenotype_file_dict = wrapper["data_elements"]

        single_entry_fields[phenotype_file_name] = sum(
            1 for entry in phenotype_file_dict.values() if len(entry) < 2
        )

    single_entry_fields_count = sum(single_entry_fields.values())
    print(single_entry_fields)
    print(single_entry_fields_count)

# Print the per-file and overall counts for the phenotype files currently on disk.
count_items_with_only_descriptions()
        

In [None]:
#check response choices with the redcap data dictionary

import pandas as pd

def get_non_matching_response_choices(data_dict_path, phenotype_path):
    """
    Walk the data elements of every phenotype JSON file (work in progress).

    The comparison of response choices against the REDCap data dictionary is
    still disabled (see the commented-out block below), so the function
    currently only prints each element name and its details and returns an
    empty list.

    Data elements are read from either layout: a top-level "data_elements"
    object, or the nested ``{name: {..., "data_elements": {...}}}`` layout
    with a single wrapper key. Reading only the top level would find nothing
    in nested files.

    Parameters:
        data_dict_path (str): Path of the REDCap data dictionary CSV.
        phenotype_path (str): Directory containing the phenotype JSON files.

    Returns:
        list: Choices not found in the data dictionary; always empty while
        the comparison is disabled.
    """
    non_matching = []

    # Load the data dictionary CSV into a DataFrame (only used by the disabled
    # check below, but kept so that a bad path still fails early).
    data_dict = pd.read_csv(data_dict_path)

    # Get list of all phenotype JSON files
    phenotype_files = [file for file in os.listdir(phenotype_path) if file.endswith('.json')]

    for phenotype_file in phenotype_files:
        with open(os.path.join(phenotype_path, phenotype_file), 'r', encoding='utf-8') as f:
            phenotype_data = json.load(f)

        # Locate the data elements in either the top-level or the nested layout.
        data_elements = phenotype_data.get("data_elements")
        if data_elements is None and len(phenotype_data) == 1:
            wrapper = next(iter(phenotype_data.values()))
            if isinstance(wrapper, dict):
                data_elements = wrapper.get("data_elements")

        # Iterate over each data element in the phenotype JSON
        for element_name, element_details in (data_elements or {}).items():
            print(element_name)
            print(element_details)
            # Disabled comparison against the data dictionary:
            # filtered_data_dict = data_dict[data_dict["Variable / Field Name"] == element_name]

            # if filtered_data_dict.empty:
            #     continue

            # filtered_entry = filtered_data_dict.iloc[0]
            # valid_labels = filtered_entry.get("Choices, Calculations, OR Slider Labels", "")
            # choices = element_details.get("choices", [])

            # # Check if choices exist and perform lexical matching
            # if choices:
            #     for choice in choices:
            #         if choice not in valid_labels:
            #             non_matching.append(choice)

    return non_matching

# Example usage
# NOTE(review): both paths are absolute and machine-specific - adjust before
# running elsewhere.
data_dict_path = "/Users/isaacbevers/sensein/b2ai-wrapper/bridge2ai-redcap/data/bridge2ai_voice_project_data_dictionary.csv"
phenotype_path = "/Users/isaacbevers/sensein/b2ai-wrapper/b2aiprep/src/b2aiprep/prepare/resources/b2ai-data-bids-like-template/phenotype"
non_matching_choices = get_non_matching_response_choices(data_dict_path, phenotype_path)
print(non_matching_choices)
















