In [188]:
import pandas as pd
import numpy as np
import json
import re
import os

In [189]:
import json

# Specify the path to your .txt file that contains the JSON
txt_file_path = os.path.join(os.getcwd(), 'data','dailymed_output_686.txt')

# Specify the path where you want to save the .json file
json_file_path = os.path.join(os.getcwd(), 'data','dailymed_output_686.json')

# Read the JSON data from the .txt file
with open(txt_file_path, 'r') as txt_file:
    # Assuming the entire content of the file is a valid JSON string
    json_data = json.load(txt_file)

# Write the JSON data to a .json file
with open(json_file_path, 'w') as json_file:
    json.dump(json_data, json_file, indent=4)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/samueljon/Desktop/ODP/2024/Winter/drug-exp-forecast/data/dailymed_output_686.txt'

In [134]:
def clean_object(json):
    def unit_conversion(strength, units):
        # Note: these are the only ones found in data that are relevant to drug_list
        if units == 'g/L':
            return strength, 'mg/mL'
        elif units == 'mg/mm':
            return strength * 1000, 'mg/mL'  # assuming mm == mm^3
        elif units == 'ug/mL':
            return strength / 1000, 'mg/mL'
        elif units == 'g/mL':
            return strength * 1000, 'mg/mL'
        else:
            return strength, units

    def convert_strength(strength):
        match = re.match(r'(\d+\.?\d*)(\S+) in(\d+\.?\d*)(\S+)', strength)
        strength = ""
        units = ""
        if match:
            amount, amount_unit, volume, volume_unit = match.groups()
            strength = float(amount) / float(volume)
            units = f"{amount_unit}/{volume_unit}"
            strength, units = unit_conversion(strength, units)
        return strength, units
    
    if 'ndcs' in json.keys():
        ndcs = re.findall(r'\d{4,5}-\d{3,4}-\d{1,2}', json['ndcs'])
        json['ndcs'] = ndcs
    if 'actives' in json.keys():
        actives_processed = []
        for active in json['actives'].split(' - '):
            active_split = active.split('-')
            if len(active_split) > 2: continue
            ing, strength = active_split
            if len(ing) == 0: continue
            actives_processed.append(((ing.strip().title(),) + convert_strength(strength)))
        json['actives'] = actives_processed

    keep_keys = ['index', 'actives', 'route_type', 'ndcs', 'link', 'drug_name', 'inactive_ndcs']
    inactives = []
    for key, value in json.items():
        if key not in keep_keys:
            name_unii_split = key.split(' (UNII:')
            name = name_unii_split[0].strip().title()
            unii = name_unii_split[1].replace(')', '').strip() if len(name_unii_split) > 1 else ''
            strength, units = convert_strength(value)
            inactives.append((name, unii, strength, units))

    json = {key: json[key] for key in keep_keys if key in json.keys()}
    json['inactives'] = inactives
    return json



def filter_forModeling(data):
    """
    Filters input data to remove items based on specific conditions and provides detailed statistics.
    
    This function applies two filters to each item in the arrays of a given key in the input data:
    1. Removes items where 'route_type' is not 'INTRAVENOUS'.
    2. Removes items that have more than one 'actives'.
    
    After filtering, only items with 'actives', 'route_type', 'drug_name', and 'inactives' keys are retained.

    """
    
    # Statistics variables
    keys_before = len(data)
    item_lengths_before = {key: len(items) for key, items in data.items()}
    
    filtered_data = {}
    stats = {}
    keep_keys = ['drug_name', 'route_type', 'actives', 'inactives']
    
    for key, items in data.items():
        
        # Initial counts
        initial_count = len(items)
        filtered_for_route_type = 0
        filtered_for_actives = 0
        
        # Filter process

        unique_items_set = set()
        for item in items:
#             if item.get('route_type') != 'INTRAVENOUS':
#                 filtered_for_route_type += 1
#                 continue  # Skip this item due to 'route_type'
            if len(item.get('actives', [])) > 1:
                filtered_for_actives += 1
                continue  # Skip this item due to 'actives' count
            
            # Item passes filters, keep only specified keys
            filtered_item = {k: (tuple(v) if isinstance(v, list) else v) for k, v in item.items() if k in keep_keys}
            
            # Convert to a comparable format (tuple) for uniqueness check
            item_tuple = tuple(sorted(filtered_item.items()))
            unique_items_set.add(item_tuple)
            
        
        # Convert unique items back to dictionary format
        unique_filtered_items = [{k: (list(v) if isinstance(v, tuple) and k in ['actives', 'inactives'] else v) for k, v in item} for item in unique_items_set]
            
        # Update filtered data and stats
        if unique_filtered_items:
            filtered_data[key] = unique_filtered_items
            stats[key] = {
                'initial_count': initial_count,
                'filtered_for_route_type': filtered_for_route_type,
                'filtered_for_actives': filtered_for_actives,
                'final_count_before_deduplication': initial_count - filtered_for_route_type - filtered_for_actives,
                'final_count_after_deduplication': len(unique_filtered_items)
            }
    
    # Printing statistics
    print(f'Number of keys before filter: {keys_before}')
    print(f'Number of keys after filter: {len(filtered_data)}')
    
    for key, values in stats.items():
        print(f"\n'{key}': \n\
        Initial count: {values['initial_count']}, \n\
        Filtered for 'route_type': {values['filtered_for_route_type']}, \n\
        Filtered for 'actives': {values['filtered_for_actives']}, \n\
        Final count before deduplication: {values['final_count_before_deduplication']}, \n\
        Final count after deduplication: {values['final_count_after_deduplication']}")
    
    return filtered_data

In [190]:
path = os.path.join(os.getcwd(), 'data','dailymed_output_686.json')
with open(path) as f:
    data = json.load(f)
    
data = {key.replace('\u200b', '').replace('\xa0', ' ').strip().title(): value for key, value in data.items()}

for drug in data:
    data[drug] = [clean_object(item) for item in data[drug]]

In [191]:
data['Acetazolamide']

[{'index': 4,
  'actives': [('Acetazolamide', 100.0, 'mg/mL')],
  'route_type': 'INTRAVENOUS',
  'ndcs': ['67457-853-50'],
  'link': 'https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid=521838b0-a10d-495f-a014-f9ff25a6b6f3',
  'drug_name': 'ACETAZOLAMIDE- acetazolamide sodium injection, powder, lyophilized, for solution',
  'inactive_ndcs': False,
  'inactives': [('Sodium Hydroxide', '55X04QC32I', '', ''),
   ('Hydrochloric Acid', 'QTT17582CB', '', '')]},
 {'index': 4,
  'actives': [('Acetazolamide', '', '')],
  'route_type': 'ORAL',
  'ndcs': ['16729-331-01', '16729-331-17'],
  'link': 'https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid=9a368b35-cd51-47ad-83cc-ccc56c4585e8',
  'drug_name': 'ACETAZOLAMIDE capsule, extended release',
  'inactive_ndcs': False,
  'inactives': [('Ethyl Acrylate', '71E6178C9T', '', ''),
   ('Methacrylate/Methoxy Peg-10 Maleate/Styrene Copolymer',
    '39DK5WQ2PR',
    '',
    ''),
   ('Cellulose, Microcrystalline', 'OP1R32D61U', '', ''),
   ('Tal

In [192]:
filtered_data = filter_forModeling(data)
print(f'\n\nTotal number of API-Excp mixture data entries for modeling: {sum([len(v) for k,v in filtered_data.items()])}')


Number of keys before filter: 711
Number of keys after filter: 586

'Abacavir Sulfate': 
        Initial count: 31, 
        Filtered for 'route_type': 0, 
        Filtered for 'actives': 14, 
        Final count before deduplication: 17, 
        Final count after deduplication: 12

'Acamprosate Calcium': 
        Initial count: 8, 
        Filtered for 'route_type': 0, 
        Filtered for 'actives': 0, 
        Final count before deduplication: 8, 
        Final count after deduplication: 6

'Acetaminophen': 
        Initial count: 4960, 
        Filtered for 'route_type': 0, 
        Filtered for 'actives': 3534, 
        Final count before deduplication: 1426, 
        Final count after deduplication: 1089

'Acetazolamide': 
        Initial count: 64, 
        Filtered for 'route_type': 0, 
        Filtered for 'actives': 0, 
        Final count before deduplication: 64, 
        Final count after deduplication: 33

'Acetylcholine Chloride': 
        Initial count: 34, 
        F

In [217]:
filtered_data['Zolmitriptan']

[{'actives': [('Zolmitriptan', '', '')],
  'drug_name': 'ZOLMITRIPTAN tablet, orally disintegrating',
  'inactives': [('Anhydrous Citric Acid', 'XF417D3PSL', '', ''),
   ('Aspartame', 'Z0H242BBR1', '', ''),
   ('Silicon Dioxide', 'ETJ7Z6XBU4', '', ''),
   ('Gelatin', '2G86QN327L', '', ''),
   ('Magnesium Stearate', '70097M6I30', '', ''),
   ('Mannitol', '3OWL53L36A', '', ''),
   ('Cellulose, Microcrystalline', 'OP1R32D61U', '', ''),
   ('Orange', '5EVU04N5QU', '', ''),
   ('Polacrilin Potassium', '0BZ5A00FQU', '', ''),
   ('Sodium Stearyl Fumarate', '7CV7WJK4UI', '', ''),
   ('Crospovidone', '2S7830E561', '', '')],
  'route_type': 'ORAL'},
 {'actives': [('Zolmitriptan', '', '')],
  'drug_name': 'ZOLMITRIPTAN tablet, film coated',
  'inactives': [('Anhydrous Lactose', '3SY5LH9PMK', '', ''),
   ('Cellulose, Microcrystalline', 'OP1R32D61U', '', ''),
   ('Sodium Starch Glycolate Type A Corn', 'AG9B65PV6B', '', ''),
   ('Magnesium Stearate', '70097M6I30', '', ''),
   ('Silicon Dioxide', 'ET

In [194]:
unique_chems = {
    'inactive_unii': set(),
    'inactive_name': set(),
    'active_name': set()
}

for items in filtered_data.values():
    for entry in items:
        unique_chems['active_name'].update([active[0] for active in entry['actives']])
        unique_chems['inactive_unii'].update([inactive[1] for inactive in entry['inactives'] if inactive[1]])
        unique_chems['inactive_name'].update([inactive[0] for inactive in entry['inactives'] if not inactive[1]])

unique_chems = {key: list(value) for key, value in unique_chems.items()}

In [218]:
# Grab SMILES (canonical or isometric -- RDKit Chem.MolFromSmiles can take both)

# pubchem(UNII) --> SMILES
    # Need a step to handle when not found 
import requests
import time

def get_smiles_from_unii(unii_codes):
    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
    smiles_dict = {}
    delay = 0.2  # Start with a default delay of 200ms between requests

    for unii in unii_codes:
        time.sleep(delay)  # Respect the delay before making a request
        url = f"{base_url}/compound/name/{unii}/property/CanonicalSMILES/JSON"
        
        response = requests.get(url)
        
        # Check for throttling headers and adjust delay
        if 'X-Throttling-Control' in response.headers:
            throttling_header = response.headers['X-Throttling-Control']
            print(f"Throttling status: {throttling_header}")  # Optional: print the throttling status for debugging
            if "Red" in throttling_header or "Black" in throttling_header:
                delay = 1  # Increase delay to 1 second if in Red or Black status
            elif "Yellow" in throttling_header:
                delay = 0.5  # Increase delay to 500ms if in Yellow status
            else:
                delay = 0.2  # Otherwise, reset to the default delay

        if response.status_code == 200:
            data = response.json()
            smiles = data['PropertyTable']['Properties'][0]['CanonicalSMILES']
            smiles_dict[unii] = smiles
        elif response.status_code == 503:
            print(f"Request limit exceeded for UNII: {unii}. Retrying with increased delay.")
            time.sleep(delay)  # Wait before retrying
            continue  # Optionally, you can implement retry logic here
        else:
            print(f"Could not fetch data for UNII: {unii}")
            smiles_dict[unii] = None

    return smiles_dict


############ MARCH 25

#### Model development usihng existing one to one data

#### Data cleaning
## Do another pass of using OpenFDA
## Another pass for Names with ChemResolver
## Filter all found items
## Model
############


# pubchem(standardized name (inactive and active)) --> SMILES
    # Standardize Names
    
from fuzzywuzzy import process, fuzz
import pandas as pd
import requests
import time

def standardize_names(names_list):
    return [name.replace(" ()", "").strip().lower() for name in names_list]


# def resolve_chemical_name(names_list):
#     resolved_names = {}
#     base_url = "https://cactus.nci.nih.gov/chemical/structure"
#     for name in names_list:
#         url = f"{base_url}/{name}/names"
#         try:
#             response = requests.get(url)
#             if response.status_code == 200 and response.text:
#                 # Take the first resolved name as the canonical name
#                 canonical_name = response.text.split('\n')[0].lower()
#                 if canonical_name in resolved_names:
#                     resolved_names[canonical_name].append(name)
#                 else:
#                     resolved_names[canonical_name] = [name]
#             else:
#                 # If the name isn't found, create a key with an empty list
#                 resolved_names[name] = []
#         except Exception as e:
#             print(f"Error resolving name {name}: {e}")
#             resolved_names[name] = []
#     return resolved_names


import requests
import time

def resolve_chemical_name(names_list, retries=2, backoff_factor=1):
    resolved_names = {}
    base_url = "https://cactus.nci.nih.gov/chemical/structure"
    error_counter = 0  # Keep track of consecutive errors

    for name in names_list:
        url = f"{base_url}/{name}/names"
        attempt = 0
        while attempt < retries:
            try:
                response = requests.get(url)
                if response.status_code == 200 and response.text:
                    canonical_name = response.text.split('\n')[0].lower()
                    resolved_names[canonical_name] = resolved_names.get(canonical_name, []) + [name]
                    error_counter = 0  # Reset error counter on success
                    break  # Successful resolution, break out of the retry loop
                elif response.status_code == 500:
                    # Don't retry if a chemical doesn't exist (server returns 500)
                    resolved_names[name] = []
                    error_counter = 0  # Reset error counter
                    break
                elif response.status_code == 429 or response.status_code >= 502:
                    # Handle rate limiting and server errors
                    print(f"Server error for {name}, will retry after delay...")
                    error_counter += 1
                    time.sleep(backoff_factor * (2 ** attempt))  # Exponential backoff
                    attempt += 1
                else:
                    # For other client-side errors, don't retry
                    resolved_names[name] = []
                    break
            except requests.exceptions.RequestException as e:
                print(f"Request exception for {name}: {e}")
                error_counter += 1
                time.sleep(backoff_factor * (2 ** attempt))
                attempt += 1

            # Check if we're approaching the error limit
            if error_counter >= 25:  # Conservative approach to avoid hitting the limit
                print("Approaching server error limit, pausing...")
                time.sleep(180)  # Pause for 3 minutes to reset server's error count
                error_counter = 0

        if attempt == retries:
            print(f"Failed to resolve after {retries} attempts: {name}")
            resolved_names[name] = []

    return resolved_names


def combine_similar_names(resolved_names_dict, threshold=90):
    combined_names = {}
    keys_checked = set()

    for name in resolved_names_dict.keys():
        if name in keys_checked:
            continue
        similar_names = [key for key in resolved_names_dict.keys() if fuzz.ratio(name, key) >= threshold and key not in keys_checked]
        
        combined_list = []
        for similar_name in similar_names:
            combined_list.extend(resolved_names_dict[similar_name])
            keys_checked.add(similar_name)
        
        if similar_names:
            combined_names[similar_names[0]] = combined_list
        else:
            combined_names[name] = resolved_names_dict[name]

    return combined_names

#     # Deduplicate (similar using Fuzz)
# def deduplicate_names(names_list, threshold=90):
#     unique_names = {}
#     for name in names_list:
#         if not unique_names:  # If unique_names is empty, add the first name directly.
#             unique_names[name] = [name]
#             continue

#         match = process.extractOne(name, list(unique_names.keys()), scorer=fuzz.partial_ratio)
#         if match:
#             best_match, score = match
#             if score >= threshold:
#                 unique_names[best_match].append(name)
#             else:
#                 unique_names[name] = [name]
#         else:
#             unique_names[name] = [name]
    
#     # Select a representative for each group (e.g., the most common name)
#     representative_names = [names[0] for names in unique_names.values()]
#     return unique_names

#     # Name to SMILES
# def get_smiles_from_names(names):
#     base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
#     smiles_dict = {}

#     for name in names:
#         url = f"{base_url}/compound/name/{name}/property/CanonicalSMILES/JSON"
#         response = requests.get(url)
#         if response.status_code == 200:
#             data = response.json()
#             smiles = data['PropertyTable']['Properties'][0]['CanonicalSMILES']
#             smiles_dict[name]= smiles
#         else:
#             smiles_dict[name] = None

#     return smiles_dict

In [201]:
unii_to_smiles = get_smiles_from_unii(unique_chems['inactive_unii'])

Throttling status: Request Count status: Green (0%), Request Time status: Green (0%), Service status: Green (23%)
Throttling status: Request Count status: Green (0%), Request Time status: Green (0%), Service status: Green (40%)
Could not fetch data for UNII: W2ZU1RY8B0
Throttling status: Request Count status: Green (1%), Request Time status: Green (0%), Service status: Green (27%)
Throttling status: Request Count status: Green (1%), Request Time status: Green (0%), Service status: Green (37%)
Could not fetch data for UNII: HZ58M6D839
Throttling status: Request Count status: Green (0%), Request Time status: Green (0%), Service status: Green (23%)
Could not fetch data for UNII: 724GKU717M
Throttling status: Request Count status: Green (1%), Request Time status: Green (0%), Service status: Green (30%)
Throttling status: Request Count status: Green (0%), Request Time status: Green (0%), Service status: Green (17%)
Could not fetch data for UNII: T91K54D6M1
Throttling status: Request Count s

Throttling status: Request Count status: Green (6%), Request Time status: Green (0%), Service status: Green (20%)
Could not fetch data for UNII: H7AGY1OJO8
Throttling status: Request Count status: Green (7%), Request Time status: Green (0%), Service status: Green (40%)
Could not fetch data for UNII: J8HE8A6E5T
Throttling status: Request Count status: Green (7%), Request Time status: Green (0%), Service status: Green (33%)
Could not fetch data for UNII: STJ856D1Z0
Throttling status: Request Count status: Green (7%), Request Time status: Green (0%), Service status: Green (33%)
Could not fetch data for UNII: CFN6G1F6YK
Throttling status: Request Count status: Green (9%), Request Time status: Green (0%), Service status: Green (30%)
Could not fetch data for UNII: 8I089SAH3T
Throttling status: Request Count status: Green (9%), Request Time status: Green (0%), Service status: Green (27%)
Throttling status: Request Count status: Green (8%), Request Time status: Green (0%), Service status: Gree

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 575DY8C1ER
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 21W82Q764G
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (10%)
Could not fetch data for UNII: BT3S9L53JK
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (17%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (20%)
Could not fetch data for UNII: GYR30735RE
Throttling status: Request 

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (43%)
Could not fetch data for UNII: 8W46YN971G
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Yellow (50%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: G2M7P15E5P
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: 33GX5WQC0M
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (43%)
Could not fetch data for UNII: J2S07SB0YL
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for U

Throttling status: Request Count status: Green (18%), Request Time status: Green (1%), Service status: Green (43%)
Could not fetch data for UNII: R33S7TK2EP
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (43%)
Could not fetch data for UNII: 19AH1RAF4M
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (18%), Request Time status: Green (1%), Service status: Green (33%)
Throttling status: Request Count status: Green (18%), Request Time status: Green (1%), Service status: Green (43%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (20%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (13%)
Could not fetch data for UNII: 9S60Q72309
Throttling status: Request Count status: Green (14%), Request Time st

Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (13%)
Could not fetch data for UNII: F68VH75CJC
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: CI87N1IM01
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (20%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (23%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: V1IA3S3CUS
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (15%), Request Time st

Throttling status: Request Count status: Green (12%), Request Time status: Green (1%), Service status: Green (43%)
Throttling status: Request Count status: Green (12%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (37%)
Could not fetch data for UNII: 7T1F30V5YH
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: 0KSY80VYS3
Throttling status: Request Count status: Green (12%), Request Time status: Green (0%), Service status: Green (33%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: 1HIE915O2J
Throttling status: Request Count status: Green (13%), Request Time st

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Yellow (50%)
Could not fetch data for UNII: JK6142KK4O
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: SZH16H44UY
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (33%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (47%)
Could not fetch data for UNII: KM66971LVF
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 24RS0A988O
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (47%)
Could not fetch data for U

Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (37%)
Could not fetch data for UNII: S546YLW6E6
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (23%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (20%)
Could not fetch data for UNII: 8P20S56HZI
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: 3NXW29V3WO
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (17%)
Could not fetch data for UNII: BEI30UXK2N
Throttling status: Request 

Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (17%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: 8M707QY5GH
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (43%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (17%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (43%)
Could not fetch data for UNII: 68T8I45

Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (23%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (37%)
Could not fetch data for UNII: 4XI6112496
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Yellow (53%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (40%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (20%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (20%)
Throttling status: Request Count stat

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: 6I475159RA
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: UXX2N5V39P
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: HKY915780T
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: UDR641JW8W
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (17%)
Could not fetch data for UN

Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: REM6A5QMC0
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (10%)
Could not fetch data for UNII: P9T4K47OM0
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: B1K89384RJ
Throttling status: Request Count status: Green (15%), Request Time st

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: BRY146A46V
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (10%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: 3TNW8D08V3
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (20%)
Could not fetch data for UNII: 8LGU7VM393
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 1VDG5Y5HS6
Throttling status: Request 

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (33%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (20%)
Could not fetch data for UNII: R0XY39567G
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 8334LX7S21
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (20%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: DFM16KFA82
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (37%)
Could not fetch data for UNII: 2865993309
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (20%)
Could not fetch data for UN

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (33%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Yellow (50%)
Could not fetch data for UNII: Q8Y7S3B85M
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (37%)
Could not fetch data for UNII: XOF597Q3KY
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: 6TP696149N
Throttling status: Request Count status: Green (16%), Request Time s

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (20%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (17%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: 4X4HLN92OT
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (23%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (17%)
Could not fetch data for UNII: 161H3B14U2
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (37%)
Could not fetch data for UNII: 32K497ZK2U
Throttling status: Request Count status: Green (16%), Request Time st

Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (33%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: NMQ347994Z
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (17%)
Could not fetch data for UNII: 949E52Z6MY
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (1

Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 61W322NLDV
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: FGL3685T2X
Throttling status: Request Count status: Green (14%), Request Time status: Green (0%), Service status: Green (17%)
Could not fetch data for UNII: ECU18C66Q7
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: 2788Z9758H
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (13%)
Throttling status: Request Count status: Green (13%), Request Time status: Green (0%), Service status: Green (33%)
Could not fetch data for UN

Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (17%)
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: 3C18L6RJAZ
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: 2Z4LRR776S
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: M6S840WXG5
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (10%)
Could not fetch data for UNII: 8A6OMU3I8L
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (13%)
Could not fetch data for UNII: UDA30A2JJY
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service statu

Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: X9HD79I514
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (43%)
Could not fetch data for UNII: M6QU9ZUH2X
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: FV3431923Z
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: B8MIX97W95
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: HHT01ZNK31
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: C80684146D
Throttling status: Request Count status: Green (14%), Requ

Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (7%)
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: 6AXS45P1QU
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (17%)
Could not fetch data for UNII: R60QEP13IC
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (33%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Yellow (50%)
Could not fetch data for UNII: D3HPR4WW6F
Throttling status: Request Count status: Green (13%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: 3OZN5NGR9L
Throttling status: Request 

Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 4R4HFI6D95
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (47%)
Could not fetch data for UNII: 029TFK992N
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: Z82Y2C65EA
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: 02NG325BQG
Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UN

Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: A906T4D368
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (33%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 2ZA36H0S2V
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (37%)
Could not fetch data for UNII: 67M3EQ6BE1
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (23%)
Could not fetch data for UNII: 7Z075S9991
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: 7J95K7ID2S
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service statu

Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (17%)
Could not fetch data for UNII: K3R6ZDH4DU
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (13%)
Could not fetch data for UNII: 598D944HOL
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (43%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (13%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (33%)
Could not fetch data for UNII: HAF0412YIT
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: D2QHA03458
Throttling status: Request 

Throttling status: Request Count status: Green (15%), Request Time status: Green (1%), Service status: Green (40%)
Could not fetch data for UNII: 809Y72KV36
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (23%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: 1PEZ3NLY6I
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (47%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (43%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (3

Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (17%)
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (40%)
Could not fetch data for UNII: FXG254HF10
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (20%)
Could not fetch data for UNII: 86V2W84PW4
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (30%)
Could not fetch data for UNII: 6HQ855798J
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UNII: 137PC46F89
Throttling status: Request Count status: Green (16%), Request Time status: Green (1%), Service status: Green (27%)
Throttling status: Request Count status: Green (17%), Request Time status: Green (1%), Service status: Green (27%)
Could not fetch data for UN

Throttling status: Request Count status: Green (13%), Request Time status: Green (0%), Service status: Green (20%)
Could not fetch data for UNII: 1Z74184RGV
Throttling status: Request Count status: Green (14%), Request Time status: Green (0%), Service status: Green (30%)
Throttling status: Request Count status: Green (19%), Request Time status: Green (1%), Service status: Green (30%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (0%), Service status: Green (17%)
Could not fetch data for UNII: 243K8QF0MS
Throttling status: Request Count status: Green (19%), Request Time status: Green (1%), Service status: Green (40%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (37%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Yellow (53%)
Throttling status: Request Count status: Green (14%), Request Time status: Green (1%), Service status: Green (

In [209]:
round(100*len([key for key, val in unii_to_smiles.items() if val]) / len(unii_to_smiles), 2)

44.18

In [219]:
names = standardize_names(unique_chems['active_name']+unique_chems['inactive_name'])

In [222]:
# TODO - Need to determine the rate request limit
resolvedd_chemical_names = resolve_chemical_name(names[:30])

In [223]:
resolvedd_chemical_names

{'4-[(3-chloro-4-methoxyphenyl)methylamino]-2-[(2s)-2-(hydroxymethyl)pyrrolidin-1-yl]-n-(pyrimidin-2-ylmethyl)pyrimidine-5-carboxamide': ['avanafil'],
 '(1s,4s)-4-(3,4-dichlorophenyl)-n-methyl-1,2,3,4-tetrahydronaphthalen-1-amine hydrochloride': ['sertraline hydrochloride'],
 'gadolinium cation (3+)': [],
 'n-[(4-hydroxy-3-methoxyphenyl)methyl]-8-methylnon-6-enamide': ['capsaicin'],
 'methyl 2-phenyl-2-piperidin-2-ylacetate': ['methylphenidate'],
 '(4s)-6-chloro-4-(2-cyclopropylethynyl)-4-(trifluoromethyl)-1h-3,1-benzoxazin-2-one': ['efavirenz'],
 '5-[2-(benzenesulfonyl)ethyl]-3-[[(2r)-1-methylpyrrolidin-2-yl]methyl]-1h-indole hydrobromide': ['eletriptan hydrobromide'],
 '2-(6-methoxynaphthalen-2-yl)propanoic acid': ['naproxen'],
 '2-[di(phenyl)methoxy]-n,n-dimethylethanamine': ['diphenhydramine'],
 'methyl 7-[(1r,2r,3r)-3-hydroxy-2-[(e)-4-hydroxy-4-methyloct-1-enyl]-5-oxocyclopentyl]heptanoate': ['misoprostol'],
 '4-[2-(tert-butylamino)-1-hydroxyethyl]-2-(hydroxymethyl)phenol': ['albu

In [87]:
# ## Next step is to get the mol2vec and 2D descriptors for each unique active + inactive
# unique_chemicals = {chem[0] for items in filtered_data.values() for item in items for chem in (item.get('actives', []) + item.get('inactives', [])) if chem}

# # Convert the set to a sorted list
# unique_chemical_names = sorted(unique_chemicals)

In [105]:
## UNII GRAB AS WELL
## Remove special characters prior to running through UNII grab for SMILES
len(unique_chemical_names)

2734

In [None]:
[
    [
        active: [mol2vec, 2d]
        inactivates [[mol2vec, 2d],[],]
        conditions [pH, density, ...]
    ]
    
    
    
    

]

In [None]:
## Research 2D -- 
## Research negative data for chemical mixtures 

In [104]:
import cv2
import numpy as np

# Load your image
image_path = '/Users/samueljon/Downloads/test_frame.png'  # replace with your image path
image = cv2.imread(image_path)

# Check if image was loaded
if image is None:
    print("Error: Could not load image.")
else:
    # Define your line coordinates
    x1, y1 = 78, 337
    x2, y2 = 229, 66

    # Create a line on the image
    color = (0, 255, 0)  # Line color (B, G, R)
    thickness = 2  # Line thickness
    cv2.line(image, (x1, y1), (x2, y2), color, thickness)

    # Save the image with the line
    save_path = '/Users/samueljon/Downloads/test_frame_wCV2Line.png'  # replace with your save path
    cv2.imwrite(save_path, image)
