## PFAS Screening
Okay, so here's the deal. We have 7 PFAS candidates as targets. We also have 765 probe material candidates. 

Edit: same idea we had for PFAS-v1. But now let's simplify the model a little bit and then for v4 we'll actually train it using only the original data and see if we get the same probe suggestions for PFOS detection.

The idea now is:
1. Go through all json files
2. Enumerate all different types of medium and condtions
3. Create two separate datasets: all_medium and all_conditions
4. Combine elements of all_PFAS + all_probe + all_medium + all_conditions to get a list of LDLs
5. Get the probe materials related to the lowest LDLs

In [24]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GraphConv, global_mean_pool
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
import snntorch as snn

# Category mappings for LDL as integers
ldl_mapping = {
    'Very Low Detection (High Sensitivity)': 0,
    'Low Detection': 1,
    'Moderate Detection': 2,
    'High Detection (Lower Sensitivity)': 3,
    'Very High Detection (Low Sensitivity)': 4
}

def extract_features(descriptors):
    numerical_values = []
    for key, value in descriptors.items():
        if key == "CID":
            continue
        if isinstance(value, (int, float)):
            numerical_values.append(float(value))
        elif isinstance(value, str) and value.replace('.', '', 1).isdigit():
            numerical_values.append(float(value))
    return torch.tensor(numerical_values, dtype=torch.float)

# Step 1: Find maximum feature length across JSON files
def find_max_feature_length(json_files, folder_path):
    max_feature_length = 0
    for f in json_files:
        data = json.load(open(os.path.join(folder_path, f), 'r'))
        for section in ['detect_target', 'probe_material', 'test_medium_electrolyte']:
            for item in data.get(section, []):
                feature_length = len(extract_features(item['substance_descriptors']))
                max_feature_length = max(max_feature_length, feature_length)
    return max_feature_length

# Updated helper function to extract features and include Magpie Descriptors for "inorganic solid"
def extract_features_with_magpie(descriptors, substance_type):
    numerical_values = []
    for key, value in descriptors.items():
        if key == "CID" or (substance_type == "inorganic solid" and "MagpieData" in key):
            continue
        if isinstance(value, (int, float)):
            numerical_values.append(float(value))
        elif isinstance(value, str) and value.replace('.', '', 1).isdigit():
            numerical_values.append(float(value))
    
    # Add Magpie Descriptors for inorganic solids if they exist
    if substance_type == "inorganic solid" and descriptors.get("Magpie Descriptors") is not None:
        magpie_descriptors = [
            float(v) for k, v in descriptors["Magpie Descriptors"].items() if isinstance(v, (int, float))
        ]
        numerical_values.extend(magpie_descriptors)
    
    return torch.tensor(numerical_values, dtype=torch.float)


# Updated aggregation function to use the new extract_features_with_magpie function
def aggregate_by_type(items, max_feature_length):
    types = {'small molecule': [], 'inorganic solid': [], 'polymer': []}
    for item in items:
        substance_type = item.get('substance_type', '').lower()
        if substance_type in types:
            # Extract features, considering Magpie Descriptors if the type is "inorganic solid"
            feature_tensor = F.pad(
                extract_features_with_magpie(item['substance_descriptors'], substance_type),
                (0, max_feature_length - len(extract_features_with_magpie(item['substance_descriptors'], substance_type)))
            )
            types[substance_type].append(feature_tensor)
    
    aggregated_features = []
    for type_key in types:
        if types[type_key]:
            aggregated_features.append(torch.mean(torch.stack(types[type_key]), dim=0))
        else:
            aggregated_features.append(torch.zeros(max_feature_length))
    
    return torch.cat(aggregated_features)

# Generate graph from JSON data (no changes needed here)
def generate_graph_from_json(data, max_feature_length):
    nodes = []
    edges = []

    # Aggregate target, probe, medium nodes with updated aggregation function
    target_node = aggregate_by_type(data.get('detect_target', []), max_feature_length)
    probe_node = aggregate_by_type(data.get('probe_material', []), max_feature_length)
    medium_node = aggregate_by_type(data.get('test_medium_electrolyte', []), max_feature_length)

    # Conditions node
    conditions_features = [
        data.get("test_operating_temperature_celsius", 0.0),
        data.get("min_pH_when_testing", -1.0),
        data.get("max_pH_when_testing", 0.0)
    ]
    conditions_node = F.pad(torch.tensor(conditions_features, dtype=torch.float),
                            (0, max_feature_length * 3 - len(conditions_features)))

    nodes.extend([target_node, probe_node, medium_node, conditions_node])

    edges = [
        (0, 1),  # Target -> Probe
        (0, 2),  # Target -> Medium
        (1, 2),  # Probe -> Medium
        (3, 2)   # Conditions -> Medium
    ]

    x = torch.stack(nodes)
    edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
    ldl_label = ldl_mapping.get(data.get("LDL_category", ""), 0)
    return Data(x=x, edge_index=edge_index, y=torch.tensor(ldl_label, dtype=torch.long))

# Load and process data
folder_path = 'JSON_data/retrieved_substances_data_enhanced_json_files_2nd_attempt_11052024'
json_files = [f for f in os.listdir(folder_path) if f.endswith('.json')]
max_feature_length = find_max_feature_length(json_files, folder_path)

# The rest of the data loading and processing code remains unchanged
def extract_and_concatenate_vectors(substances):
    vector_data = []
    for substance in substances:
        descriptors = substance.get('substance_descriptors', {})
        for key in ["Morgan_128", "maccs_fp", "morgan_fp_128"]:
            value = descriptors.get(key, [])
            if isinstance(value, list):
                vector_data.extend(value)
    return np.array(vector_data)

snn_data = []
gnn_data = []
labels = []

# Find the maximum vector length across all files
max_vector_length = 0
all_vectors = []

for f in json_files:
    # Load JSON data
    data = json.load(open(os.path.join(folder_path, f), 'r'))

    # Generate graph and append to gnn_data
    graph = generate_graph_from_json(data, max_feature_length)
    if graph is not None:
        gnn_data.append(graph)

        # Generate the spiking vector by concatenating target, probe, and medium
        spiking_vector = np.concatenate([
            extract_and_concatenate_vectors(data.get('detect_target', [])),
            extract_and_concatenate_vectors(data.get('probe_material', [])),
            extract_and_concatenate_vectors(data.get('test_medium_electrolyte', []))
        ])
        all_vectors.append(spiking_vector)

        # Update the maximum vector length if needed
        max_vector_length = max(max_vector_length, len(spiking_vector))
        # Append the label for classification
        labels.append(graph.y.item())

# Pad all_vectors to the maximum vector length
padded_vectors = [np.pad(vec, (0, max_vector_length - len(vec)), 'constant') for vec in all_vectors]

# Append each padded vector directly to snn_data
snn_data.extend(padded_vectors)


In [25]:
snn_data = torch.FloatTensor(snn_data)
labels = torch.LongTensor(labels)

X_train, X_test, y_train, y_test = train_test_split(snn_data, labels, test_size=0.2, random_state=42)
train_data, test_data = train_test_split(gnn_data, test_size=0.2, random_state=42)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

# Define SNN model with customizable beta
class SNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, beta):
        super(SNN, self).__init__()
        self.beta = beta
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.beta * self.fc2(x)
        return x

# Define GNN model with customizable number of layers
class GNN(nn.Module):
    def __init__(self, num_features, hidden_channels, num_classes, num_layers):
        super(GNN, self).__init__()
        self.convs = nn.ModuleList([GraphConv(num_features if i == 0 else hidden_channels, hidden_channels) for i in range(num_layers)])
        self.lin = nn.Linear(hidden_channels, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        for conv in self.convs:
            x = conv(x, edge_index)
            x = F.relu(x)
        x = global_mean_pool(x, batch)
        x = self.lin(x)
        return x



In [26]:
class MultiModalModel(nn.Module):
    def __init__(self, snn_input_size, snn_hidden_size, gnn_input_size, gnn_hidden_size, num_classes, snn_beta=0.9, gnn_layers=2):
        super(MultiModalModel, self).__init__()
        self.snn = SNN(snn_input_size, snn_hidden_size, num_classes, beta=snn_beta)
        self.gnn = GNN(gnn_input_size, gnn_hidden_size, num_classes, num_layers=gnn_layers)
        self.fusion_layer = nn.Linear(num_classes * 2, num_classes)

    def forward(self, snn_input, gnn_data):
        snn_output = self.snn(snn_input)
        gnn_output = self.gnn(gnn_data)
        combined_output = torch.cat((snn_output, gnn_output), dim=1)
        final_output = self.fusion_layer(combined_output)
        return final_output

In [27]:
def train_multimodal(snn_loader, gnn_loader):
    model.train()
    total_loss = 0

    for (snn_batch, (gnn_data)) in zip(snn_loader, gnn_loader):
        snn_input, snn_labels = snn_batch
        snn_input, snn_labels = snn_input.to(device), snn_labels.to(device)
        gnn_data = gnn_data.to(device)

        optimizer.zero_grad()
        
        # Forward pass
        output = model(snn_input, gnn_data)
        
        # Calculate loss
        loss = criterion(output, gnn_data.y)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    return total_loss / len(snn_loader)

def test_multimodal(snn_loader, gnn_loader):
    model.eval()
    total_loss = 0
    preds, labels = [], []
    
    with torch.no_grad():
        for (snn_batch, gnn_data) in zip(snn_loader, gnn_loader):
            snn_data, snn_labels = snn_batch
            snn_data, snn_labels = snn_data.to(device), snn_labels.to(device)
            gnn_data = gnn_data.to(device)
            
            # Forward pass through multi-modal model
            output = model(snn_data, gnn_data)
            
            # Compute loss
            loss = criterion(output, gnn_data.y)
            total_loss += loss.item()
            
            # Predictions and labels for metrics
            pred = output.argmax(dim=1)
            preds.extend(pred.cpu().numpy())  # Save predicted labels
            labels.extend(gnn_data.y.cpu().numpy())  # Save true labels

    # Calculate evaluation metrics
    accuracy = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='weighted')
    precision = precision_score(labels, preds, average='weighted')
    recall = recall_score(labels, preds, average='weighted')
    avg_loss = total_loss / len(snn_loader)
    
    # Return evaluation metrics, predictions, and true labels
    return avg_loss, accuracy, f1, precision, recall, preds, labels

In [28]:
# Save models and their configurations
def save_model(model, file_path, description):
    """
    Save the trained model and print its characteristics.
    """
    # Save the model
    torch.save(model.state_dict(), file_path)
    
    # Print model characteristics
    print(f"Model saved to {file_path}")
    print("Model Description:")
    print(description)
    print("Number of Parameters:", sum(p.numel() for p in model.parameters()))
    print("Trainable Parameters:", sum(p.numel() for p in model.parameters() if p.requires_grad))

In [29]:
# Assuming gnn_data[0].x.shape[1] gives the correct input feature size for GNN
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gnn_input_size = gnn_data[0].x.shape[1]

model = MultiModalModel(
    snn_input_size=snn_data.shape[1],  # 1358 based on snn_data.shape
    snn_hidden_size=64,
    gnn_input_size=gnn_input_size,     # Updated to match the actual feature size of gnn_data.x
    gnn_hidden_size=64,
    num_classes=5,
    snn_beta=0.85,
    gnn_layers=7
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

In [30]:
# Create DataLoader for SNN data with matching batch size
snn_train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
snn_test_loader = DataLoader(list(zip(X_test, y_test)), batch_size=64)

# Training loop
num_epochs = 200
for epoch in range(num_epochs):
    train_loss = train_multimodal(snn_train_loader, train_loader)
    if epoch % 5 == 0:
        print(f"Epoch {epoch}/{num_epochs}, Training Loss: {train_loss:.4f}")

# Final evaluation using test data loaders
test_loss, accuracy, f1, precision, recall, preds, labels = test_multimodal(snn_test_loader, test_loader)
print(f"Test Loss: {test_loss:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")



Epoch 0/200, Training Loss: 1.5019
Epoch 5/200, Training Loss: 1.0610
Epoch 10/200, Training Loss: 0.8368
Epoch 15/200, Training Loss: 0.6164
Epoch 20/200, Training Loss: 0.4844
Epoch 25/200, Training Loss: 0.3906
Epoch 30/200, Training Loss: 0.3590
Epoch 35/200, Training Loss: 0.3496
Epoch 40/200, Training Loss: 0.3021
Epoch 45/200, Training Loss: 0.3060
Epoch 50/200, Training Loss: 0.2797
Epoch 55/200, Training Loss: 0.2847
Epoch 60/200, Training Loss: 0.2630
Epoch 65/200, Training Loss: 0.2592
Epoch 70/200, Training Loss: 0.2684
Epoch 75/200, Training Loss: 0.2466
Epoch 80/200, Training Loss: 0.2763
Epoch 85/200, Training Loss: 0.2526
Epoch 90/200, Training Loss: 0.2577
Epoch 95/200, Training Loss: 0.2583
Epoch 100/200, Training Loss: 0.2406
Epoch 105/200, Training Loss: 0.2503
Epoch 110/200, Training Loss: 0.2330
Epoch 115/200, Training Loss: 0.2554
Epoch 120/200, Training Loss: 0.2321
Epoch 125/200, Training Loss: 0.2232
Epoch 130/200, Training Loss: 0.2450
Epoch 135/200, Training

In [31]:
# Define the folder path
folder_path = 'JSON_data/retrieved_substances_data_enhanced_json_files_2nd_attempt_11052024'

# List to hold all files ending with '_original.json'
json_files_org = [file for file in os.listdir(folder_path) if file.endswith('_original.json')]
max_feature_length_org = find_max_feature_length(json_files_org, folder_path)

In [32]:
snn_data = []
gnn_data = []
labels = []

# Find the maximum vector length across all files
max_vector_length = 0
all_vectors = []

for f in json_files_org:
    file_path = os.path.join(folder_path, f)  # Construct the full path to the file
    
    # Load JSON data
    with open(file_path, 'r') as json_file:
        data = json.load(json_file)

    # Generate graph and append to gnn_data
    graph = generate_graph_from_json(data, max_feature_length_org)
    if graph is not None:
        gnn_data.append(graph)

        # Generate the spiking vector by concatenating target, probe, and medium
        spiking_vector = np.concatenate([
            extract_and_concatenate_vectors(data.get('detect_target', [])),
            extract_and_concatenate_vectors(data.get('probe_material', [])),
            extract_and_concatenate_vectors(data.get('test_medium_electrolyte', []))
        ])
        all_vectors.append(spiking_vector)

        # Update the maximum vector length if needed
        max_vector_length = max(max_vector_length, len(spiking_vector))

        # Append the label for classification
        labels.append(graph.y.item())

# Pad all_vectors to the maximum vector length
padded_vectors = [np.pad(vec, (0, max_vector_length - len(vec)), 'constant') for vec in all_vectors]

# Append each padded vector directly to snn_data
snn_data.extend(padded_vectors)

In [33]:
snn_data_org = torch.FloatTensor(snn_data)
labels_org = torch.LongTensor(labels)

_, X_test, _, y_test = train_test_split(snn_data_org, labels_org, test_size=0.999, random_state=42)
_, test_data = train_test_split(gnn_data, test_size=0.999, random_state=42)
#train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

# Create DataLoader for SNN data with matching batch size
#snn_train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
snn_test_loader = DataLoader(list(zip(X_test, y_test)), batch_size=64)



In [34]:
# Final evaluation using test data loaders
test_loss, accuracy, f1, precision, recall, preds, labels = test_multimodal(snn_test_loader, test_loader)
print(f"Test Loss: {test_loss:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

Test Loss: 0.1912
Accuracy: 0.9068
F1 Score: 0.9072
Precision: 0.9096
Recall: 0.9068


In [35]:
import os
import json
import numpy as np
import torch

# Load the fixed detect_target from PFAS1.json
with open('PFAS_Screening/PFAS_detect_target_6.json', 'r') as pfas_file:
    pfas_data = json.load(pfas_file)
fixed_detect_target = pfas_data.get('detect_target', [])

# Define the folder path
folder_path = 'JSON_data/retrieved_substances_data_enhanced_json_files_2nd_attempt_11052024'

# List to hold all files ending with '_original.json'
json_files_org = [file for file in os.listdir(folder_path) if file.endswith('_original.json')]

# Initialize data storage
snn_data = []
gnn_data = []
labels = []

# Function to extract and concatenate vector data
def extract_and_concatenate_vectors(substances):
    vector_data = []
    for substance in substances:
        descriptors = substance.get('substance_descriptors', {})
        for key in ["Morgan_128", "maccs_fp", "morgan_fp_128"]:
            value = descriptors.get(key, [])
            if isinstance(value, list):
                vector_data.extend(value)
    return np.array(vector_data)

# Iterate through JSON files
max_vector_length = 0
all_vectors = []

for f in json_files_org:
    file_path = os.path.join(folder_path, f)  # Construct the full path to the file
    
    # Load JSON data
    with open(file_path, 'r') as json_file:
        data = json.load(json_file)

    # Inject the fixed detect_target into the current data dictionary
    data['detect_target'] = fixed_detect_target

    # Aggregate target, probe, and medium nodes
    probe_data = data.get('probe_material', [])
    medium_data = data.get('test_medium_electrolyte', [])

    # Concatenate vectors from detect_target, probe, and medium
    spiking_vector = np.concatenate([
        extract_and_concatenate_vectors(data.get('detect_target', [])),
        extract_and_concatenate_vectors(probe_data),
        extract_and_concatenate_vectors(medium_data)
    ])
    all_vectors.append(spiking_vector)

    # Update the maximum vector length if needed
    max_vector_length = max(max_vector_length, len(spiking_vector))

    # Generate graph data
    graph = generate_graph_from_json(data, max_feature_length_org)
    if graph is not None:
        gnn_data.append(graph)
        labels.append(graph.y.item())

# Pad all_vectors to the maximum vector length
padded_vectors = [np.pad(vec, (0, max_vector_length - len(vec)), 'constant') for vec in all_vectors]

# Append each padded vector directly to snn_data
snn_data.extend(padded_vectors)

print(f"Processed {len(json_files_org)} JSON files with fixed detect_target.")


Processed 1192 JSON files with fixed detect_target.


In [36]:
snn_data_org = torch.FloatTensor(snn_data)
labels_org = torch.LongTensor(labels)

_, X_test, _, y_test = train_test_split(snn_data_org, labels_org, test_size=0.999, random_state=42)
_, test_data = train_test_split(gnn_data, test_size=0.999, random_state=42)
#train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

# Create DataLoader for SNN data with matching batch size
#snn_train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
snn_test_loader = DataLoader(list(zip(X_test, y_test)), batch_size=64)



In [37]:
# Final evaluation using test data loaders
_, _, _, _, _, preds, _ = test_multimodal(snn_test_loader, test_loader)
#print(preds)

In [39]:
from collections import Counter
import os
import json

# Step 1: Initialize variables
folder_path = 'JSON_data/retrieved_substances_data_enhanced_json_files_2nd_attempt_11052024'

# Step 2: Collect details of instances where preds == 0
zero_pred_details = []

for idx, pred in enumerate(preds):
    if pred == 0:
        file_name = json_files_org[idx]
        file_path = os.path.join(folder_path, file_name)

        with open(file_path, 'r') as json_file:
            json_data = json.load(json_file)

        probe_material = json_data.get('probe_material', [])
        probe_material_names = [
            item.get('substance_name', 'Unknown') for item in probe_material
        ]

        test_medium = json_data.get('test_medium_electrolyte', [])
        test_medium_names = [
            item.get('substance_name', 'Unknown') for item in test_medium
        ]

        conditions_vector = [
            json_data.get("test_operating_temperature_celsius", 0.0),
            json_data.get("min_pH_when_testing", -1.0),
            json_data.get("max_pH_when_testing", 0.0)
        ]

        zero_pred_details.append({
            "file_name": file_name,
            "probe_material_names": probe_material_names,
            "test_medium_names": test_medium_names,
            "conditions_vector": conditions_vector
        })

# Step 3: Print total instances where preds == 0
print(f"Number of predictions == 0: {len(zero_pred_details)}")

# Step 4: Filter out cases where conditions vector ends with [-1.0, -1.0]
filtered_zero_pred_details = [
    detail for detail in zero_pred_details if detail['conditions_vector'][-2:] != [-1.0, -1.0]
]

# Step 5: Print size of filtered dataset
print(f"Size of filtered dataset: {len(filtered_zero_pred_details)}")

# Step 6: Count and sort occurrences of probe materials
filtered_probe_material_names = [
    name for detail in filtered_zero_pred_details for name in detail['probe_material_names']
]
filtered_probe_material_counts = Counter(filtered_probe_material_names)
sorted_filtered_probe_material_counts = sorted(filtered_probe_material_counts.items(), key=lambda x: x[1], reverse=True)

# Step 7: Print occurrences of probe materials
print("Recurrent Probe Material Names (Filtered) (Decreasing Order):")
for name, count in sorted_filtered_probe_material_counts:
    print(f"{name}: {count} occurrences")


# Step 8: Count and sort occurrences of conditions
filtered_conditions = [
    tuple(detail['conditions_vector']) for detail in filtered_zero_pred_details
]
filtered_conditions_counts = Counter(filtered_conditions)
sorted_filtered_conditions_counts = sorted(filtered_conditions_counts.items(), key=lambda x: x[1], reverse=True)

# Print occurrences of conditions
print("Recurrent Conditions Vectors (Filtered) (Decreasing Order):")
for conditions, count in sorted_filtered_conditions_counts:
    print(f"{conditions}: {count} occurrences")


Number of predictions == 0: 195
Size of filtered dataset: 89
Recurrent Probe Material Names (Filtered) (Decreasing Order):
zinc oxide: 10 occurrences
graphene: 8 occurrences
indium oxide: 7 occurrences
aluminum oxide: 5 occurrences
tin dioxide: 5 occurrences
carbon nanotube: 5 occurrences
gallium oxide: 4 occurrences
gold: 3 occurrences
phenol: 3 occurrences
ethylene oxide: 3 occurrences
benzoic acid: 3 occurrences
fluorine: 2 occurrences
tin(iv) oxide: 2 occurrences
hafnium oxide: 2 occurrences
diamond: 2 occurrences
silicon nitride: 2 occurrences
silicon dioxide: 2 occurrences
polypyrrole: 2 occurrences
valinomycin: 2 occurrences
polydithiopropane sulfonic acid: 1 occurrences
cerium oxide: 1 occurrences
yttrium oxide: 1 occurrences
poly(3-amino-benzylamine-co-aniline): 1 occurrences
hafnium nitride: 1 occurrences
ethyl undecylenate: 1 occurrences
bismuth(iii) vanadate: 1 occurrences
platinum: 1 occurrences
triethylamine: 1 occurrences
bismuth oxide: 1 occurrences
l-cysteine: 1 occurr