## Generating MOFs with a Target Property

### 1. Define MOF grammar

In [36]:
# Code in this cell is adapted from https://github.com/AminKaramlou/QNLG

from nltk import CFG
from nltk.parse.generate import generate

def _create_mof_search_space():
    """Enumerate every MOF sentence that the MOF grammar can produce.

    The grammar expands ``M -> T BB`` with ``BB -> N E``: one topology token,
    then one node token, then one edge token.  With 1 topology, 10 nodes and
    15 edges this yields 150 candidate sentences.

    Returns:
        list: the generated sentences that pass ``filter_mof``.  Each entry is
        a sequence of tokens; callers join them with spaces to form a MOF name.
    """
    MOF_GRAMMAR = """
        M -> T BB
        BB -> N E
        T -> 'pcu'
        N -> 'N106' | 'N123' | 'N139' | 'N144' | 'N248' | 'N394' | 'N155' | 'N173' | 'N205' | 'N505' 
        E -> 'E14' | 'E70' | 'E220' | 'E15' | 'E8' |'E35' | 'E183' | 'E191' | 'E1' | 'E9' | 'E28' | 'E59' | 'E161' | 'E225' | 'E229' 
        """

    topology = ["pcu"]
    # Only node tokens are listed here; the ordering check below therefore
    # compares the topology position against the first *node* token.
    building_blocks = ['N106', 'N123', 'N139', 'N144', 'N248', 'N394', 'N155',  'N173', 'N205', 'N505']

    GRAMMAR = CFG.fromstring(MOF_GRAMMAR)
    MOF = list(generate(GRAMMAR))

    def filter_mof(mofs):
        """Return True when a token sequence is a well-formed MOF sentence."""
        # Make sure no token appears more than once in the sentence:
        if len(set(mofs)) != len(mofs):
            return False

        # Make sure topology appears before the node building block:
        try:
            topo_position = next(i for i, v in enumerate(mofs) if v in topology)
            bb_position = next(i for i, v in enumerate(mofs) if v in building_blocks)
        except StopIteration:
            # A required token kind is missing.  Only StopIteration is caught
            # so that unrelated errors are not silently swallowed.
            return False
        return topo_position < bb_position

    return list(filter(filter_mof, MOF))

### 2. Evaluate property class of MOFs based on relative probability distribution

In [37]:
from pathlib import Path
from lambeq import TketModel, spiders_reader, AtomicType, IQPAnsatz
from pytket.extensions.qiskit import AerBackend
import random
import numpy as np

# Root folders: trained model checkpoints and the labelled MOF datasets.
# Both contain one sub-folder per model name (see load_model / read_true_labels).
model_dir = Path("./models/pv")
data_dir = Path("./mof_dataset/pv")

# One Aer backend instance is shared by every model through backend_config.
backend = AerBackend()
backend_config = dict(
    backend=backend,
    # NOTE(review): the argument 2 is presumably pytket's optimisation level — confirm.
    compilation=backend.default_compilation_pass(2),
    shots=8192,
)

# Map target properties to model names
property_to_model = {
    "low": "q1",
    "mod low": "q2",
    "mod high": "q3",
    "high": "q4"
}
# Model names in the same order as the mapping above: q1, q2, q3, q4.
model_names = list(property_to_model.values())

# Load the model by its name
def load_model(model_name):
    """Build a ``TketModel`` and load the saved checkpoint for ``model_name``.

    The checkpoint is read from
    ``model_dir/<model_name>/qnlp_binary_spider_model_<model_name>.lt`` and the
    model is created with the notebook-wide ``backend_config``.

    Args:
        model_name: name of the model, e.g. ``"q1"``.

    Returns:
        The ``TketModel`` instance after ``load`` has been called on it.
    """
    checkpoint = model_dir / f'{model_name}/qnlp_binary_spider_model_{model_name}.lt'
    loaded = TketModel(backend_config=backend_config)
    loaded.load(str(checkpoint))
    return loaded

# Read true labels from the dataset file
def read_true_labels(model_name):
    """Read the labelled dataset of one model into a ``{mof_name: label}`` dict.

    The file ``data_dir/<model_name>/pv_dataset_<model_name>.txt`` is parsed
    line by line: the first whitespace-separated token is the label and the
    remaining tokens, re-joined with single spaces, are the MOF name.

    Args:
        model_name: name of the model whose dataset is read, e.g. ``"q1"``.

    Returns:
        dict: MOF name -> label.  Labels are kept as strings exactly as they
        appear in the file.  If a MOF name occurs more than once, the last
        occurrence wins.
    """
    true_labels = {}
    file_path = data_dir / f'{model_name}/pv_dataset_{model_name}.txt'
    with open(file_path, 'r') as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                # Blank lines (e.g. a trailing newline at the end of the file)
                # carry no label; unpacking them would raise ValueError.
                continue
            label, *mof_tokens = tokens
            true_labels[" ".join(mof_tokens)] = label
    return true_labels

# Measure quantum circuit for a given MOF using a specific model
def _measure_quantum_circuit_for_mof(model, mof_name):
    """Run one MOF name through ``model`` and return its output.

    The name is turned into a diagram with ``spiders_reader``, converted to a
    circuit with a single-layer ``IQPAnsatz`` (3 single-qubit parameters), and
    passed to ``model.get_diagram_output``.

    Args:
        model: object exposing ``get_diagram_output`` (e.g. from ``load_model``).
        mof_name: MOF sentence such as ``"pcu N106 E14"``.

    Returns:
        The first entry of the model output; in this notebook's recorded
        output that is a two-element array ``[p(label 0), p(label 1)]``.
    """
    diagrams = spiders_reader.sentences2diagrams([mof_name])
    # Values passed to the ansatz per atomic type: 0 for nouns, 1 for sentences
    # (presumably the number of qubits per type — confirm against lambeq docs).
    type_map = {AtomicType.NOUN: 0, AtomicType.SENTENCE: 1}
    to_circuit = IQPAnsatz(type_map, n_layers=1, n_single_qubit_params=3)
    circuits = list(map(to_circuit, diagrams))
    return model.get_diagram_output(circuits)[0]

# Output MOF when the randomly generated MOF matches with the target property satisfying confidence threshold
def evaluate_until_match(target_property, confidence_threshold=0.85, max_iterations=100):
    """Sample random MOFs until one is confidently assigned the target property.

    Every iteration draws one MOF from the search space, evaluates it with all
    models in ``model_names`` and normalises the label-0 probabilities across
    the models.  The loop stops as soon as the model with the largest relative
    probability is the one mapped to ``target_property`` and that relative
    probability exceeds ``confidence_threshold``.  Progress and the final
    result are printed; nothing is returned.

    Args:
        target_property: key of ``property_to_model`` (e.g. ``"high"``).
        confidence_threshold: minimum relative label-0 probability (exclusive).
        max_iterations: maximum number of MOFs to sample.

    Raises:
        KeyError: if ``target_property`` is not a key of ``property_to_model``.
    """
    target_model = property_to_model[target_property]

    # The search space and the checkpoints are identical in every iteration,
    # so build/load them once instead of once per sampled MOF.
    mofs = _create_mof_search_space()
    models = {model_name: load_model(model_name) for model_name in model_names}

    for iteration in range(max_iterations):
        mof_name = " ".join(random.choice(mofs))
        print(f"Iteration {iteration + 1}: Evaluating MOF: {mof_name} for target property: {target_property}")

        # Evaluate the MOF with every model
        model_predictions = {
            model_name: _measure_quantum_circuit_for_mof(model, mof_name)
            for model_name, model in models.items()
        }

        total_prob_label_0 = sum(probs[0] for probs in model_predictions.values())
        has_signal = total_prob_label_0 > 0
        # If every model gives label 0 a probability of 0 there is nothing to
        # normalise; report 0.0 everywhere instead of dividing by zero.
        relative_probs = {
            model_name: (probs[0] / total_prob_label_0 if has_signal else 0.0)
            for model_name, probs in model_predictions.items()
        }

        # Determine the model with the highest relative probability for label 0
        best_model = max(relative_probs, key=relative_probs.get)
        best_relative_prob = relative_probs[best_model]

        # Map the best-performing model back to the property name
        predicted_property = next(key for key, value in property_to_model.items() if value == best_model)

        for model_name, probs in model_predictions.items():
            print(f"{model_name}: Prediction Probs: {probs}, Relative Prob Label 0: {relative_probs[model_name]:.3f}")

        # Check if the predicted property matches the target property and relative probability is above threshold
        if has_signal and best_model == target_model and best_relative_prob > confidence_threshold:
            best_probs = model_predictions[best_model]
            best_label = 0 if best_probs[0] > best_probs[1] else 1
            # MOFs absent from the dataset get the label "unknown" and are
            # therefore reported as "incorrect".
            true_label = read_true_labels(best_model).get(mof_name, "unknown")
            correctness = "correct" if str(best_label) == true_label else "incorrect"
            print(f"Best Model: {best_model}, Predicted Property: {predicted_property}, Predicted Label: {best_label}, True Label: {true_label}, Correctness: {correctness}")
            return

    print("Max iterations reached without matching target property")

### Example:

In [38]:
# Sample random MOFs (default: at most 100, relative-probability threshold 0.85)
# until one is assigned to the "high" property class; progress is printed below.
evaluate_until_match("high")

Iteration 1: Evaluating MOF: pcu N106 E229 for target property: high
q1: Prediction Probs: [0.048583 0.951417], Relative Prob Label 0: 0.060
q2: Prediction Probs: [0. 1.], Relative Prob Label 0: 0.000
q3: Prediction Probs: [0.5 0.5], Relative Prob Label 0: 0.619
q4: Prediction Probs: [0.25853659 0.74146341], Relative Prob Label 0: 0.320
Iteration 2: Evaluating MOF: pcu N123 E8 for target property: high
q1: Prediction Probs: [0.14655172 0.85344828], Relative Prob Label 0: 0.144
q2: Prediction Probs: [0.59872611 0.40127389], Relative Prob Label 0: 0.586
q3: Prediction Probs: [0.2027027 0.7972973], Relative Prob Label 0: 0.198
q4: Prediction Probs: [0.07327586 0.92672414], Relative Prob Label 0: 0.072
Iteration 3: Evaluating MOF: pcu N505 E225 for target property: high
q1: Prediction Probs: [0.13333333 0.86666667], Relative Prob Label 0: 0.201
q2: Prediction Probs: [0.32386364 0.67613636], Relative Prob Label 0: 0.489
q3: Prediction Probs: [0.02475248 0.97524752], Relative Prob Label 0: 0

### Test 100 times to evaluate generation performance

In [34]:
def iter_test(target_property, confidence_threshold=0.85, max_iterations=100):
    """Silent variant of the generation loop that returns its outcome.

    Random MOFs are sampled until the model with the largest relative label-0
    probability is the one mapped to ``target_property`` and that relative
    probability exceeds ``confidence_threshold``.

    Args:
        target_property: key of ``property_to_model`` (e.g. ``"high"``).
        confidence_threshold: minimum relative label-0 probability (exclusive).
        max_iterations: maximum number of MOFs to sample.

    Returns:
        tuple: ``(mof_name, prediction_probs, true_label, correctness, iterations)``.

        * On a match, ``prediction_probs`` is the output of the matching model,
          ``true_label`` is the dataset label (``"unknown"`` if the MOF is not
          in the dataset) and ``correctness`` is ``"correct"`` when that label
          is ``"0"``, otherwise ``"incorrect"``.
        * On timeout, the last sampled MOF and the output of its best model are
          returned with ``true_label="unknown"`` and ``correctness="timeout"``.
          If no iteration ran, both are ``None``.

    Raises:
        KeyError: if ``target_property`` is not a key of ``property_to_model``.
    """
    target_model = property_to_model[target_property]

    # The search space and the checkpoints are identical in every iteration,
    # so build/load them once instead of once per sampled MOF.
    mofs = _create_mof_search_space()
    models = {model_name: load_model(model_name) for model_name in model_names}

    # Defined up front so the timeout return is valid even when the loop body
    # never runs (max_iterations <= 0).
    mof_name = None
    best_prediction = None
    attempts = 0

    for iteration in range(max_iterations):
        attempts = iteration + 1
        mof_name = " ".join(random.choice(mofs))

        model_predictions = {
            model_name: _measure_quantum_circuit_for_mof(model, mof_name)
            for model_name, model in models.items()
        }

        total_prob_label_0 = sum(probs[0] for probs in model_predictions.values())
        has_signal = total_prob_label_0 > 0
        # If every model gives label 0 a probability of 0 there is nothing to
        # normalise; use 0.0 everywhere instead of dividing by zero.
        relative_probs = {
            model_name: (probs[0] / total_prob_label_0 if has_signal else 0.0)
            for model_name, probs in model_predictions.items()
        }
        best_model = max(relative_probs, key=relative_probs.get)
        best_relative_prob = relative_probs[best_model]
        best_prediction = model_predictions[best_model]

        if has_signal and best_model == target_model and best_relative_prob > confidence_threshold:
            # .get() keeps MOFs that are absent from the dataset from raising
            # KeyError; they are labelled "unknown" and counted as "incorrect".
            true_label = read_true_labels(best_model).get(mof_name, "unknown")
            correctness = "correct" if true_label == "0" else "incorrect"
            return mof_name, best_prediction, true_label, correctness, attempts

    return mof_name, best_prediction, "unknown", "timeout", attempts

def evaluate_model_performance(user_property, n_trials=100):
    """Repeat ``iter_test`` and print a summary of the outcomes.

    Args:
        user_property: target property passed on to ``iter_test``.
        n_trials: number of independent ``iter_test`` runs (default 100).

    Prints the number of correct guesses, incorrect guesses and timeouts, and
    the average number of iterations per run (timed-out runs included).
    """
    counts = {"correct": 0, "incorrect": 0, "timeout": 0}
    total_iterations = 0

    for _ in range(n_trials):
        # Only the outcome and the iteration count are needed for the summary.
        _, _, _, correctness, iterations = iter_test(user_property)
        total_iterations += iterations
        if correctness in counts:
            counts[correctness] += 1

    # Guard against n_trials <= 0 so the summary can still be printed.
    average_iterations_per_guess = total_iterations / n_trials if n_trials > 0 else 0

    print(f"Total Correct Guesses: {counts['correct']}")
    print(f"Total Incorrect Guesses: {counts['incorrect']}")
    print(f"Timeouts: {counts['timeout']}")
    print(f"Average Iterations per Guess: {average_iterations_per_guess:.2f}")


### Example:

In [35]:
# Run the repeated generation test for the "high" property class and print
# the summary counts (correct / incorrect / timeouts / average iterations).
user_property = "high"
evaluate_model_performance(user_property)

Total Correct Guesses: 90
Total Incorrect Guesses: 9
Timeouts: 1
Average Iterations per Guess: 24.87
