In [1]:
# goal: use the mined dataset to create setting to verify a proposed similarity 
# measure between two outputs of platform A and platform B.

In [3]:
import numpy as np
import cirq
import qiskit
from qiskit.circuit import qpy_serialization
from qiskit import QuantumCircuit
from typing import List
import math
import pandas as pd
from IPython.display import display
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
import os

In [5]:
REAL_DATA_FOLDER = "../data/random_program_execution"
RANDOM_DATA_FOLDER = "../data/random_program_execution"
QASM_DATA_FOLDER = "../data/nisq_qasm"

In [6]:
records = [
    {
        "filepath": f,
        "hash": f.rsplit('_', 1)[0],
        "platform": f.rsplit('_', 1)[1].replace(".csv", "")
    } 
    for f in os.listdir(REAL_DATA_FOLDER)
    if f.endswith(".csv")
]
ids = list(set([r['hash'] for r in records]))
print(f"We have {len(ids)} records.")
ids[:5]

We have 1000 records.


['e2e61b72c823f6c498d424fc96f9ca902d4891bf3b3e06c4c41c275b1da8a4f6',
 '800bc8213d4d4a8bfacd9c55d5bbc47567e9774933bf2005b4bba02cdee3621a',
 '61315b4807dc067fdd9235f4cd60c36c3b85bdc996ca73f0ca2250d24fd14a49',
 '861981676798916eef3ea5ad9c25a9cd68d0d881baa711e80ebc4f1f9c5a1224',
 '5b3324a7e63f091d3cb02eb7b26ccf09adcec22f087f9f206e46df526e449420']

In [7]:
records_qasm = [
    {
        "filepath": f,
        "algo_name": f.rsplit('_', 1)[0],
        "platform": f.rsplit('_', 1)[1].replace(".csv", "")
    } 
    for f in os.listdir(QASM_DATA_FOLDER)
    if f.endswith(".csv")
]
ids_qasm = list(set([r['algo_name'] for r in records_qasm]))
print(f"We have {len(ids_qasm)} records for real qasm algorithms.")
ids_qasm[:5]

We have 12 records for real qasm algorithms.


['bv_n14', 'sat_n11', 'multiply_n13', 'simon_n6', 'ising_n10']

In [8]:
records_random = [
    {
        "filepath": f,
        "random": f.replace(".csv", "")
    } 
    for f in os.listdir(RANDOM_DATA_FOLDER)
    if f.endswith(".csv")
]
ids_random = list(set([r['random'] for r in records_random]))

In [9]:
def create_pairs(ids_left, suffix_left, folder_left,
                 ids_right, suffix_right, folder_right,
                 n_samples, shared_ids=False):
    """Create pairs of experiments left and rights.
    
    n_samples: int 
        number of circuits which are sampled. If zero, all the ids are used 
        in the current order and no sampling is performed.
    
    shared_ids: boolean
        whether the sampling procedure should be done once and the IDs
        should be shared, aka we get the same circuit from the two platforms
    """
    shortest_list = min(len(ids_left), len(ids_right))
    if shortest_list < n_samples:
        print(f"There are not enough circuits to sample: {shortest_list}")
        print(f"and you asked for: {n_samples}")
        print("Warning we then use all the circuits.")
        n_samples = shortest_list
    if n_samples > 0:
        left_candidates = np.random.choice(ids_left, size=n_samples, replace=False)
        right_candidates = np.random.choice(ids_right, size=n_samples, replace=False)
    else:
        left_candidates = ids_left
        right_candidates = ids_right
    if shared_ids:
        assert sorted(ids_right) == sorted(ids_left), "IDs left and right lists are not matching"
        right_candidates = left_candidates
    left_dfs = [
        pd.read_csv(
            os.path.join(folder_left, f"{id_left}{suffix_left}.csv")
        )
        for id_left in left_candidates
    ]
    right_dfs = [
        pd.read_csv(
            os.path.join(folder_right, f"{id_right}{suffix_right}.csv")
        )
        for id_right in right_candidates
    ]
    return zip(left_dfs, right_dfs)
    

In [10]:
pairs = create_pairs(ids_left=ids, suffix_left="_qiskit", folder_left=REAL_DATA_FOLDER,
                     ids_right=ids_random, suffix_right="", folder_right=RANDOM_DATA_FOLDER,
                     n_samples=50)

In [11]:
from IPython.display import display, HTML

In [12]:
for df_left, df_right in pairs:
    display(df_left.head())
    display(df_right.head())
    break

Unnamed: 0.1,Unnamed: 0,1101111010,0011011101,1011011101,1010011100,0001001100,0100000101,0010010101,1110110111,1010100010,...,0011100011,0000101111,1000111111,0011010001,1011111010,1010000100,0000111010,1000111010,1110010101,1110101011
0,0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,,,,,,,,,,
1,1,,,,2.0,,1.0,,,,...,,,,,,,,,,
2,2,1.0,1.0,,2.0,,2.0,,2.0,,...,,,,,,,,,,
3,3,,,1.0,1.0,,2.0,,4.0,,...,,,,,,,,,,
4,4,,,1.0,4.0,,2.0,,2.0,,...,,,,,,,,,,


Unnamed: 0.1,Unnamed: 0,1101101001,0001011111,0000100001,0100001111,1101101010,0100111011,0110011001,0001100101,0001011100,...,0100010100,0111100010,0011000110,1001100110,1110111111,1110101011,0001010001,0111100011,1110110110,0010101110
0,0,5.0,7.0,5.0,2.0,14,2.0,1.0,2.0,4.0,...,,,,,,,,,,
1,1,6.0,8.0,3.0,1.0,25,1.0,,2.0,1.0,...,,,,,,,,,,
2,2,6.0,5.0,2.0,1.0,11,4.0,,1.0,2.0,...,,,,,,,,,,
3,3,5.0,4.0,5.0,,23,,,1.0,7.0,...,,,,,,,,,,
4,4,2.0,6.0,5.0,,16,,,2.0,1.0,...,,,,,,,,,,


# Similarity Measures

In [30]:
from scipy.stats import ttest_ind

TOP_K_SOLUTION_TO_COMPARE = 5

def score_top_k(df_master, df_slave, first_k=TOP_K_SOLUTION_TO_COMPARE):
    # get the top k occurring solutions according to the master
    m = df_master.fillna(0).to_numpy()
    solution_cum_freq = m.sum(axis=0)
    most_frequent_solutions = np.argsort(solution_cum_freq)[::-1]
    if len(most_frequent_solutions) > first_k:
        most_frequent_solutions = most_frequent_solutions[:first_k]
    relevant_solutions = df_master.columns[most_frequent_solutions]
    freqs = solution_cum_freq[most_frequent_solutions]
    # compute p-values
    p_values = []
    df_master = df_master.fillna(0)
    df_slave = df_slave.fillna(0)
    for c in relevant_solutions:
        try:
            stat, p = ttest_ind(df_master[c], df_slave[c])
        except:
            if c not in df_master.columns:
                print(f"{c} column was missing from result of platform master")
                df_master[c] = 0
            if c not in df_slave.columns:
                print(f"{c} column was missing from result of platform slave")
                df_slave[c] = 0
            stat, p = ttest_ind(df_master[c], df_slave[c])    
        p_values.append(p)
    return min(p_values)

# Interpretation Routine

In [14]:
def interpret_as_p_value(score):
    print(f"P-Value like interpretation for: {score}")
    if score > 0.05:
        print('Probably the same distribution')
    else:
        print('Probably different distributions')

# Method Selection

In [17]:
detector = score_top_k

# Distinguish between: Real Quantum vs Uniform Random 

In [20]:
pairs = create_pairs(ids_left=ids, suffix_left="_qiskit", folder_left=REAL_DATA_FOLDER,
                     ids_right=ids_random, suffix_right="", folder_right=RANDOM_DATA_FOLDER,
                     n_samples=50)
scores = []
for df_left, df_right in pairs:
    score = detector(df_left, df_right)
    scores.append(score)
average_score = np.mean(scores)
interpret_as_p_value(average_score)

1101011101 column was missing from result of platform slave
0000100010 column was missing from result of platform slave
1010001001 column was missing from result of platform slave
0110100001 column was missing from result of platform slave
0001010010 column was missing from result of platform slave
0110110110 column was missing from result of platform slave
1001110100 column was missing from result of platform slave
1111101100 column was missing from result of platform slave
1110110001 column was missing from result of platform slave
0000011011 column was missing from result of platform slave
1001000100 column was missing from result of platform slave
0001100101 column was missing from result of platform slave
0101011110 column was missing from result of platform slave
1100101111 column was missing from result of platform slave
1001110011 column was missing from result of platform slave
0000011011 column was missing from result of platform slave
1101110110 column was missing from resul

# Distinguish between: Same (artificial) Distribution (Random)

In [21]:
pairs = create_pairs(ids_left=ids_random[:500], suffix_left="", folder_left=RANDOM_DATA_FOLDER,
                     ids_right=ids_random[500:], suffix_right="", folder_right=RANDOM_DATA_FOLDER,
                     n_samples=50)
scores = []
for df_left, df_right in pairs:
    score = detector(df_left, df_right)
    scores.append(score)
average_score = np.mean(scores)
interpret_as_p_value(average_score)

1010001100 column was missing from result of platform slave
0000011111 column was missing from result of platform slave
0101100011 column was missing from result of platform slave
0101101110 column was missing from result of platform slave
1101011111 column was missing from result of platform slave
0101011101 column was missing from result of platform slave
0010100111 column was missing from result of platform slave
0100110010 column was missing from result of platform slave
0000110110 column was missing from result of platform slave
0010010101 column was missing from result of platform slave
0110110011 column was missing from result of platform slave
0010100010 column was missing from result of platform slave
0010110010 column was missing from result of platform slave
1010111110 column was missing from result of platform slave
0101101001 column was missing from result of platform slave
1101110101 column was missing from result of platform slave
0011010111 column was missing from resul

**OBSERVATION**: testing only the first top-k (aka 5) bitstrings suffers when the distributions are very spread (uniform distributions)

# Distinguish: Same Platform, Same Circuit
Assumption: there are no bug in the sampling procedure of one platform

In [22]:
pairs = create_pairs(ids_left=ids, suffix_left="_qiskit", folder_left=REAL_DATA_FOLDER,
                     ids_right=ids, suffix_right="_cirq", folder_right=REAL_DATA_FOLDER,
                     n_samples=50, shared_ids=True)
scores = []
for df_left, df_right in pairs:

    for df in [df_left, df_right]:
        # NOTE THAT WE COMPARE THE SAME PLATFORM  
        # use the first half sampling on one side and the second half on the other
        half = int(len(df) / 2)
        score = detector(df[:half], df[half:])
        scores.append(score)
    
print('---- overall ----')
average_score = np.mean(scores)
interpret_as_p_value(average_score)
print('---- platfrom specific ----')
scores_left = [s for i, s in enumerate(scores) if i % 2 == 0]
scores_right = [s for i, s in enumerate(scores) if i % 2 != 0]
for name, scores in [('left', scores_left), ('right', scores_right)]:
    print(f'Platform {name}:')
    average_score = np.mean(scores)
    interpret_as_p_value(average_score)

---- overall ----
P-Value like interpretation for: 0.03684048990722175
Probably different distributions
---- platfrom specific ----
Platform left:
P-Value like interpretation for: 0.04039995258271368
Probably different distributions
Platform right:
P-Value like interpretation for: 0.033281027231729834
Probably different distributions


# Test on: Different Platform, Same (Real) Algo 

In [15]:
debug_ids = [
    'sat_n11'
]

In [32]:
folder = QASM_DATA_FOLDER
suffix_left = "_qiskit"
suffix_right = "_cirq"

left_dfs = [
    pd.read_csv(
        os.path.join(folder, f"{id_left}{suffix_left}.csv"),
        index_col=0
    )
    for id_left in ids_qasm
]
right_dfs = [
    pd.read_csv(
        os.path.join(folder, f"{id_right}{suffix_right}.csv"),
        index_col=0
    )
    for id_right in ids_qasm
]

scores = []
for df_left, df_right, id_program in zip(left_dfs, right_dfs, ids_qasm):
    print("-" * 80)
    print(id_program)
    display(df_left)
    display(df_right)
    score = detector(df_left, df_right)
    print(score)
    scores.append(score)
    break

--------------------------------------------------------------------------------
bv_n14


Unnamed: 0,1111111111111
0,1024
1,1024
2,1024
3,1024
4,1024
...,...
95,1024
96,1024
97,1024
98,1024


Unnamed: 0,1111111111111
0,1024
1,1024
2,1024
3,1024
4,1024
...,...
95,1024
96,1024
97,1024
98,1024


nan


In [20]:
pairs = create_pairs(ids_left=ids_qasm, suffix_left="_qiskit", folder_left=QASM_DATA_FOLDER,
                     ids_right=ids_qasm, suffix_right="_cirq", folder_right=QASM_DATA_FOLDER,
                     n_samples=0, shared_ids=True)
scores = []
for (df_left, df_right), record in zip(pairs, records_qasm):
    print("-" * 80)
    print(record)
    display(df_left)
    display(df_right)
    score = detector(df_left, df_right)
    print(score)
    scores.append(score)
    break
no_nan_scores = np.array(scores)
no_nan_scores = no_nan_scores[~np.isnan(no_nan_scores)]
average_score = np.mean(no_nan_scores)
interpret_as_p_value(average_score)

--------------------------------------------------------------------------------
{'filepath': 'simon_n6_cirq.csv', 'algo_name': 'simon_n6', 'platform': 'cirq'}
nan
--------------------------------------------------------------------------------
{'filepath': 'simon_n6_qiskit.csv', 'algo_name': 'simon_n6', 'platform': 'qiskit'}
0.1534379674360089
--------------------------------------------------------------------------------
{'filepath': 'bv_n14_qiskit.csv', 'algo_name': 'bv_n14', 'platform': 'qiskit'}
nan
--------------------------------------------------------------------------------
{'filepath': 'adder_n10_cirq.csv', 'algo_name': 'adder_n10', 'platform': 'cirq'}
000111 column was missing from result of platform slave
001011 column was missing from result of platform slave
001111 column was missing from result of platform slave
1.1574585212096103e-165
--------------------------------------------------------------------------------
{'filepath': 'sat_n11_cirq.csv', 'algo_name': 'sat_n11

In [21]:
scores

[nan,
 0.1534379674360089,
 nan,
 1.1574585212096103e-165,
 1.2794118867942466e-150,
 4.198105891732659e-239,
 8.468907697031769e-116,
 0.00898797260330989,
 0.0,
 0.0,
 5.901303463246013e-132,
 0.33181593880147986]