In [1]:
import pandas as pd
import matplotlib.pyplot as plt

def display_counts(CHECKPOINT, DISTANCE):
    # Overall title
    if CHECKPOINT == "X":
        TITLE = "CoV-RoBERTa without Contrastive Learning"
    elif CHECKPOINT == "4":
        TITLE = "CoV-SNN Transformer with Best Zero-shot Accuracy"
    elif CHECKPOINT == "0":
        TITLE = "CoV-SNN Transformer with Best Test Accuracy"

    # Reading the data
    df_eris_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_eris.csv")
    df_eris_cscs['cscs_sp'] = df_eris_cscs['log10(semantic_change)'] + df_eris_cscs['log10(grammaticality)']
    df_eris_cscs['cscs_ip'] = df_eris_cscs['log10(semantic_change)'] + df_eris_cscs['log10(1/perplexity)']

    df_new_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_new.csv")
    df_new_cscs['cscs_sp'] = df_new_cscs['log10(semantic_change)'] + df_new_cscs['log10(grammaticality)']
    df_new_cscs['cscs_ip'] = df_new_cscs['log10(semantic_change)'] + df_new_cscs['log10(1/perplexity)']

    df_gpt_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_gpt.csv")
    df_gpt_cscs['cscs_sp'] = df_gpt_cscs['log10(semantic_change)'] + df_gpt_cscs['log10(grammaticality)']
    df_gpt_cscs['cscs_ip'] = df_gpt_cscs['log10(semantic_change)'] + df_gpt_cscs['log10(1/perplexity)']

    # Combine the three DataFrames into one for easier plotting
    df_combined = pd.concat([
        df_eris_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip', 'sentence']].assign(source='Eris'),
        df_new_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip', 'sentence']].assign(source='New'),
        df_gpt_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip', 'sentence']].assign(source='GPT'),
    ])

    # Prepare data for the merged table
    top_k_values = [100, 500, 1000]
    rows = ['CSCS by SP', 'CSCS by IP']
    data = []

    # Loop through each metric and top-K value to gather data
    for metric in rows:
        row_data = []
        for k in top_k_values:
            if metric == 'CSCS by IP':
                df_combined_topk = df_combined.sort_values(by='cscs_ip', ascending=False).head(k)
            else:  # 'CSCS by SP'
                df_combined_topk = df_combined.sort_values(by='cscs_sp', ascending=False).head(k)
            
            # Assuming 'Eris', 'New', and 'GPT' are specific sources, count occurrences
            eris_count = df_combined_topk['source'].value_counts().get('Eris', 0)
            new_count = df_combined_topk['source'].value_counts().get('New', 0)
            gpt_count = df_combined_topk['source'].value_counts().get('GPT', 0)
            
            row_data.extend([eris_count, new_count, gpt_count])
        data.append(row_data)

    # Create multi-level columns
    columns = pd.MultiIndex.from_product(
        [[f'Top-{k}' for k in top_k_values], ['Eris', 'New', 'GPT']],
        names=['Top-K', 'Source']
    )

    # Create the DataFrame with multi-level columns
    merged_table = pd.DataFrame(data, index=rows, columns=columns)

    # Display the merged table
    display(merged_table)

    return merged_table

In [2]:
table_0 = display_counts("0", "L2")
table_4 = display_counts("4", "L2")

row_names_0 = ["CoV-SNN$^1$ (\\texttt{{SP}}) &", "CoV-SNN$^1$ (\\texttt{{IP}}) &"]
row_names_4 = ["CoV-SNN$^5$ (\\texttt{{SP}}) &", "CoV-SNN$^5$ (\\texttt{{IP}}) &"]

# print rows of each table. add & between cell values
for i, row in enumerate(table_0.iterrows()):
    print(row_names_0[i].format(), ' & '.join(map(str, row[1].values)), "\\\\")
for i, row in enumerate(table_4.iterrows()):
    print(row_names_4[i].format(), ' & '.join(map(str, row[1].values)), "\\\\")


Top-K,Top-100,Top-100,Top-100,Top-500,Top-500,Top-500,Top-1000,Top-1000,Top-1000
Source,Eris,New,GPT,Eris,New,GPT,Eris,New,GPT
CSCS by SP,51,38,11,363,104,33,763,173,64
CSCS by IP,63,30,7,396,84,20,795,159,46


Top-K,Top-100,Top-100,Top-100,Top-500,Top-500,Top-500,Top-1000,Top-1000,Top-1000
Source,Eris,New,GPT,Eris,New,GPT,Eris,New,GPT
CSCS by SP,92,5,3,484,9,7,978,11,11
CSCS by IP,98,1,1,494,2,4,987,6,7


CoV-SNN$^1$ (\texttt{SP}) & 51 & 38 & 11 & 363 & 104 & 33 & 763 & 173 & 64 \\
CoV-SNN$^1$ (\texttt{IP}) & 63 & 30 & 7 & 396 & 84 & 20 & 795 & 159 & 46 \\
CoV-SNN$^5$ (\texttt{SP}) & 92 & 5 & 3 & 484 & 9 & 7 & 978 & 11 & 11 \\
CoV-SNN$^5$ (\texttt{IP}) & 98 & 1 & 1 & 494 & 2 & 4 & 987 & 6 & 7 \\


In [3]:
def display_percentages(CHECKPOINT, DISTANCE):
    # Reading the data
    df_eris_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_eris.csv")
    df_eris_cscs['cscs_sp'] = df_eris_cscs['log10(semantic_change)'] + df_eris_cscs['log10(grammaticality)']
    df_eris_cscs['cscs_ip'] = df_eris_cscs['log10(semantic_change)'] + df_eris_cscs['log10(1/perplexity)']

    df_new_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_new.csv")
    df_new_cscs['cscs_sp'] = df_new_cscs['log10(semantic_change)'] + df_new_cscs['log10(grammaticality)']
    df_new_cscs['cscs_ip'] = df_new_cscs['log10(semantic_change)'] + df_new_cscs['log10(1/perplexity)']

    df_gpt_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_gpt.csv")
    df_gpt_cscs['cscs_sp'] = df_gpt_cscs['log10(semantic_change)'] + df_gpt_cscs['log10(grammaticality)']
    df_gpt_cscs['cscs_ip'] = df_gpt_cscs['log10(semantic_change)'] + df_gpt_cscs['log10(1/perplexity)']

    # Combine the three DataFrames into one for easier plotting
    df_combined = pd.concat([
        df_eris_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip']].assign(source='Eris'),
        df_new_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip']].assign(source='New'),
        df_gpt_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip']].assign(source='GPT'),
    ])

    # Prepare data for the merged table
    top_k_values = [100, 500, 1000]
    rows = ['CSCS by SP', 'CSCS by IP']
    data = []
    for metric in rows:
        row_data = []
        for k in top_k_values:
            if metric == 'CSCS by IP':
                df_combined_topk = df_combined.sort_values(by='cscs_ip', ascending=False).head(k)
            else:  # 'CSCS by SP'
                df_combined_topk = df_combined.sort_values(by='cscs_sp', ascending=False).head(k)
            
            eris_percentage = df_combined_topk['source'].value_counts(normalize=True).get('Eris', 0)
            # change eris_percentage to string in XX.X% format
            eris_percentage = "{:.1f}".format(eris_percentage*100)
            #new_percentage = df_combined_topk['source'].value_counts(normalize=True).get('New', 0)
            #gpt_percentage = df_combined_topk['source'].value_counts(normalize=True).get('GPT', 0)

            row_data.extend([eris_percentage])
        data.append(row_data)
    
    # Create multi-level columns
    columns = pd.MultiIndex.from_product(
        [[f'Top-{k}' for k in top_k_values], ['Eris']],
        names=['Top-K', 'Source']
    )

    # Create the DataFrame with multi-level columns
    merged_table = pd.DataFrame(data, index=rows, columns=columns)

    # Display the merged table
    display(merged_table)

    return merged_table

In [4]:
table_0 = display_percentages("0", "L2")
table_4 = display_percentages("4", "L2")

row_names_0 = ["CoV-SNN$^1$ (\\texttt{{SP}}) &", "CoV-SNN$^1$ (\\texttt{{IP}}) &"]
row_names_4 = ["CoV-SNN$^5$ (\\texttt{{SP}}) &", "CoV-SNN$^5$ (\\texttt{{IP}}) &"]

# print rows of each table. add & between cell values
for i, row in enumerate(table_0.iterrows()):
    print(row_names_0[i].format(), '\% & '.join(map(str, row[1].values)) + "\% \\\\")
for i, row in enumerate(table_4.iterrows()):
    print(row_names_4[i].format(), '\% & '.join(map(str, row[1].values)) + "\% \\\\")

Top-K,Top-100,Top-500,Top-1000
Source,Eris,Eris,Eris
CSCS by SP,51.0,72.6,76.3
CSCS by IP,63.0,79.2,79.5


Top-K,Top-100,Top-500,Top-1000
Source,Eris,Eris,Eris
CSCS by SP,92.0,96.8,97.8
CSCS by IP,98.0,98.8,98.7


CoV-SNN$^1$ (\texttt{SP}) & 51.0\% & 72.6\% & 76.3\% \\
CoV-SNN$^1$ (\texttt{IP}) & 63.0\% & 79.2\% & 79.5\% \\
CoV-SNN$^5$ (\texttt{SP}) & 92.0\% & 96.8\% & 97.8\% \\
CoV-SNN$^5$ (\texttt{IP}) & 98.0\% & 98.8\% & 98.7\% \\


In [6]:
import pandas as pd
def get_gpt_sentences(CHECKPOINT, DISTANCE, K):
    # Get Eris sentences in top-K
    # Reading the data
    df_eris_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_eris.csv")
    df_eris_cscs['cscs_sp'] = df_eris_cscs['log10(semantic_change)'] + df_eris_cscs['log10(grammaticality)']
    df_eris_cscs['cscs_ip'] = df_eris_cscs['log10(semantic_change)'] + df_eris_cscs['log10(1/perplexity)']

    df_new_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_new.csv")
    df_new_cscs['cscs_sp'] = df_new_cscs['log10(semantic_change)'] + df_new_cscs['log10(grammaticality)']
    df_new_cscs['cscs_ip'] = df_new_cscs['log10(semantic_change)'] + df_new_cscs['log10(1/perplexity)']

    df_gpt_cscs = pd.read_csv(f"outputs/cscs_CP{CHECKPOINT}_{DISTANCE}/cscs_values_gpt.csv")
    df_gpt_cscs['cscs_sp'] = df_gpt_cscs['log10(semantic_change)'] + df_gpt_cscs['log10(grammaticality)']
    df_gpt_cscs['cscs_ip'] = df_gpt_cscs['log10(semantic_change)'] + df_gpt_cscs['log10(1/perplexity)']

    # Combine the three DataFrames into one for easier plotting
    df_combined = pd.concat([
        df_eris_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip', 'sentence']].assign(source='Eris'),
        df_new_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip', 'sentence']].assign(source='New'),
        df_gpt_cscs[['log10(grammaticality)', 'log10(semantic_change)', 'log10(1/perplexity)', 'cscs_sp', 'cscs_ip', 'sentence']].assign(source='GPT'),
    ])

    # Get the top-K sentences
    df_combined_topk = df_combined.sort_values(by='cscs_ip', ascending=False).head(K)
    # Get the Eris sentences
    gpt_sentences = df_combined_topk[df_combined_topk['source'] == 'GPT']['sentence'].tolist()

    # Get the top-K sentences
    df_combined_topk = df_combined.sort_values(by='cscs_sp', ascending=False).head(K)
    # Get the Eris sentences
    gpt_sentences.extend(df_combined_topk[df_combined_topk['source'] == 'GPT']['sentence'].tolist())
    
    # remove duplicates
    gpt_sentences = list(set(gpt_sentences))

    return gpt_sentences


In [62]:
def get_mutations(seq1, seq2):
    # Perform global sequence alignment between two sequences.
    # 5: match score, -4: mismatch penalty, -3: gap opening penalty, -0.1: gap extension penalty.
    # `one_alignment_only=True` ensures that only the best alignment is returned.

    mutations = []
    from Bio import pairwise2
    alignment = pairwise2.align.globalms(
        seq1, seq2, 5, -4, -3, -.1, one_alignment_only=True,
    )[0]
    pos = 0
    for ch1, ch2 in zip(alignment[0], alignment[1]):
        if ch1 != ch2 and ch1 != '-' and ch2 != '-' and ch2 != 'X':
            mutations.append('{}{}{}'.format(ch1, pos + 1, ch2))
        if ch1 != '-':
            pos += 1
    return mutations



# read wt sequence from txt
with open("data/wildtype.txt", "r") as f:
    wt = f.read()
alpha_mut_freqs = [0, 3, 1, 1, 134, 9, 0, 0, 6, 0, 0, 11, 8, 6, 3, 7, 3, 14, 8, 11, 11, 10, 1, 1, 7, 15, 15, 4, 6, 0, 1, 2, 14, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 6, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 2, 1, 4, 0, 3, 6, 0, 0, 4, 0, 7, 1, 2, 9, 16, 0, 1, 1, 3, 0, 2, 2, 0, 0, 0, 0, 5, 0, 2, 0, 0, 0, 19, 21, 2, 2, 68, 2, 4, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1, 4, 2, 1, 0, 0, 1, 0, 0, 1, 3, 1, 0, 2, 0, 1, 0, 1, 0, 0, 3, 71, 0, 1, 2, 3, 0, 18, 0, 0, 7, 9, 1, 10, 6, 53, 13, 3, 10, 4, 3, 1, 2, 0, 0, 2, 1, 2, 0, 0, 0, 0, 1, 0, 2, 3, 0, 3, 0, 8, 8, 14, 1, 1, 19, 3, 2, 2, 1, 0, 1, 2, 1, 5, 0, 0, 3, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 3, 1, 0, 3, 8, 12, 8, 2, 2, 2, 0, 15, 13, 0, 0, 0, 0, 0, 3, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 9, 0, 4, 3, 0, 8, 5, 1, 2, 1, 0, 3, 1, 4, 5, 4, 7, 2, 6, 1, 5, 8, 3, 5, 3, 1, 0, 3, 0, 0, 1, 1, 1, 2, 1, 0, 0, 0, 0, 0, 1, 5, 0, 0, 5, 0, 1, 2, 0, 3, 0, 0, 3, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 2, 13, 5, 2, 0, 0, 1, 0, 3, 1, 0, 0, 1, 0, 1, 3, 0, 4, 1, 1, 0, 5, 0, 0, 5, 0, 1, 0, 0, 1, 0, 2, 1, 3, 1, 0, 0, 0, 2, 0, 2, 0, 2, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 1, 0, 1, 0, 0, 0, 1, 8, 0, 0, 1, 1, 2, 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 6, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 2, 2, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5, 1, 0, 2, 0, 0, 1, 1, 2, 0, 0, 0, 1, 3, 2, 0, 0, 1, 1, 4, 1, 1, 5, 2, 0, 8, 0, 0, 1, 2, 0, 0, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 0, 5, 4, 2, 0, 0, 4, 4, 9, 2, 2, 0, 0, 0, 3, 19, 1, 1, 0, 0, 1, 25, 0, 0, 5, 14, 0, 7, 0, 1, 1, 0, 1956, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 10, 2, 10, 0, 0, 0, 0, 1, 0, 3, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 5, 0, 2, 0, 0, 2, 2, 1, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1961, 0, 16, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 20, 0, 3, 1, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1982, 0, 2, 0, 3, 1, 3, 3, 13, 7, 0, 0, 3, 3, 0, 0, 1, 3, 2, 0, 0, 1, 0, 1, 3, 1, 15, 2, 0, 1, 0, 0, 0, 4, 0, 0, 0, 1, 0, 15, 7, 8, 0, 0, 5, 0, 3, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 2, 1, 0, 10, 4, 17, 6, 2, 0, 1954, 0, 3, 12, 0, 0, 2, 9, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 24, 2, 2, 6, 2, 5, 0, 0, 1, 1, 0, 6, 0, 0, 0, 1965, 0, 0, 3, 0, 1, 3, 1, 1, 1, 0, 0, 0, 1, 2, 6, 11, 1, 0, 3, 1, 0, 0, 1, 12, 0, 0, 0, 0, 1, 0, 2, 1, 0, 2, 2, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 8, 0, 0, 1, 3, 0, 3, 2, 0, 0, 0, 0, 0, 1, 3, 2, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 6, 1, 1, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 10, 0, 0, 15, 2, 0, 0, 0, 1, 4, 0, 1, 0, 11, 0, 0, 0, 0, 3, 0, 1, 0, 6, 0, 0, 2, 0, 4, 1, 1, 1, 0, 0, 1, 2, 2, 17, 11, 2, 0, 0, 0, 0, 3, 0, 4, 0, 0, 1, 3, 3, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 5, 0, 0, 1, 0, 2, 1, 0, 17, 0, 0, 0, 0, 2, 1, 3, 0, 1, 0, 3, 1, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 5, 1, 9, 9, 5, 1, 1, 9, 1, 0, 3, 0, 0, 0, 3, 0, 2, 0, 2, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1969, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 12, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 5, 0, 2, 0, 0, 1, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 3, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 2, 0, 1, 0, 3, 2, 0, 2, 0, 3, 0, 1, 5, 0, 0, 2, 0, 8, 1, 1, 0, 0, 5, 9, 0, 1, 2, 0, 0, 0, 3, 0, 0, 1, 0, 0, 2, 0, 1, 1, 9, 1, 0, 11, 0, 0, 0, 0, 0, 1, 6, 0, 2, 0, 0, 2, 0, 1963, 0, 1, 0, 2, 0, 0, 0, 0, 1, 0, 2, 2, 0, 0, 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 1, 0, 0, 8, 0, 2, 2, 0, 0, 0, 0, 0, 15, 7, 0, 3, 0, 4, 1, 4, 2, 2, 0, 0, 1, 0, 9, 1, 0, 0, 0, 3, 2, 0, 2, 2, 1, 2, 1, 2, 2, 89, 1, 0, 0, 2, 0, 1, 0, 3, 2, 6, 2, 3, 1, 4, 0, 7, 5, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 28, 1, 0, 3, 1, 0, 5, 0, 2, 7, 11, 5, 3, 1, 0, 0, 4, 1, 8, 2, 0, 0, 1, 2, 3, 1, 2, 0, 9, 1, 3, 0, 6, 4, 0, 1, 0, 0, 0, 4, 2, 6, 1, 3, 20, 9, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
beta_mut_freqs = [0, 0, 1, 1, 47, 1, 0, 2, 3, 0, 1, 4, 2, 2, 3, 1, 0, 897, 46, 5, 7, 0, 2, 0, 18, 31, 250, 0, 2, 2, 0, 0, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 0, 3, 3, 2, 1, 1, 10, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 51, 1, 9, 3, 2, 6, 1, 0, 31, 2, 3, 2, 0, 1914, 0, 1, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 1, 1, 8, 1, 1, 32, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 23, 0, 0, 1, 2, 15, 0, 1, 6, 1, 2, 1, 2, 4, 19, 7, 0, 2, 0, 4, 2, 2, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 3, 1, 0, 0, 1, 1, 13, 6, 1, 0, 2, 6, 0, 1, 0, 1, 17, 0, 1, 0, 4, 0, 0, 3, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 5, 0, 0, 1, 0, 1848, 0, 0, 3, 0, 0, 5, 33, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 1, 0, 2, 3, 0, 0, 12, 5, 1, 0, 0, 2, 5, 2, 4, 6, 4, 0, 4, 0, 5, 0, 9, 1, 4, 2, 0, 0, 1, 1, 0, 1, 3, 1, 1, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 3, 0, 0, 0, 2, 0, 0, 3, 3, 12, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 6, 1, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 4, 0, 3, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 5, 0, 0, 1, 1, 0, 46, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 1839, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 2, 4, 3, 21, 0, 0, 0, 1, 0, 1854, 0, 1, 0, 0, 1, 2, 0, 0, 2, 1, 0, 2, 0, 0, 0, 0, 1898, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 8, 2, 0, 0, 8, 0, 5, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 9, 0, 10, 1, 3, 3, 0, 0, 0, 0, 0, 0, 0, 6, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1978, 0, 0, 0, 3, 2, 0, 4, 3, 7, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 9, 4, 7, 0, 2, 3, 0, 1, 2, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 9, 0, 0, 33, 3, 37, 7, 6, 2, 32, 4, 0, 3, 0, 0, 1, 9, 24, 3, 1, 1, 0, 4, 0, 0, 0, 9, 2, 0, 1926, 0, 0, 3, 4, 4, 0, 0, 0, 1, 1, 0, 0, 0, 0, 24, 2, 0, 5, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 1, 7, 0, 2, 2, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 5, 0, 0, 1, 4, 0, 0, 0, 7, 2, 3, 0, 0, 8, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 3, 1, 2, 1, 1, 6, 3, 0, 1, 0, 0, 2, 0, 0, 0, 8, 0, 0, 1, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, 2, 22, 5, 1, 1, 0, 2, 0, 1, 0, 4, 0, 0, 1, 0, 10, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 0, 2, 0, 1, 0, 3, 0, 0, 0, 6, 0, 0, 0, 19, 0, 0, 23, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 2, 1, 20, 0, 0, 27, 0, 10, 2, 4, 12, 5, 2, 1, 6, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 17, 0, 1, 0, 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 6, 0, 0, 2, 0, 0, 1, 0, 12, 2, 0, 0, 0, 0, 2, 4, 1, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 3, 9, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 4, 15, 0, 1, 0, 2, 0, 12, 2, 0, 1, 0, 0, 3, 1, 0, 1, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 1, 3, 0, 1, 5, 1, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 10, 9, 0, 1, 0, 19, 1, 1, 0, 3, 0, 1, 0, 1, 16, 2, 0, 1, 0, 3, 1, 2, 1, 1, 0, 1, 0, 0, 4, 3, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 2, 4, 0, 3, 0, 3, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 9, 0, 0, 0, 0, 1, 0, 1, 1, 1, 9, 1, 0, 7, 0, 1, 2, 5, 8, 0, 1, 0, 0, 0, 6, 0, 2, 3, 2, 0, 0, 2, 1, 12, 1, 1, 1, 0, 0, 0, 1, 17, 0, 0, 9, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
gamma_mut_freqs = [0, 0, 7, 0, 57, 7, 1, 2, 2, 0, 2, 16, 1, 5, 1, 4, 0, 1918, 3, 1911, 0, 9, 1, 0, 0, 1931, 0, 0, 6, 2, 0, 1, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 1, 5, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 18, 3, 4, 5, 1, 0, 0, 2, 25, 13, 2, 9, 0, 3, 1, 0, 2, 2, 1, 0, 1, 4, 0, 2, 0, 0, 0, 1, 10, 1, 7, 6, 0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 2, 1, 5, 1, 0, 0, 0, 0, 0, 0, 13, 1, 0, 2, 0, 0, 0, 1, 3, 0, 0, 0, 2, 0, 1, 0, 0, 0, 1807, 0, 0, 2, 2, 2, 1, 2, 5, 2, 2, 0, 3, 3, 10, 10, 0, 0, 1, 2, 1, 0, 0, 1, 0, 1, 2, 0, 0, 0, 1, 1, 0, 0, 0, 3, 5, 0, 5, 5, 2, 8, 2, 14, 0, 9, 1, 6, 1, 0, 0, 0, 1870, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 1, 1, 1, 0, 3, 5, 10, 15, 11, 0, 2, 1, 0, 3, 0, 3, 0, 0, 1, 1, 4, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 8, 10, 0, 3, 22, 3, 0, 0, 2, 5, 1, 7, 6, 11, 3, 12, 9, 2, 1, 9, 13, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 9, 0, 1, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 2, 2, 0, 0, 3, 6, 3, 1, 0, 1, 0, 2, 0, 0, 0, 1, 0, 0, 1, 5, 12, 2, 0, 0, 1, 0, 1, 2, 0, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 0, 0, 5, 0, 3, 0, 5, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 1, 0, 2, 0, 3, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 3, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1855, 0, 1, 0, 0, 0, 0, 0, 0, 1, 32, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 8, 1, 3, 0, 1, 7, 1, 0, 9, 0, 0, 4, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 19, 2, 0, 0, 0, 3, 1, 3, 11, 0, 0, 0, 1, 0, 1924, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1956, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 5, 1, 3, 6, 5, 8, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 1, 2, 0, 0, 2, 1, 1, 0, 7, 0, 0, 0, 1, 6, 2, 4, 0, 1, 0, 9, 0, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 9, 0, 15, 5, 1, 0, 0, 0, 0, 0, 3, 0, 1, 13, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 1, 0, 1, 1, 0, 0, 0, 0, 3, 1, 2, 5, 2, 1, 1, 0, 0, 0, 1964, 0, 0, 0, 2, 4, 0, 1, 9, 9, 1, 3, 5, 5, 0, 0, 0, 4, 1, 0, 1, 1, 0, 1, 4, 2, 26, 0, 5, 0, 1, 0, 1, 12, 0, 0, 0, 3, 0, 15, 0, 1962, 0, 1, 5, 5, 0, 34, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 9, 3, 0, 64, 1, 30, 2, 58, 0, 209, 0, 1, 10, 0, 0, 10, 53, 8, 0, 9, 3, 2, 0, 0, 0, 1, 0, 0, 0, 12, 2, 1, 12, 3, 0, 0, 3, 0, 0, 0, 1, 0, 1, 0, 17, 0, 0, 3, 0, 1, 1, 0, 0, 1, 3, 0, 0, 0, 0, 3, 5, 0, 0, 1, 0, 0, 1, 1, 3, 0, 1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 1, 5, 4, 0, 0, 0, 0, 0, 1, 3, 3, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 5, 1, 0, 1, 1, 4, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 3, 2, 2, 13, 15, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 1, 2, 1, 2, 0, 0, 4, 0, 1, 0, 1, 0, 0, 0, 1, 1, 5, 38, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 6, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 1, 0, 1, 0, 0, 8, 0, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 5, 0, 1, 2, 0, 1, 0, 22, 0, 8, 10, 1, 0, 3, 6, 0, 1, 5, 2, 0, 2, 3, 0, 1, 0, 1, 0, 0, 4, 2, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 13, 1, 0, 2, 1, 0, 0, 1930, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 7, 0, 0, 8, 1, 0, 1, 0, 13, 1, 0, 0, 0, 2, 1, 18, 0, 1, 0, 0, 0, 3, 2, 0, 1, 0, 2, 1, 0, 0, 0, 8, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, 1, 1, 5, 18, 0, 0, 0, 8, 1, 2, 3, 0, 3, 1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 8, 1, 0, 9, 0, 1, 0, 0, 0, 5, 0, 0, 6, 8, 2, 3, 0, 8, 1, 1, 2, 7, 0, 1, 0, 0, 1929, 0, 2, 1, 2, 2, 0, 0, 5, 3, 1, 1, 4, 0, 1, 15, 1, 0, 0, 1, 1, 0, 0, 6, 0, 3, 7, 3, 0, 3, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 17, 0, 0, 0, 1, 2, 2, 1, 0, 19, 9, 3, 0, 3, 2, 0, 20, 0, 10, 0, 1, 0, 1, 4, 4, 1, 0, 2, 7, 3, 0, 7, 2, 25, 0, 1, 0, 0, 1, 11, 2, 6, 0, 0, 15, 14, 1, 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
delta_mut_freqs = [0, 3, 2, 0, 57, 7, 0, 1, 0, 1, 0, 8, 5, 3, 1, 5, 2, 0, 1913, 0, 0, 0, 3, 2, 7, 10, 18, 2, 31, 4, 0, 3, 0, 0, 2, 11, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 6, 1, 7, 6, 0, 10, 0, 0, 0, 1, 0, 0, 1, 2, 1, 3, 0, 2, 11, 4, 11, 29, 1, 3, 0, 0, 13, 2, 11, 5, 1, 14, 0, 2, 6, 0, 5, 0, 0, 1, 0, 1, 1, 0, 2, 2, 832, 4, 12, 10, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 1, 0, 2, 45, 0, 0, 1, 0, 0, 3, 1, 0, 3, 0, 3, 1, 1, 0, 4, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 19, 0, 0, 0, 1183, 0, 0, 64, 5, 5, 7, 1, 0, 3, 8, 11, 2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 2, 0, 1, 0, 1, 1, 2, 1, 14, 9, 0, 8, 8, 9, 2, 5, 45, 3, 3, 2, 2, 2, 1, 1, 2, 5, 0, 0, 2, 0, 1, 0, 2, 2, 0, 1, 0, 0, 2, 0, 0, 0, 1, 2, 2, 4, 2, 0, 10, 11, 22, 5, 0, 4, 1, 0, 23, 269, 0, 3, 0, 3, 4, 5, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 1, 7, 0, 2, 1, 2, 4, 1, 1, 4, 2, 28, 31, 3, 9, 12, 19, 4, 1, 23, 4, 4, 2, 16, 3, 3, 1, 0, 2, 0, 0, 0, 1, 0, 6, 1, 0, 0, 0, 1, 0, 0, 8, 2, 0, 2, 1, 0, 2, 1, 35, 0, 0, 1, 1, 1, 0, 0, 0, 0, 10, 1, 0, 0, 0, 0, 0, 0, 4, 6, 2, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 3, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 2, 0, 1, 2, 3, 1, 1, 0, 0, 3, 1, 6, 0, 3, 1, 0, 1, 2, 0, 5, 0, 3, 2, 0, 2, 0, 0, 3, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 1, 0, 1, 6, 1, 2, 0, 0, 1, 2, 0, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 2, 1, 0, 0, 0, 2, 0, 4, 0, 0, 2, 0, 0, 5, 0, 0, 7, 1, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 1, 0, 0, 1, 2, 1, 0, 0, 0, 0, 3, 0, 0, 0, 3, 1, 25, 0, 0, 0, 0, 0, 1965, 0, 0, 2, 1, 0, 3, 3, 0, 1, 0, 2, 0, 1, 1, 0, 0, 1, 5, 1, 0, 0, 0, 2, 2, 0, 1946, 0, 0, 2, 2, 5, 37, 2, 2, 0, 0, 1, 6, 0, 0, 1, 7, 0, 0, 0, 1, 6, 1, 8, 0, 6, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 4, 0, 1, 1, 1, 0, 7, 2, 14, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 13, 0, 1, 0, 1, 1, 0, 8, 0, 4, 0, 5, 0, 0, 2, 0, 0, 0, 0, 1, 1, 0, 4, 10, 0, 13, 5, 9, 3, 1, 0, 0, 0, 2, 0, 0, 18, 2, 2, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 7, 0, 1, 3, 0, 0, 0, 0, 0, 0, 1950, 0, 1, 0, 0, 2, 0, 3, 7, 5, 0, 1, 1, 2, 3, 1, 1, 2, 1, 0, 0, 1, 0, 0, 0, 0, 9, 2, 0, 0, 0, 1, 2, 4, 0, 0, 0, 0, 0, 4, 6, 10, 1, 0, 3, 0, 1, 5, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 10, 4, 32, 6, 3, 0, 1965, 0, 0, 10, 0, 0, 0, 14, 2, 0, 3, 0, 0, 3, 0, 0, 1, 2, 0, 0, 8, 3, 0, 4, 1, 2, 0, 0, 1, 1, 0, 1, 1, 0, 1, 2, 0, 0, 11, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 1, 0, 0, 0, 3, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 2, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 4, 0, 6, 2, 0, 0, 0, 0, 1, 0, 2, 3, 0, 0, 4, 0, 0, 1, 1, 0, 1, 0, 14, 2, 4, 0, 0, 3, 1, 1, 3, 0, 0, 0, 0, 0, 0, 0, 2, 7, 22, 1, 0, 9, 5, 1, 2, 0, 0, 3, 0, 0, 1, 2, 0, 1, 0, 3, 0, 0, 2, 1, 8, 0, 0, 7, 2, 1, 0, 2, 0, 0, 0, 3, 1, 1, 13, 9, 2, 0, 0, 36, 0, 0, 0, 2, 4, 1, 0, 1, 15, 2, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 16, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 3, 0, 0, 0, 0, 9, 1, 0, 1, 0, 3, 0, 11, 1, 5, 11, 4, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1770, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 18, 2, 1, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 2, 3, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 5, 0, 2, 0, 0, 2, 2, 5, 1, 1, 12, 20, 2, 0, 1, 24, 2, 0, 1, 0, 7, 4, 3, 1, 4, 0, 2, 0, 7, 0, 0, 1, 0, 1, 0, 0, 1, 0, 7, 3, 2, 59, 0, 0, 1, 0, 1, 0, 2, 3, 3, 1, 1, 0, 12, 5, 1, 1, 1, 4, 0, 24, 0, 0, 4, 2, 0, 0, 1, 1, 7, 0, 0, 1, 2, 0, 3, 0, 4, 0, 0, 3, 0, 3, 0, 0, 2, 9, 3, 0, 7, 3, 1, 1, 1, 2, 3, 1, 0, 17, 7, 0, 2, 0, 22, 1, 1, 1, 1, 0, 1, 1, 0, 7, 1, 1, 1, 0, 3, 1, 1, 2, 2, 0, 0, 2, 0, 0, 7, 1, 0, 1, 3, 0, 0, 0, 3, 2, 6, 8, 3, 0, 3, 1, 2, 5, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 22, 1, 1, 0, 1, 0, 2, 0, 1, 13, 6, 1, 3, 0, 3, 3, 3, 4, 14, 3, 1, 1, 0, 4, 0, 4, 2, 2, 12, 3, 1, 7, 3, 5, 2, 1, 0, 0, 1, 0, 12, 9, 0, 2, 6, 93, 8, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
omic_mut_freqs = [0, 0, 10, 2, 55, 1, 0, 3, 3, 0, 1, 1, 1, 1, 0, 1, 2, 0, 1647, 0, 0, 2, 0, 1, 9, 2, 0, 0, 3, 0, 0, 3, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 2, 0, 16, 0, 1, 23, 33, 2, 8, 1, 3, 0, 0, 6, 4, 3, 5, 1, 2, 0, 2, 88, 1, 0, 2, 1, 2, 0, 2, 0, 0, 1, 1, 228, 3, 1, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, 0, 1623, 0, 1, 6, 59, 245, 0, 0, 11, 5, 237, 30, 0, 0, 0, 252, 0, 2, 0, 0, 0, 0, 31, 0, 0, 0, 0, 1, 0, 0, 0, 2, 2, 0, 17, 4, 2, 3, 12, 6, 6, 92, 6, 20, 8, 3, 1, 0, 5, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 2, 0, 1, 3, 11, 236, 6, 6, 1687, 0, 20, 5, 1, 2, 2, 0, 0, 11, 0, 2, 0, 0, 2, 1, 1, 0, 1, 0, 0, 4, 0, 1, 5, 3, 0, 2, 1, 1, 1, 0, 31, 1, 1, 31, 0, 2, 8, 73, 17, 9, 10, 10, 256, 0, 4, 0, 4, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1945, 0, 1, 0, 0, 2, 0, 550, 0, 4, 0, 0, 0, 1, 0, 3, 0, 95, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 98, 0, 0, 1872, 0, 1939, 0, 218, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1727, 0, 0, 1574, 0, 1, 2, 0, 0, 0, 1, 0, 1884, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 1, 0, 0, 0, 0, 1734, 0, 0, 2, 205, 7, 449, 1, 0, 5, 39, 0, 622, 0, 0, 7, 2, 0, 0, 0, 468, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 2, 0, 8, 32, 0, 0, 1, 1, 0, 1927, 0, 578, 0, 0, 1, 197, 0, 0, 1189, 6, 0, 205, 0, 1929, 0, 0, 1927, 0, 0, 0, 1924, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1, 0, 4, 5, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 230, 0, 0, 0, 0, 1, 2, 7, 0, 3, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 6, 9, 23, 2, 0, 2, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 1977, 0, 0, 0, 3, 6, 0, 5, 1, 1, 1, 0, 2, 1, 5, 0, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 4, 2, 3, 3, 0, 1, 2, 1, 0, 0, 0, 1, 0, 2, 0, 1975, 0, 0, 7, 0, 1, 2, 0, 0, 0, 0, 4, 1, 2, 0, 2, 0, 3, 2, 0, 8, 2, 7, 0, 1937, 0, 1905, 0, 2, 4, 3, 0, 1, 8, 0, 0, 3, 1, 0, 4, 1, 0, 1, 1, 0, 0, 22, 0, 9, 109, 1, 4, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 4, 1, 4, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 5, 1, 0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1910, 0, 0, 0, 1, 1, 0, 2, 1, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 4, 0, 0, 1, 0, 1947, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 3, 1, 0, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 0, 3, 0, 0, 6, 1, 0, 1, 2, 0, 0, 2, 1, 1, 1, 5, 5, 3, 0, 0, 2, 0, 2, 0, 3, 2, 216, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 6, 0, 1, 1, 7, 1, 0, 0, 0, 1, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 8, 1, 1, 2, 0, 0, 2, 14, 0, 2, 8, 0, 0, 5, 3, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 1972, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1974, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 223, 3, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 19, 0, 0, 1, 0, 0, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 3, 1, 1, 2, 0, 3, 0, 0, 1, 0, 3, 5, 1, 1, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 3, 0, 4, 0, 1, 5, 0, 0, 0, 0, 0, 0, 2, 0, 4, 3, 0, 1, 5, 2, 0, 1, 1, 3, 1, 5, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 1, 0, 0, 0, 12, 0, 0, 1, 5, 4, 0, 3, 3, 0, 1, 0, 1, 0, 3, 0, 0, 0, 2, 0, 0, 1, 0, 18, 1, 4, 1, 0, 2, 3, 1, 2, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 0, 3, 3, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0, 13, 1, 7, 10, 0, 0, 1, 0, 0, 6, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 5, 0, 10, 0, 0, 2, 1, 0, 0, 2, 5, 1, 2, 2, 0, 3, 1, 3, 4, 1, 0, 1, 0, 1, 6, 1, 2, 1, 2, 8, 0, 2, 1, 9, 1, 1, 0, 0, 0, 3, 1, 4, 4, 1, 12, 10, 4, 2, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0]
eris_mut_freqs = [0, 0, 2, 0, 33, 1, 0, 0, 2, 0, 0, 2, 0, 145, 0, 1, 2, 0, 1793, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 1, 4, 1, 1, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 2, 0, 2027, 0, 4, 0, 0, 0, 0, 4, 0, 1, 2, 0, 4, 1, 2, 7, 1, 6, 3, 1, 3, 0, 2, 7, 9, 4, 12, 2, 9, 2, 0, 2055, 0, 0, 0, 1, 0, 0, 2, 0, 0, 1, 0, 4, 0, 4, 16, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 3, 1, 1, 0, 1359, 0, 0, 0, 1284, 0, 15, 0, 2, 0, 5, 2, 2, 5, 2, 488, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 6, 4, 0, 9, 13, 0, 1968, 0, 8, 11, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 3, 0, 2060, 0, 16, 3, 2, 3, 1, 0, 3, 4, 0, 1, 0, 0, 2, 2, 0, 0, 0, 0, 1, 0, 1, 0, 5, 0, 0, 3, 0, 1, 0, 1, 8, 0, 0, 1, 0, 8, 0, 2000, 0, 4, 0, 13, 14, 4, 2, 2, 13, 17, 4, 6, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 12, 0, 0, 0, 0, 0, 1, 0, 0, 5, 0, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 0, 52, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 10, 0, 1, 0, 2062, 0, 0, 0, 0, 6, 0, 2066, 0, 3, 1, 0, 1, 1, 0, 6, 0, 9, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2073, 0, 0, 2026, 0, 1955, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 2074, 0, 0, 2041, 0, 0, 1, 0, 0, 0, 0, 0, 2008, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2067, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 336, 0, 0, 19, 1230, 0, 0, 0, 2078, 0, 0, 1, 0, 0, 0, 0, 0, 0, 8, 2, 0, 0, 11, 20, 0, 2, 9, 0, 0, 12, 22, 0, 1957, 0, 1957, 0, 0, 0, 2069, 0, 0, 4, 24, 0, 1, 0, 2073, 0, 0, 2076, 0, 0, 0, 2078, 0, 0, 0, 0, 1, 0, 1, 1, 7, 0, 0, 6, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 1, 16, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 7, 1, 0, 0, 0, 3, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 2081, 0, 0, 0, 0, 2, 0, 8, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 0, 2046, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 16, 4, 5, 0, 2070, 0, 2074, 0, 1, 3, 0, 0, 2, 7, 2, 0, 3, 0, 0, 0, 0, 1, 0, 1, 0, 0, 2, 1, 3, 210, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2065, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 1, 0, 0, 0, 2, 1, 1, 0, 0, 2062, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 2, 9, 3, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 0, 1, 0, 0, 0, 2, 0, 4, 8, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 6, 10, 1, 0, 2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2065, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2079, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 6, 1, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0, 0, 0, 0, 5, 0, 2, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 2, 0, 6, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 3, 4, 2, 2, 2, 8, 0, 0, 0, 0, 4, 0, 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 0, 0, 2, 0, 1, 0, 0, 0, 1, 1, 0, 30, 0, 0, 0, 0, 0, 3, 0, 0, 13, 1, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 4, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 2, 4, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 1, 1, 0, 2, 4, 0, 0, 0, 0, 2, 0, 2, 4, 0, 1, 0, 1, 1, 2, 0, 2, 1, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 5, 11, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

# get indices of mutations with frequency > 1000
import numpy as np
eris_highly_variable_residues = [i for i, freq in enumerate(eris_mut_freqs) if freq > np.median(eris_mut_freqs)]
omic_highly_variable_residues = [i for i, freq in enumerate(omic_mut_freqs) if freq > np.median(omic_mut_freqs)]
# get union of highly variable residues
highly_variable_residues = list(set(eris_highly_variable_residues + omic_highly_variable_residues))
print("eris highly variable positons:", len(eris_highly_variable_residues))
print("omic highly variable positons:", len(omic_highly_variable_residues))
print("comb highly variable positons:", len(highly_variable_residues))

def print_mutation_stats(K):
    eris = get_gpt_sentences("4", "L2", K)
    mutations = []
    for e in eris:
        mutations.extend(get_mutations(wt, e))

    # remove duplicates
    mutations = list(set(mutations))

    

    # get number of mutations in RBD (positions between 317 and 541)
    rbd_mutations = [m for m in mutations if int(m[1:-1]) >= 317 and int(m[1:-1]) <= 541]
    highly_variable_residue_mutations = [m for m in mutations if int(m[1:-1]) in highly_variable_residues and (int(m[1:-1]) < 317 or int(m[1:-1]) > 541)]
    
    # get mutation those are neither in RBD nor in highly variable residues
    other_mutations = [m for m in mutations if m not in rbd_mutations and m not in highly_variable_residue_mutations]

    print(f"-------- TOP {K} --------")
    print(rbd_mutations)
    print(highly_variable_residue_mutations)
    print(other_mutations)
    print(f"{len(rbd_mutations)} of {len(mutations)} mutations are in RBD")
    print(f"{len(highly_variable_residue_mutations)} of {len(mutations)} mutations are in highly variable positions out of RBD")
    print(f"{len(other_mutations)} of {len(mutations)} mutations are in other positions\n")

eris highly variable positons: 396
omic highly variable positons: 522
comb highly variable positons: 648


In [63]:
print_mutation_stats(100)
print_mutation_stats(500)
print_mutation_stats(1000)

-------- TOP 100 --------
['Y505H', 'S371F', 'N501Y', 'N440K', 'K417N', 'D405N', 'Q498R', 'E484A', 'S373P', 'Q493R', 'G339D', 'R408S']
['E1258Q', 'Q1113K', 'S691F', 'S691P']
['N764K', 'N969K', 'D796Y', 'T19I', 'G142D', 'C1248F', 'H655Y', 'N679K', 'D614G', 'V213G', 'Q954H', 'P681H']
12 of 28 mutations are in RBD
4 of 28 mutations are in highly variable positions out of RBD
12 of 28 mutations are in other positions

-------- TOP 500 --------
['Y505H', 'G446S', 'S371F', 'N501Y', 'N440K', 'K417N', 'D405N', 'Q498R', 'E484A', 'K356T', 'N460K', 'S373P', 'F490S', 'Q493R', 'G339D', 'R346T', 'R408S', 'G339H']
['W152R', 'K147E', 'F157L', 'G257S', 'E1258Q', 'I210V', 'Q1113K', 'S691F', 'S691P']
['N764K', 'G1124V', 'N969K', 'D796Y', 'T19I', 'G142D', 'C1248F', 'Q52H', 'H655Y', 'N679K', 'D614G', 'V213G', 'Q954H', 'P681H']
18 of 41 mutations are in RBD
9 of 41 mutations are in highly variable positions out of RBD
14 of 41 mutations are in other positions

-------- TOP 1000 --------
['Y505H', 'G446S', '