## Spider Results Jaccard Scores

### 1-Shot

In [5]:
import json
import os
import glob
import itertools

# Glob pattern for the 1-shot Spider result files: '**' (together with
# recursive=True below) descends into every sub-directory, and '[1]' is a
# one-character class matching the shot count in the file name.
pattern = os.path.join('chapter-3', 'results', 'spider', '**', '*results-[1].json')

# Collect the matching paths. glob makes no promise about ordering, so the
# list is sorted to keep the pair order (and the printed report) stable
# from one run or machine to the next.
results_files = sorted(glob.glob(pattern, recursive=True))

def load_json(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so non-ASCII text loads identically on every machine.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Parse every result file, keyed by its bare file name (directories dropped).
# NOTE: two files with the same name in different sub-directories would share
# a key, and the one loaded later would replace the earlier one.
results_data = {}
for file_path in results_files:
    results_data[os.path.basename(file_path)] = load_json(file_path)

def calculate_jaccard(set1, set2):
    """Return the Jaccard similarity of ``set1`` and ``set2``.

    The score is the size of the intersection divided by the size of the
    union. When the union is empty (both inputs empty) 0 is returned so that
    no division by zero occurs.
    """
    combined = set1 | set2
    if not combined:
        return 0
    shared = set1 & set2
    return len(shared) / len(combined)

# For each result file, gather the 'index' of every question whose 'correct'
# field equals 1.
correct_answer_sets = {}
for name, payload in results_data.items():
    correct_answer_sets[name] = {
        entry['index'] for entry in payload['questions'] if entry['correct'] == 1
    }

# Score every unordered pair of files exactly once; the key is the pair of
# file names, the value is their Jaccard similarity.
pairwise_jaccard = {
    (left_name, right_name): calculate_jaccard(left_set, right_set)
    for (left_name, left_set), (right_name, right_set)
    in itertools.combinations(correct_answer_sets.items(), 2)
}

# Mean over all pairs; falls back to 0 when there is no pair to average.
if pairwise_jaccard:
    average_jaccard = sum(pairwise_jaccard.values()) / len(pairwise_jaccard)
else:
    average_jaccard = 0

# Report each pair's score, then the overall mean.
for pair, score in pairwise_jaccard.items():
    print(f"Jaccard similarity between {pair[0]} and {pair[1]}: {score:.3f}")

print(f"\nAverage Jaccard similarity across all pairs: {average_jaccard:.3f}")

Jaccard similarity between all-MiniLM-L12-v2-results-1.json and all-MiniLM-L6-v2-results-1.json: 0.930
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and all-mpnet-base-v2-results-1.json: 0.882
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and bert-base-nli-mean-tokens-results-1.json: 0.893
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and random-results-1.json: 0.834
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and stsb-roberta-base-results-1.json: 0.890
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and text-embedding-3-large-results-1.json: 0.868
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and text-embedding-3-small-results-1.json: 0.881
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and text-embedding-ada-002-results-1.json: 0.872
Jaccard similarity between all-MiniLM-L6-v2-results-1.json and all-mpnet-base-v2-results-1.json: 0.887
Jaccard similarity between all-MiniLM-L6-v2-results-1.

### 3-Shot

In [6]:
import json
import os
import glob
import itertools

# Glob pattern for the 3-shot Spider result files: '**' (together with
# recursive=True below) descends into every sub-directory, and '[3]' is a
# one-character class matching the shot count in the file name.
pattern = os.path.join('chapter-3', 'results', 'spider', '**', '*results-[3].json')

# Collect the matching paths. glob makes no promise about ordering, so the
# list is sorted to keep the pair order (and the printed report) stable
# from one run or machine to the next.
results_files = sorted(glob.glob(pattern, recursive=True))

def load_json(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so non-ASCII text loads identically on every machine.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Parse every result file, keyed by its bare file name (directories dropped).
# NOTE: two files with the same name in different sub-directories would share
# a key, and the one loaded later would replace the earlier one.
results_data = {}
for file_path in results_files:
    results_data[os.path.basename(file_path)] = load_json(file_path)

def calculate_jaccard(set1, set2):
    """Return the Jaccard similarity of ``set1`` and ``set2``.

    The score is the size of the intersection divided by the size of the
    union. When the union is empty (both inputs empty) 0 is returned so that
    no division by zero occurs.
    """
    combined = set1 | set2
    if not combined:
        return 0
    shared = set1 & set2
    return len(shared) / len(combined)

# For each result file, gather the 'index' of every question whose 'correct'
# field equals 1.
correct_answer_sets = {}
for name, payload in results_data.items():
    correct_answer_sets[name] = {
        entry['index'] for entry in payload['questions'] if entry['correct'] == 1
    }

# Score every unordered pair of files exactly once; the key is the pair of
# file names, the value is their Jaccard similarity.
pairwise_jaccard = {
    (left_name, right_name): calculate_jaccard(left_set, right_set)
    for (left_name, left_set), (right_name, right_set)
    in itertools.combinations(correct_answer_sets.items(), 2)
}

# Mean over all pairs; falls back to 0 when there is no pair to average.
if pairwise_jaccard:
    average_jaccard = sum(pairwise_jaccard.values()) / len(pairwise_jaccard)
else:
    average_jaccard = 0

# Report each pair's score, then the overall mean.
for pair, score in pairwise_jaccard.items():
    print(f"Jaccard similarity between {pair[0]} and {pair[1]}: {score:.3f}")

print(f"\nAverage Jaccard similarity across all pairs: {average_jaccard:.3f}")

Jaccard similarity between all-MiniLM-L12-v2-results-3.json and all-MiniLM-L6-v2-results-3.json: 0.910
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and all-mpnet-base-v2-results-3.json: 0.882
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and bert-base-nli-mean-tokens-results-3.json: 0.883
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and random-results-3.json: 0.857
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and stsb-roberta-base-results-3.json: 0.891
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and text-embedding-3-large-results-3.json: 0.894
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and text-embedding-3-small-results-3.json: 0.892
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and text-embedding-ada-002-results-3.json: 0.872
Jaccard similarity between all-MiniLM-L6-v2-results-3.json and all-mpnet-base-v2-results-3.json: 0.879
Jaccard similarity between all-MiniLM-L6-v2-results-3.

### 5-Shot

In [7]:
import json
import os
import glob
import itertools

# Glob pattern for the 5-shot Spider result files: '**' (together with
# recursive=True below) descends into every sub-directory, and '[5]' is a
# one-character class matching the shot count in the file name.
pattern = os.path.join('chapter-3', 'results', 'spider', '**', '*results-[5].json')

# Collect the matching paths. glob makes no promise about ordering, so the
# list is sorted to keep the pair order (and the printed report) stable
# from one run or machine to the next.
results_files = sorted(glob.glob(pattern, recursive=True))

def load_json(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so non-ASCII text loads identically on every machine.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Parse every result file, keyed by its bare file name (directories dropped).
# NOTE: two files with the same name in different sub-directories would share
# a key, and the one loaded later would replace the earlier one.
results_data = {}
for file_path in results_files:
    results_data[os.path.basename(file_path)] = load_json(file_path)

def calculate_jaccard(set1, set2):
    """Return the Jaccard similarity of ``set1`` and ``set2``.

    The score is the size of the intersection divided by the size of the
    union. When the union is empty (both inputs empty) 0 is returned so that
    no division by zero occurs.
    """
    combined = set1 | set2
    if not combined:
        return 0
    shared = set1 & set2
    return len(shared) / len(combined)

# For each result file, gather the 'index' of every question whose 'correct'
# field equals 1.
correct_answer_sets = {}
for name, payload in results_data.items():
    correct_answer_sets[name] = {
        entry['index'] for entry in payload['questions'] if entry['correct'] == 1
    }

# Score every unordered pair of files exactly once; the key is the pair of
# file names, the value is their Jaccard similarity.
pairwise_jaccard = {
    (left_name, right_name): calculate_jaccard(left_set, right_set)
    for (left_name, left_set), (right_name, right_set)
    in itertools.combinations(correct_answer_sets.items(), 2)
}

# Mean over all pairs; falls back to 0 when there is no pair to average.
if pairwise_jaccard:
    average_jaccard = sum(pairwise_jaccard.values()) / len(pairwise_jaccard)
else:
    average_jaccard = 0

# Report each pair's score, then the overall mean.
for pair, score in pairwise_jaccard.items():
    print(f"Jaccard similarity between {pair[0]} and {pair[1]}: {score:.3f}")

print(f"\nAverage Jaccard similarity across all pairs: {average_jaccard:.3f}")

Jaccard similarity between all-MiniLM-L12-v2-results-5.json and all-MiniLM-L6-v2-results-5.json: 0.900
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and all-mpnet-base-v2-results-5.json: 0.893
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and bert-base-nli-mean-tokens-results-5.json: 0.892
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and random-results-5.json: 0.836
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and roBERTa-results-5.json: 0.895
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and stsb-roberta-base-results-5.json: 0.895
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and text-embedding-3-large-results-5.json: 0.886
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and text-embedding-3-small-results-5.json: 0.885
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and text-embedding-ada-002-results-5.json: 0.870
Jaccard similarity between all-MiniLM-L6-v2-results-5.json and 

## BIRD Results Jaccard Scores

### 1-Shot

In [1]:
import json
import os
import glob
import itertools

# Glob pattern for the 1-shot BIRD result files: '**' (together with
# recursive=True below) descends into every sub-directory, and '[1]' is a
# one-character class matching the shot count in the file name.
pattern = os.path.join('chapter-3', 'results', 'bird', '**', '*results-[1].json')

# Collect the matching paths. glob makes no promise about ordering, so the
# list is sorted to keep the pair order (and the printed report) stable
# from one run or machine to the next.
results_files = sorted(glob.glob(pattern, recursive=True))

def load_json(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so non-ASCII text loads identically on every machine.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Parse every result file, keyed by its bare file name (directories dropped).
# NOTE: two files with the same name in different sub-directories would share
# a key, and the one loaded later would replace the earlier one.
results_data = {}
for file_path in results_files:
    results_data[os.path.basename(file_path)] = load_json(file_path)

def calculate_jaccard(set1, set2):
    """Return the Jaccard similarity of ``set1`` and ``set2``.

    The score is the size of the intersection divided by the size of the
    union. When the union is empty (both inputs empty) 0 is returned so that
    no division by zero occurs.
    """
    combined = set1 | set2
    if not combined:
        return 0
    shared = set1 & set2
    return len(shared) / len(combined)

# For each result file, gather the 'index' of every question whose 'correct'
# field equals 1.
correct_answer_sets = {}
for name, payload in results_data.items():
    correct_answer_sets[name] = {
        entry['index'] for entry in payload['questions'] if entry['correct'] == 1
    }

# Score every unordered pair of files exactly once; the key is the pair of
# file names, the value is their Jaccard similarity.
pairwise_jaccard = {
    (left_name, right_name): calculate_jaccard(left_set, right_set)
    for (left_name, left_set), (right_name, right_set)
    in itertools.combinations(correct_answer_sets.items(), 2)
}

# Mean over all pairs; falls back to 0 when there is no pair to average.
if pairwise_jaccard:
    average_jaccard = sum(pairwise_jaccard.values()) / len(pairwise_jaccard)
else:
    average_jaccard = 0

# Report each pair's score, then the overall mean.
for pair, score in pairwise_jaccard.items():
    print(f"Jaccard similarity between {pair[0]} and {pair[1]}: {score:.3f}")

print(f"\nAverage Jaccard similarity across all pairs: {average_jaccard:.3f}")

Jaccard similarity between all-MiniLM-L12-v2-results-1.json and all-MiniLM-L6-v2-results-1.json: 0.733
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and all-mpnet-base-v2-results-1.json: 0.730
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and bert-base-nli-mean-tokens-results-1.json: 0.728
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and random-results-1.json: 0.684
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and stsb-roberta-base-results-1.json: 0.714
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and text-embedding-3-large-results-1.json: 0.708
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and text-embedding-3-small-results-1.json: 0.702
Jaccard similarity between all-MiniLM-L12-v2-results-1.json and text-embedding-ada-002-results-1.json: 0.711
Jaccard similarity between all-MiniLM-L6-v2-results-1.json and all-mpnet-base-v2-results-1.json: 0.715
Jaccard similarity between all-MiniLM-L6-v2-results-1.

### 3-Shot

In [2]:
import json
import os
import glob
import itertools

# Glob pattern for the 3-shot BIRD result files: '**' (together with
# recursive=True below) descends into every sub-directory, and '[3]' is a
# one-character class matching the shot count in the file name.
pattern = os.path.join('chapter-3', 'results', 'bird', '**', '*results-[3].json')

# Collect the matching paths. glob makes no promise about ordering, so the
# list is sorted to keep the pair order (and the printed report) stable
# from one run or machine to the next.
results_files = sorted(glob.glob(pattern, recursive=True))

def load_json(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so non-ASCII text loads identically on every machine.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Parse every result file, keyed by its bare file name (directories dropped).
# NOTE: two files with the same name in different sub-directories would share
# a key, and the one loaded later would replace the earlier one.
results_data = {}
for file_path in results_files:
    results_data[os.path.basename(file_path)] = load_json(file_path)

def calculate_jaccard(set1, set2):
    """Return the Jaccard similarity of ``set1`` and ``set2``.

    The score is the size of the intersection divided by the size of the
    union. When the union is empty (both inputs empty) 0 is returned so that
    no division by zero occurs.
    """
    combined = set1 | set2
    if not combined:
        return 0
    shared = set1 & set2
    return len(shared) / len(combined)

# For each result file, gather the 'index' of every question whose 'correct'
# field equals 1.
correct_answer_sets = {}
for name, payload in results_data.items():
    correct_answer_sets[name] = {
        entry['index'] for entry in payload['questions'] if entry['correct'] == 1
    }

# Score every unordered pair of files exactly once; the key is the pair of
# file names, the value is their Jaccard similarity.
pairwise_jaccard = {
    (left_name, right_name): calculate_jaccard(left_set, right_set)
    for (left_name, left_set), (right_name, right_set)
    in itertools.combinations(correct_answer_sets.items(), 2)
}

# Mean over all pairs; falls back to 0 when there is no pair to average.
if pairwise_jaccard:
    average_jaccard = sum(pairwise_jaccard.values()) / len(pairwise_jaccard)
else:
    average_jaccard = 0

# Report each pair's score, then the overall mean.
for pair, score in pairwise_jaccard.items():
    print(f"Jaccard similarity between {pair[0]} and {pair[1]}: {score:.3f}")

print(f"\nAverage Jaccard similarity across all pairs: {average_jaccard:.3f}")

Jaccard similarity between all-MiniLM-L12-v2-results-3.json and all-MiniLM-L6-v2-results-3.json: 0.726
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and all-mpnet-base-v2-results-3.json: 0.696
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and bert-base-nli-mean-tokens-results-3.json: 0.692
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and random-results-3.json: 0.653
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and stsb-roberta-base-results-3.json: 0.705
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and text-embedding-3-large-results-3.json: 0.714
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and text-embedding-3-small-results-3.json: 0.708
Jaccard similarity between all-MiniLM-L12-v2-results-3.json and text-embedding-ada-002-results-3.json: 0.670
Jaccard similarity between all-MiniLM-L6-v2-results-3.json and all-mpnet-base-v2-results-3.json: 0.709
Jaccard similarity between all-MiniLM-L6-v2-results-3.

### 5-Shot

In [3]:
import json
import os
import glob
import itertools

# Glob pattern for the 5-shot BIRD result files: '**' (together with
# recursive=True below) descends into every sub-directory, and '[5]' is a
# one-character class matching the shot count in the file name.
pattern = os.path.join('chapter-3', 'results', 'bird', '**', '*results-[5].json')

# Collect the matching paths. glob makes no promise about ordering, so the
# list is sorted to keep the pair order (and the printed report) stable
# from one run or machine to the next.
results_files = sorted(glob.glob(pattern, recursive=True))

def load_json(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so non-ASCII text loads identically on every machine.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Parse every result file, keyed by its bare file name (directories dropped).
# NOTE: two files with the same name in different sub-directories would share
# a key, and the one loaded later would replace the earlier one.
results_data = {}
for file_path in results_files:
    results_data[os.path.basename(file_path)] = load_json(file_path)

def calculate_jaccard(set1, set2):
    """Return the Jaccard similarity of ``set1`` and ``set2``.

    The score is the size of the intersection divided by the size of the
    union. When the union is empty (both inputs empty) 0 is returned so that
    no division by zero occurs.
    """
    combined = set1 | set2
    if not combined:
        return 0
    shared = set1 & set2
    return len(shared) / len(combined)

# For each result file, gather the 'index' of every question whose 'correct'
# field equals 1.
correct_answer_sets = {}
for name, payload in results_data.items():
    correct_answer_sets[name] = {
        entry['index'] for entry in payload['questions'] if entry['correct'] == 1
    }

# Score every unordered pair of files exactly once; the key is the pair of
# file names, the value is their Jaccard similarity.
pairwise_jaccard = {
    (left_name, right_name): calculate_jaccard(left_set, right_set)
    for (left_name, left_set), (right_name, right_set)
    in itertools.combinations(correct_answer_sets.items(), 2)
}

# Mean over all pairs; falls back to 0 when there is no pair to average.
if pairwise_jaccard:
    average_jaccard = sum(pairwise_jaccard.values()) / len(pairwise_jaccard)
else:
    average_jaccard = 0

# Report each pair's score, then the overall mean.
for pair, score in pairwise_jaccard.items():
    print(f"Jaccard similarity between {pair[0]} and {pair[1]}: {score:.3f}")

print(f"\nAverage Jaccard similarity across all pairs: {average_jaccard:.3f}")

Jaccard similarity between all-MiniLM-L12-v2-results-5.json and all-MiniLM-L6-v2-results-5.json: 0.741
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and all-mpnet-base-v2-results-5.json: 0.725
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and bert-base-nli-mean-tokens-results-5.json: 0.723
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and random-results-5.json: 0.671
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and stsb-roberta-base-results-5.json: 0.715
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and text-embedding-3-large-results-5.json: 0.720
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and text-embedding-3-small-results-5.json: 0.728
Jaccard similarity between all-MiniLM-L12-v2-results-5.json and text-embedding-ada-002-results-5.json: 0.713
Jaccard similarity between all-MiniLM-L6-v2-results-5.json and all-mpnet-base-v2-results-5.json: 0.708
Jaccard similarity between all-MiniLM-L6-v2-results-5.