Generate the result tables for the static word embedding experiments.

In [1]:
import os
import pandas as pd
import json

def get_score(model_name, pooling_name, task, sim=None, cor=None):
    """Look up a single correlation score in a saved results JSON file.

    The file is read from
    ``<parent of cwd>/results/<model_name>/<task>/<pooling_name>/<task>.json``.
    ``pooling_name`` may itself contain path separators
    (e.g. ``"uniform_whitening/whitening"``).

    Args:
        model_name: Directory name of the model under ``results``.
        pooling_name: Relative sub-path identifying the pooling variant.
        task: Task name; also used as the JSON file's base name.
        sim: Key of the similarity measure inside the JSON. When ``None``,
            the module-level ``sim`` is used, or ``"cos_sim"`` if that
            global is not defined.
        cor: Key of the correlation type inside the JSON. When ``None``,
            the module-level ``cor`` is used, or ``"spearman"`` if that
            global is not defined.

    Returns:
        The value stored at ``data[split][sim][cor]``, where ``split`` is
        ``"validation"`` for the ``"JSTS"`` task and ``"test"`` otherwise.

    Raises:
        FileNotFoundError: If the results file does not exist.
        KeyError: If the split, ``sim`` or ``cor`` key is missing in the file.
    """
    # Explicit arguments win; otherwise honour the notebook-level settings so
    # that existing three-argument calls behave exactly as before.
    if sim is None:
        sim = globals().get("sim", "cos_sim")
    if cor is None:
        cor = globals().get("cor", "spearman")

    # Results live in a "results" directory next to the current working directory.
    root_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    path = os.path.join(root_path, "results", model_name, task, pooling_name, f"{task}.json")
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # JSTS scores are read from the validation split; every other task uses test.
    split = "validation" if task == "JSTS" else "test"
    return data[split][sim][cor]

# Module-level settings that get_score looks up when indexing the results JSON:
# first the similarity key, then the correlation key.
sim = "cos_sim"
cor = "spearman"

# Example usage: a single lookup for one (model, pooling, task) triple.
get_score("GoogleNews-vectors-negative300-torch", "uniform_whitening/uniform_centering_then_zipfian_whitening_norm", "STS12")

0.5901050766815591

In [2]:
def create_pandas_tables(model_names, pooling_names, tasks):
    """
    Build one score table per model.

    Args:
        model_names: Models to tabulate; each becomes a key of the result.
        pooling_names: Pooling variants; they become the row index, in order.
        tasks: Task names; they become the columns, in order.

    Returns:
        dict mapping each model name to a DataFrame whose index is named
        "Pooling Name", whose columns are ``tasks`` followed by "Average"
        (the row-wise mean over the task columns), and whose values are the
        ``get_score`` results multiplied by 100 and rounded to 2 decimals.
    """

    def _table_for(model_name):
        # One row per pooling variant, one cell per task.
        score_grid = [
            [get_score(model_name, pooling_name, task) for task in tasks]
            for pooling_name in pooling_names
        ]
        table = pd.DataFrame(score_grid, index=pooling_names, columns=tasks)
        # The average is taken on the raw scores, before scaling and rounding.
        table["Average"] = table.mean(axis=1)
        # Label the index, then express everything as percentages with 2 decimals.
        return table.rename_axis("Pooling Name").mul(100).round(2)

    return {model_name: _table_for(model_name) for model_name in model_names}

# Table 8 (enwiki)

In [3]:
# Model directory names; get_score uses each as the path component right after "results".
model_names = [
    "GoogleNews-vectors-negative300-torch", # word2vec
    "average_word_embeddings_glove.840B.300d", # glove
    "fasttext-en-torch",
    "fasttext-en-subword-torch",
]

# Pooling variants, given as relative sub-paths; they become the table rows in this order.
pooling_names = [
    "normal/mean",
    "uniform_whitening/centering_only",
    "uniform_whitening/whitening",
    "zipfian_whitening/centering_only",
    "zipfian_whitening/whitening",
    "abtp/component_removal",
    "sif/sif_w_component_removal",
]

# Tasks; they become the table columns (an "Average" column is appended by create_pandas_tables).
tasks = [
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
    "SICK-R",
    "STSBenchmark",
]

# Globals read by get_score; restated here so the cell does not depend on an earlier cell's values.
sim = "cos_sim"
cor = "spearman"

# dict: model name -> DataFrame of scores (x100, rounded to 2 decimals).
all_results = create_pandas_tables(model_names, pooling_names, tasks)

In [4]:
# GloVe (Table 8)
all_results["average_word_embeddings_glove.840B.300d"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/mean,56.46,50.41,51.13,58.6,49.03,57.01,46.17,52.69
uniform_whitening/centering_only,55.54,46.32,49.67,56.03,46.9,56.44,45.17,50.87
uniform_whitening/whitening,53.31,62.45,57.93,68.68,58.69,57.92,52.21,58.74
zipfian_whitening/centering_only,54.52,69.2,60.87,69.82,62.61,58.01,52.25,61.04
zipfian_whitening/whitening,57.76,72.22,67.04,76.8,71.72,61.8,66.92,67.75
abtp/component_removal,52.67,67.38,59.4,69.53,60.71,58.56,54.28,60.36
sif/sif_w_component_removal,60.23,68.78,62.39,67.26,61.85,56.91,58.7,62.3


In [5]:
# word2vec (Table 8)
all_results["GoogleNews-vectors-negative300-torch"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/mean,58.57,68.64,63.65,71.73,61.79,61.77,56.98,63.3
uniform_whitening/centering_only,58.17,67.34,62.19,70.15,59.6,61.39,55.85,62.1
uniform_whitening/whitening,56.53,66.95,62.77,72.42,61.05,62.74,56.03,62.64
zipfian_whitening/centering_only,56.89,69.95,65.08,73.91,65.71,62.18,58.84,64.65
zipfian_whitening/whitening,56.16,70.33,67.2,76.6,70.99,62.52,66.5,67.19
abtp/component_removal,55.53,69.32,63.13,72.25,60.98,62.02,56.98,62.89
sif/sif_w_component_removal,60.05,73.26,66.87,74.32,67.64,59.22,63.04,66.34


In [6]:
# fasttext-en-torch (Table 8)
all_results["fasttext-en-torch"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/mean,57.94,68.97,62.37,72.26,63.59,59.99,59.82,63.56
uniform_whitening/centering_only,59.73,55.02,55.16,64.22,53.39,58.85,52.46,56.98
uniform_whitening/whitening,52.47,59.01,53.9,65.33,52.61,58.34,48.6,55.75
zipfian_whitening/centering_only,58.3,71.69,64.57,74.1,67.59,60.75,59.4,65.2
zipfian_whitening/whitening,58.86,73.85,68.43,78.07,74.0,62.85,69.55,69.37
abtp/component_removal,58.35,69.09,60.82,71.99,60.76,60.34,57.02,62.62
sif/sif_w_component_removal,61.54,76.95,68.39,76.98,70.27,59.52,67.08,68.67


In [7]:
# fasttext-en-subword-torch (Table 8)
all_results["fasttext-en-subword-torch"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/mean,49.1,47.34,51.94,61.99,51.54,53.6,50.43,52.28
uniform_whitening/centering_only,49.21,43.13,49.89,62.03,49.7,54.56,46.91,50.78
uniform_whitening/whitening,45.12,41.0,47.3,62.08,48.85,54.8,43.55,48.96
zipfian_whitening/centering_only,48.68,55.03,54.07,60.23,58.41,54.64,50.38,54.49
zipfian_whitening/whitening,61.22,60.68,63.18,73.59,69.87,59.82,68.2,65.22
abtp/component_removal,49.64,41.79,48.81,60.84,47.57,55.09,44.23,49.71
sif/sif_w_component_removal,57.28,54.5,60.77,68.82,61.63,56.83,60.36,60.03


# Table 9 (test set frequency)

In [8]:
# Same four models as the Table 8 cell, but with the "_in_batch" suffix on each results directory name.
model_names = [
    "GoogleNews-vectors-negative300-torch_in_batch",
    "average_word_embeddings_glove.840B.300d_in_batch", 
    "fasttext-en-torch_in_batch",
    "fasttext-en-subword-torch_in_batch",
]

# Pooling variants (table rows). Unlike the Table 8 cell, this list has no "sif/..." entry.
pooling_names = [
    "normal/mean",
    "uniform_whitening/centering_only",
    "uniform_whitening/whitening",
    "zipfian_whitening/centering_only",
    "zipfian_whitening/whitening",
    "abtp/component_removal",
]

# Tasks (table columns); create_pandas_tables appends the "Average" column.
tasks = [
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
    "SICK-R",
    "STSBenchmark",
]

# Globals read by get_score; restated here so the cell does not depend on an earlier cell's values.
sim = "cos_sim"
cor = "spearman"

# Rebinds all_results: from here on it holds the "_in_batch" tables, not the Table 8 ones.
all_results = create_pandas_tables(model_names, pooling_names, tasks)

In [9]:
# GloVe (Table 9)
all_results["average_word_embeddings_glove.840B.300d_in_batch"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/mean,57.71,50.29,50.61,58.38,48.76,56.76,46.22,52.67
uniform_whitening/centering_only,56.32,61.17,52.68,64.8,55.8,57.98,47.94,56.67
uniform_whitening/whitening,51.67,60.94,57.14,70.09,63.08,55.14,53.16,58.74
zipfian_whitening/centering_only,50.69,70.66,61.59,70.19,68.25,60.03,56.64,62.58
zipfian_whitening/whitening,61.63,78.36,69.48,76.83,74.08,60.11,71.6,70.3
abtp/component_removal,52.93,66.93,60.1,71.93,63.12,58.23,53.72,60.99


In [10]:
# word2vec (Table 9)
all_results["GoogleNews-vectors-negative300-torch_in_batch"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/mean,59.0,68.92,63.99,72.51,62.25,61.87,57.15,63.67
uniform_whitening/centering_only,57.88,70.34,64.24,74.71,65.57,62.47,58.09,64.76
uniform_whitening/whitening,58.45,69.42,65.46,76.43,67.78,62.87,60.85,65.89
zipfian_whitening/centering_only,55.02,71.47,65.81,74.36,69.52,62.92,61.02,65.73
zipfian_whitening/whitening,59.37,76.92,69.48,76.42,73.56,60.07,70.42,69.46
abtp/component_removal,56.33,70.42,64.71,74.74,65.19,62.55,58.21,64.59


In [11]:
# fasttext-en-torch (Table 9)
all_results["fasttext-en-torch_in_batch"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/mean,58.23,69.36,62.89,73.09,64.25,60.22,60.27,64.04
uniform_whitening/centering_only,60.6,69.51,61.09,73.92,64.49,61.14,57.42,64.02
uniform_whitening/whitening,55.56,63.51,57.73,70.68,62.4,57.93,54.65,60.35
zipfian_whitening/centering_only,55.92,73.36,65.72,74.12,72.18,62.3,62.95,66.65
zipfian_whitening/whitening,62.2,79.35,71.03,77.95,76.28,60.66,73.56,71.58
abtp/component_removal,59.13,71.0,63.3,74.8,65.96,61.69,58.23,64.87


In [12]:
# fasttext-en-subword-torch (Table 9)
all_results["fasttext-en-subword-torch_in_batch"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/mean,51.37,51.49,54.57,62.75,52.97,53.53,52.41,54.16
uniform_whitening/centering_only,51.31,44.8,49.66,62.27,47.43,54.86,43.12,50.49
uniform_whitening/whitening,51.52,49.33,53.51,68.28,58.34,56.94,51.69,55.66
zipfian_whitening/centering_only,43.15,53.4,53.67,63.05,59.09,56.57,47.16,53.73
zipfian_whitening/whitening,60.87,72.21,67.79,75.86,73.88,60.52,70.99,68.87
abtp/component_removal,49.06,45.16,49.57,62.14,50.75,55.49,44.53,50.96


# Table 10 (JSTS)

In [13]:
# Wiki frequency
# Single model directory under "results" for the JSTS table.
model_names = [
    "fasttext-ja-torch"
]

# Pooling variants (table rows), including the "sif/..." entry.
pooling_names = [
    "normal/mean",
    "uniform_whitening/centering_only",
    "uniform_whitening/whitening",
    "zipfian_whitening/centering_only",
    "zipfian_whitening/whitening",
    "abtp/component_removal",
    "sif/sif_w_component_removal",
]

# get_score reads the "validation" split for JSTS (all other tasks use "test").
# With a single task, the "Average" column simply repeats the JSTS column.
tasks = [
    "JSTS"
]

# Globals read by get_score; restated here so the cell does not depend on an earlier cell's values.
sim = "cos_sim"
cor = "spearman"

# Rebinds all_results, then displays the only table it contains.
all_results = create_pandas_tables(model_names, pooling_names, tasks)
all_results["fasttext-ja-torch"]

Unnamed: 0_level_0,JSTS,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1
normal/mean,55.81,55.81
uniform_whitening/centering_only,56.06,56.06
uniform_whitening/whitening,55.53,55.53
zipfian_whitening/centering_only,57.55,57.55
zipfian_whitening/whitening,65.56,65.56
abtp/component_removal,57.14,57.14
sif/sif_w_component_removal,61.03,61.03


In [14]:
# Test set frequency
# Same model as the previous cell, but using the "_in_batch" results directory.
model_names = [
    "fasttext-ja-torch_in_batch"
]

# Pooling variants (table rows). Unlike the previous cell, this list has no "sif/..." entry.
pooling_names = [
    "normal/mean",
    "uniform_whitening/centering_only",
    "uniform_whitening/whitening",
    "zipfian_whitening/centering_only",
    "zipfian_whitening/whitening",
    "abtp/component_removal",
]

# get_score reads the "validation" split for JSTS (all other tasks use "test").
# With a single task, the "Average" column simply repeats the JSTS column.
tasks = [
    "JSTS"
]

# Globals read by get_score; restated here so the cell does not depend on an earlier cell's values.
sim = "cos_sim"
cor = "spearman"

# Rebinds all_results, then displays the only table it contains.
all_results = create_pandas_tables(model_names, pooling_names, tasks)
all_results["fasttext-ja-torch_in_batch"]

Unnamed: 0_level_0,JSTS,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1
normal/mean,59.94,59.94
uniform_whitening/centering_only,59.89,59.89
uniform_whitening/whitening,61.75,61.75
zipfian_whitening/centering_only,63.05,63.05
zipfian_whitening/whitening,69.86,69.86
abtp/component_removal,63.02,63.02


# Table 11 (norm / direction)

In [15]:
# Same four model directories as the Table 8 cell.
model_names = [
    "GoogleNews-vectors-negative300-torch", # word2vec
    "average_word_embeddings_glove.840B.300d", # glove
    "fasttext-en-torch",
    "fasttext-en-subword-torch",
]

# Pooling variants for the norm / direction table (table rows, in this order).
# NOTE(review): "dirction" is spelled this way in the path string. These strings are
# joined into the results path by get_score, so the spelling presumably matches the
# on-disk directory name; confirm before "fixing" it here.
pooling_names = [
    "normal/raw_then_zipfian_whitening_norm",
    "zipfian_whitening/raw_then_zipfian_whitening_dirction",
    "uniform_whitening/uniform_centering_then_zipfian_whitening_norm",
    "uniform_whitening/uniform_whitening_then_zipfian_whitening_norm",
    "zipfian_whitening/zipfian_whitening_then_uniform_centering_norm",
    "zipfian_whitening/zipfian_whitening_then_uniform_whitening_norm",
]

# Tasks (table columns); create_pandas_tables appends the "Average" column.
tasks = [
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
    "SICK-R",
    "STSBenchmark",
]

# Globals read by get_score; restated here so the cell does not depend on an earlier cell's values.
sim = "cos_sim"
cor = "spearman"

# Rebinds all_results: from here on it holds the norm / direction tables.
all_results = create_pandas_tables(model_names, pooling_names, tasks)

In [16]:
# GloVe (Table 11)
# TODO: the scores currently in the paper seem to use Pearson correlation rather than Spearman; find out why.
all_results["average_word_embeddings_glove.840B.300d"]

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/raw_then_zipfian_whitening_norm,62.08,66.62,59.34,65.93,58.57,57.52,55.43,60.78
zipfian_whitening/raw_then_zipfian_whitening_dirction,49.66,64.15,61.73,71.89,66.37,59.52,57.2,61.5
uniform_whitening/uniform_centering_then_zipfian_whitening_norm,61.41,63.94,58.33,63.88,56.9,56.39,54.62,59.35
uniform_whitening/uniform_whitening_then_zipfian_whitening_norm,60.2,73.63,64.98,74.21,66.67,60.53,61.55,65.97
zipfian_whitening/zipfian_whitening_then_uniform_centering_norm,47.12,61.14,59.42,70.1,63.46,58.58,53.87,59.1
zipfian_whitening/zipfian_whitening_then_uniform_whitening_norm,49.57,65.0,62.17,72.24,67.83,58.22,57.73,61.82


In [17]:
# word2vec (Table 11)
all_results['GoogleNews-vectors-negative300-torch']

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/raw_then_zipfian_whitening_norm,58.87,71.96,65.52,73.55,66.61,61.05,61.59,65.59
zipfian_whitening/raw_then_zipfian_whitening_dirction,55.45,68.07,66.49,75.98,68.46,63.41,63.25,65.87
uniform_whitening/uniform_centering_then_zipfian_whitening_norm,59.01,71.91,65.28,73.26,65.71,61.34,61.27,65.4
uniform_whitening/uniform_whitening_then_zipfian_whitening_norm,58.49,71.53,66.22,75.62,67.82,61.95,62.87,66.35
zipfian_whitening/zipfian_whitening_then_uniform_centering_norm,54.94,67.87,66.18,75.62,68.27,63.11,62.83,65.55
zipfian_whitening/zipfian_whitening_then_uniform_whitening_norm,53.4,66.91,65.15,75.11,66.9,63.24,60.95,64.52


In [18]:
# fasttext-en-torch (Table 11)
all_results['fasttext-en-torch']

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/raw_then_zipfian_whitening_norm,61.35,75.69,66.77,75.69,69.41,61.24,65.31,67.92
zipfian_whitening/raw_then_zipfian_whitening_dirction,55.12,70.2,66.23,75.92,71.95,61.57,65.35,66.62
uniform_whitening/uniform_centering_then_zipfian_whitening_norm,65.26,70.32,63.63,71.24,62.99,60.34,61.24,65.0
uniform_whitening/uniform_whitening_then_zipfian_whitening_norm,62.48,76.02,66.73,76.55,68.65,61.56,64.31,68.04
zipfian_whitening/zipfian_whitening_then_uniform_centering_norm,52.86,68.4,64.6,74.39,70.23,60.92,62.7,64.87
zipfian_whitening/zipfian_whitening_then_uniform_whitening_norm,46.45,64.51,60.1,70.59,64.8,58.58,54.87,59.99


In [19]:
# fasttext-en-subword-torch (Table 11)
all_results['fasttext-en-subword-torch']

Unnamed: 0_level_0,STS12,STS13,STS14,STS15,STS16,SICK-R,STSBenchmark,Average
Pooling Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
normal/raw_then_zipfian_whitening_norm,58.03,60.35,60.76,67.99,61.74,56.35,61.47,60.96
zipfian_whitening/raw_then_zipfian_whitening_dirction,47.65,49.71,55.26,67.96,61.81,56.62,54.72,56.25
uniform_whitening/uniform_centering_then_zipfian_whitening_norm,60.29,58.99,61.11,70.23,65.36,57.78,63.06,62.4
uniform_whitening/uniform_whitening_then_zipfian_whitening_norm,61.06,60.35,63.07,74.29,69.19,59.56,65.09,64.66
zipfian_whitening/zipfian_whitening_then_uniform_centering_norm,46.17,49.23,54.49,67.33,61.11,55.95,53.01,55.33
zipfian_whitening/zipfian_whitening_then_uniform_whitening_norm,43.2,46.68,52.15,65.46,59.01,54.07,49.26,52.83
