In [1]:
import os
import pathlib
from dotenv import load_dotenv
load_dotenv()

# Directory holding the input data files, taken from the FILE_DIR environment
# variable (which `load_dotenv()` may have populated from a .env file).
# Fail early with an explicit message instead of the opaque TypeError that
# `pathlib.Path(None)` would raise when the variable is missing.
_file_dir_env = os.getenv('FILE_DIR')
if _file_dir_env is None:
  raise RuntimeError(
    "Environment variable FILE_DIR is not set; define it in the environment or in a .env file."
  )

FILE_DIR = pathlib.Path(_file_dir_env).expanduser()

In [2]:
import re

# Parse the analogy file into one record per question line.
# A line of the form ": <name>" starts a new category; every other non-blank
# line is expected to hold at least four whitespace-separated words, of which
# the first three are the query words and the fourth is the expected answer.
result_list = []

# Initialise explicitly so that a data line appearing before any category
# header gets `None` rather than raising NameError or silently reusing a stale
# `category` left in the kernel by a previous run of this cell.
category = None

with open(FILE_DIR / "questions-words.txt", "r", encoding="utf-8") as file:
  for line_no, line in enumerate(file, start=1):
    if m := re.match(r": (.+)", line):
      category = m.group(1)
      continue

    # split() with no argument tolerates repeated spaces and drops the
    # trailing newline, so no separate strip() is needed.
    vectors = line.split()
    if not vectors:
      continue  # blank line: nothing to record
    if len(vectors) < 4:
      raise ValueError(
        f"questions-words.txt line {line_no}: expected 4 words, got {len(vectors)}: {line!r}"
      )

    result_list.append({
      "category": category,
      "vec_1": vectors[0],
      "vec_2": vectors[1],
      "vec_3": vectors[2],
      "true": vectors[3],
    })

In [3]:
import pandas as pd

# Build the table of analogy questions: one row per parsed record, one column
# per record key.
df = pd.DataFrame(data=result_list)

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,category,vec_1,vec_2,vec_3,true
0,capital-common-countries,Athens,Greece,Baghdad,Iraq
1,capital-common-countries,Athens,Greece,Bangkok,Thailand
2,capital-common-countries,Athens,Greece,Beijing,China
3,capital-common-countries,Athens,Greece,Berlin,Germany
4,capital-common-countries,Athens,Greece,Bern,Switzerland
...,...,...,...,...,...
19539,gram9-plural-verbs,write,writes,talk,talks
19540,gram9-plural-verbs,write,writes,think,thinks
19541,gram9-plural-verbs,write,writes,vanish,vanishes
19542,gram9-plural-verbs,write,writes,walk,walks


In [4]:
from importnb import imports

# Import `model` from the `knock60` notebook; the importnb `imports("ipynb")`
# context is what allows a notebook to be imported like a module here.
# NOTE(review): `model` is presumably the word-vector model loaded in knock60
# (it is later queried via `model.most_similar`) — confirm against that notebook.
with imports("ipynb"):
  from knock60 import model

In [9]:
def calc_similarity(df: pd.Series):
  """Answer one analogy question with the module-level `model`.

  Despite the parameter name, `df` is a single row, not a whole frame: the
  function is used with `DataFrame.apply(..., axis=1)`, which passes each row
  as a Series.

  The query uses `vec_2` and `vec_3` as positive terms and `vec_1` as the
  negative term, i.e. it looks for the word closest to vec_2 - vec_1 + vec_3.

  Args:
    df: A row providing the keys "vec_1", "vec_2" and "vec_3".

  Returns:
    The first (top-ranked) entry of `model.most_similar(..., topn=1)`. The
    caller expands it into a predicted word and its similarity score.
    NOTE(review): presumably a `(word, similarity)` tuple as returned by
    gensim's `most_similar` — confirm against the model loaded in knock60.
  """
  positive_words = [df["vec_2"], df["vec_3"]]
  negative_words = [df["vec_1"]]
  return model.most_similar(positive=positive_words, negative=negative_words, topn=1)[0]

In [10]:
# Run the analogy query for every row; result_type="expand" spreads each
# returned pair over two columns, which are then attached to the frame as the
# predicted word and its similarity score.
_analogy_predictions = df.apply(calc_similarity, axis=1, result_type="expand")
df[["pred", "similarity"]] = _analogy_predictions

# Bare last expression so the notebook renders the updated frame.
df

Unnamed: 0,category,vec_1,vec_2,vec_3,true,pred,similarity
0,capital-common-countries,Athens,Greece,Baghdad,Iraq,Iraqi,0.635187
1,capital-common-countries,Athens,Greece,Bangkok,Thailand,Thailand,0.713767
2,capital-common-countries,Athens,Greece,Beijing,China,China,0.723578
3,capital-common-countries,Athens,Greece,Berlin,Germany,Germany,0.673462
4,capital-common-countries,Athens,Greece,Bern,Switzerland,Switzerland,0.491975
...,...,...,...,...,...,...,...
19539,gram9-plural-verbs,write,writes,talk,talks,talked,0.544719
19540,gram9-plural-verbs,write,writes,think,thinks,thinks,0.617773
19541,gram9-plural-verbs,write,writes,vanish,vanishes,disappear,0.600271
19542,gram9-plural-verbs,write,writes,walk,walks,walks,0.553434


In [11]:
# Persist the predictions without the row index. `index` is documented as a
# bool, so pass False explicitly instead of relying on None being falsy.
df.to_csv("./result_64.csv", index=False)