In [1]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os
from pathlib import Path

In [2]:
def centroid(dwe, year, flashback = True, path="/home/max/Results/fb_pol-yearly-bert/sentence-bert-swedish-cased/centroid"):
    """Look up the stored centroid vector of a term for one year.

    Reads ``{path}/{year}.txt``, in which every line has the form
    ``term<TAB>v1 v2 v3 ...`` (vector components separated by whitespace),
    and returns the vector of the first line whose term equals ``dwe``.

    Parameters
    ----------
    dwe : str
        Term to look up, e.g. ``"N1_återvandring"``.
    year : int or str
        Year; interpolated into the file name ``{year}.txt``.
    flashback : bool, default True
        When falsy, ``"fb_pol"`` in ``path`` is replaced by ``"fm_smh"``
        before the file is opened.
    path : str
        Directory holding the per-year centroid files.

    Returns
    -------
    numpy.ndarray or None
        Float array of shape ``(1, dim)``, or ``None`` when the file has no
        line for ``dwe``.
    """
    if not flashback:
        path = path.replace("fb_pol", "fm_smh")

    # Terms contain non-ASCII letters, so do not depend on the locale's
    # default encoding. NOTE(review): assumes the files are UTF-8 -- confirm.
    with open(f"{path}/{year}.txt", encoding="utf-8") as f:
        # Iterate lazily and stop at the first hit instead of parsing the
        # whole file up front.
        for line in f:
            if not line.strip():
                continue  # tolerate blank (e.g. trailing) lines
            term, vector = line.rstrip("\n").split("\t", 1)
            if term == dwe:
                # Convert explicitly so callers receive numbers, not strings.
                return np.array([vector.split()], dtype=float)

    # Made explicit: the term does not occur in this year's file.
    return None
    


In [3]:
#centroid("N1_återvandring", 2018).shape

In [4]:
def get_vectors(dwe, year, flashback = True, path="/home/max/Corpora/flashback-pol-time/yearly/contexts_per_term/vectors/sentence-bert-swedish-cased"):
    """Load all context vectors of a term for one year.

    Reads ``{path}/{dwe}.txt``, in which every line has the form
    ``year<TAB>v1 v2 v3 ...`` (vector components separated by whitespace),
    and keeps the lines whose year equals ``year``, in file order.

    Parameters
    ----------
    dwe : str
        Term; interpolated into the file name ``{dwe}.txt``.
    year : int
        Year to select; compared against ``int(<first column>)``.
    flashback : bool, default True
        When falsy, ``"flashback-pol"`` in ``path`` is replaced by
        ``"familjeliv-smh"`` before the file is opened.
    path : str
        Directory holding the per-term vector files.

    Returns
    -------
    numpy.ndarray
        Float array with one row per matching line. When no line matches,
        the result is an empty array of shape ``(0,)``.
    """
    if not flashback:
        path = path.replace("flashback-pol", "familjeliv-smh")

    rows = []

    # File names contain non-ASCII letters' terms; be explicit about the
    # content encoding too. NOTE(review): assumes UTF-8 files -- confirm.
    with open(f"{path}/{dwe}.txt", encoding="utf-8") as f:
        # Stream the file rather than materialising every line first.
        for line in f:
            if not line.strip():
                continue  # tolerate blank (e.g. trailing) lines
            y, vector = line.rstrip("\n").split("\t", 1)
            if int(y) == year:
                rows.append(vector.split())

    # Convert explicitly so callers receive numbers, not strings.
    return np.array(rows, dtype=float)
    

In [5]:
#get_vectors("N1_återvandring", 2018).shape

In [6]:
# x = centroid("N1_återvandring", 2018)
# y = get_vectors("N1_återvandring", 2018)

In [7]:
# cs = cosine_similarity(x,y)

In [8]:
def get_examples(dwe, year, indices, flashback = True, path="/home/max/Corpora/flashback-pol-time/yearly/contexts_per_term/files"):
    """Return selected context texts of a term for one year.

    Reads ``{path}/{dwe}.txt``, in which every line has the form
    ``year<TAB>text``, collects the texts whose year equals ``year`` (in
    file order) and returns the ones at the requested positions.

    Parameters
    ----------
    dwe : str
        Term; interpolated into the file name ``{dwe}.txt``.
    year : int
        Year to select; compared against ``int(<first column>)``.
    indices : iterable of int
        Positions within the year's texts, returned in the given order.
        NOTE(review): presumably these are row indices into the matching
        vector file, which requires both files to list contexts in the
        same order -- confirm.
    flashback : bool, default True
        When falsy, ``"flashback-pol"`` in ``path`` is replaced by
        ``"familjeliv-smh"`` before the file is opened.
    path : str
        Directory holding the per-term context files.

    Returns
    -------
    list of str
        One text per entry of ``indices``.

    Raises
    ------
    IndexError
        If an index is outside the year's texts.
    """
    if not flashback:
        path = path.replace("flashback-pol", "familjeliv-smh")

    exs = []

    # NOTE(review): assumes the context files are UTF-8 -- confirm.
    with open(f"{path}/{dwe}.txt", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank (e.g. trailing) lines
            # Split on the first tab only: the text itself may contain
            # tabs, which used to break the two-value unpacking.
            y, text = line.rstrip("\n").split("\t", 1)
            if int(y) == year:
                exs.append(text)

    return [exs[idx] for idx in indices]

In [9]:
# get_examples("N1_återvandring", 2018, [2009, 3020, 3451, 2699, 3129, 3437, 3579, 1786, 2284, 2904])

In [27]:
def bertex(dwe, year, flashback = True, top_k = 20, save_to_file = None, create_filename = False):
    """Find the contexts of a term that lie closest to its yearly centroid.

    Loads the centroid and all context vectors of ``dwe`` for ``year``,
    scores every context by cosine similarity to the centroid, and returns
    the texts of the best-scoring contexts. Optionally writes them to a
    Markdown file as a numbered list with their scores.

    Parameters
    ----------
    dwe : str
        Term, e.g. ``"N1_återvandring"``.
    year : int
        Year to analyse.
    flashback : bool, default True
        Passed on to the loaders to choose the corpus; also decides the
        corpus label ("Flashback" / "Familjeliv") used in the output file.
    top_k : int, default 20
        Number of examples wanted. If fewer contexts exist, all of them
        are returned instead of raising.
    save_to_file : str or None
        Output path. With ``create_filename=True`` it is used as a prefix
        to which ``"{corpus}_{dwe}_{year}.md"`` is appended by plain string
        concatenation, so a directory must end with a path separator.
    create_filename : bool, default False
        See ``save_to_file``.

    Returns
    -------
    list of str
        Example texts, ordered from most to least similar.

    Raises
    ------
    ValueError
        If no centroid or no context vectors exist for the term and year.
    """
    year_centroid = centroid(dwe, year, flashback = flashback)
    if year_centroid is None:
        raise ValueError(f"no centroid found for {dwe!r} in {year}")

    year_vectors = get_vectors(dwe, year, flashback = flashback)
    if len(year_vectors) == 0:
        raise ValueError(f"no context vectors found for {dwe!r} in {year}")

    cs = cosine_similarity(year_centroid, year_vectors).flatten()

    # argpartition raises ValueError when asked for more elements than
    # exist, so cap the request at the number of available contexts.
    k = min(top_k, cs.size)
    top_idxs = np.argpartition(cs, -k)[-k:]
    # argpartition leaves the selected block unordered; rank it best-first
    # so the returned list and the saved file agree.
    top_idxs = top_idxs[np.argsort(cs[top_idxs])[::-1]]

    examples = get_examples(dwe, year, indices = top_idxs, flashback = flashback)

    if save_to_file is not None:
        corpus = "Flashback" if flashback else "Familjeliv"
        if create_filename:
            save_to_file = save_to_file + f"{corpus.lower()}_{dwe}_{year}.md"
        # The texts contain non-ASCII letters; do not depend on the locale.
        with open(save_to_file, "w", encoding="utf-8") as f:
            f.write(f"## {dwe}, {year}, {corpus}, N = {len(year_vectors)}\n")
            for n, (score, ex) in enumerate(zip(cs[top_idxs], examples), start = 1):
                f.write(f"{n}.\t({score: .2f}) {ex}\n")

    return examples


In [11]:
def create_book(path):
    """Concatenate the files of a directory into ``book.md``.

    Reads every regular file directly inside ``path`` in sorted name order
    and writes their contents, preceded by a title line, to
    ``{path}/book.md``.

    Sub-directories are skipped, and so is an existing ``book.md``;
    otherwise re-running the function would copy the previous book into
    the new one.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory containing the example files.
    """
    path = Path(path)
    target = path/"book.md"

    book = []
    for file in sorted(os.listdir(path)):
        source = path/file
        # Skip the output file itself and anything that is not a regular
        # file (opening a directory would raise).
        if source == target or not source.is_file():
            continue
        with open(source, encoding="utf-8") as f:
            book.append(f.read())

    with open(target, "w", encoding="utf-8") as f:
        f.write("# BOOK OF EXAMPLES\n")
        f.writelines(book)
    
    

### BOOK 1 (rectified change)

In [None]:
# Terms and the years in which they are inspected for Book 1.
rectified_change_years = [
    ("N1_återvandring", (2011, 2012, 2018, 2019)),
    ("N1_kulturberikare", (2009, 2010)),
    ("V1_berika", (2006, 2007, 2016, 2017)),
    ("N1_globalist", (2019, 2020)),
    ("V1_hjälpa_på_plats", (2013, 2014)),
]

# Each (term, year) pair is listed twice: first with the flag True
# (Flashback), then with False (Familjeliv).
divergences_of_interest = [
    (term, yr, is_flashback)
    for term, yrs in rectified_change_years
    for yr in yrs
    for is_flashback in (True, False)
]

book1_dir = "/home/max/Documents/research/examples_divergence/"

for dwe, year, fb in divergences_of_interest:
    print(dwe, year, end="\r")
    # Ask for 30 examples, then 20, whenever bertex raises ValueError
    # (presumably because fewer contexts than top_k exist -- confirm).
    for k in (30, 20):
        try:
            bertex(dwe, year, fb, top_k = k, save_to_file = book1_dir, create_filename = True)
        except ValueError:
            continue
        break
    else:
        # Last attempt: a ValueError raised here is not caught.
        bertex(dwe, year, fb, top_k = 10, save_to_file = book1_dir, create_filename = True)

        

In [None]:
create_book("/home/max/Documents/research/examples_divergence/")

### BOOK 2 (naive change)

In [28]:
# Terms and the years in which they are inspected for Book 2.
naive_change_years = [
    ("N1_återvandring", (2008, 2009, 2011, 2012, 2021, 2022)),
    ("N1_kulturberikare", (2007, 2008, 2011, 2012, 2013)),
    ("V1_berika", (2004, 2005, 2018, 2019, 2020, 2021, 2022)),
    ("V1_kulturberika", (2009, 2010, 2011)),
    ("N1_globalist", (2019, 2020)),
]
naive_change_targets = [
    (term, yr)
    for term, yrs in naive_change_years
    for yr in yrs
]

book2_dir = "/home/max/Documents/research/examples_divergence/gch/"

# All targets are processed with the corpus flag True first, then False.
for tv in (True, False):
    for dwe, year in naive_change_targets:
        print(dwe, year, end="\r")
        try:
            # First attempt uses bertex's default top_k.
            bertex(dwe, year, tv, save_to_file = book2_dir, create_filename = True)
        except ValueError:
            # Retry once with a smaller request; a second ValueError propagates.
            bertex(dwe, year, tv, top_k = 10, save_to_file = book2_dir, create_filename = True)

print("done!")

done!obalist 202001113


In [29]:
create_book("/home/max/Documents/research/examples_divergence/gch/")

## Återvandring

In [None]:
#bertex("N1_återvandring", 2011, save_to_file="/home/max/Desktop/", create_filename = True)

In [None]:
bertex("N1_återvandring", 2011)

In [None]:
bertex("N1_återvandring", 2012)

In [None]:
bertex("N1_återvandring", 2011, flashback=False)

In [None]:
bertex("N1_återvandring", 2012, flashback=False)

In [None]:
bertex("N1_återvandring", 2018)

In [None]:
bertex("N1_återvandring", 2019)

In [None]:
bertex("N1_återvandring", 2018, flashback = False)

In [None]:
bertex("N1_återvandring", 2019, flashback = False)

## Kulturberika

In [None]:
bertex("N1_kulturberikare", 2009)

In [None]:
bertex("N1_kulturberikare", 2010)

In [None]:
bertex("N1_kulturberikare", 2009, flashback=False)

In [None]:
bertex("N1_kulturberikare", 2010, flashback=False)

## Berika

In [None]:
bertex("V1_berika", 2006)

In [None]:
bertex("V1_berika", 2007)

In [None]:
bertex("V1_berika", 2006, flashback=False)

In [None]:
bertex("V1_berika", 2007, flashback=False)

In [None]:
bertex("V1_berika", 2016)

In [None]:
bertex("V1_berika", 2017)

In [None]:
bertex("V1_berika", 2016, flashback=False)

In [None]:
bertex("V1_berika", 2017, flashback=False)

## Globalist

In [None]:
bertex("N1_globalist", 2019)

In [None]:
bertex("N1_globalist", 2020)

In [None]:
bertex("N1_globalist", 2019, flashback=False, top_k = 20)

In [None]:
bertex("N1_globalist", 2020, flashback=False, top_k = 10)

## Hjälpa på plats

In [None]:
bertex("V1_hjälpa_på_plats", 2013)

In [None]:
bertex("V1_hjälpa_på_plats", 2014)

In [None]:
bertex("V1_hjälpa_på_plats", 2013, flashback=False)

In [None]:
bertex("V1_hjälpa_på_plats", 2014, flashback=False)