In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import re
from util import load_metric
from scipy.stats import spearmanr, pearsonr
#from collections import Counter

## Functions

In [2]:
def jaccard(a, b):
    """Return the Jaccard similarity of two collections: |a & b| / |a | b|.

    Both arguments are converted to sets first, so any iterable of hashable
    items is accepted (``q_show`` passes a set and a list).

    Returns a float in [0, 1].  Two empty collections are treated as
    identical and give 1.0 instead of raising ``ZeroDivisionError``.
    """
    a, b = set(a), set(b)
    union = a | b
    if not union:
        # 0 / 0 case: by convention two empty sets are identical.
        return 1.0
    return len(a & b) / len(union)

In [3]:
def q_show(df, var, k = 40, as_table = False):
    """Print the top-``k`` index entries of every column that starts with ``var``.

    Matching columns are visited in ``df.columns`` order.  For each one the
    ``k`` largest values are selected and their index labels are printed,
    preceded by the Jaccard similarity between this column's labels and those
    of the previously printed column (``None`` for the first column).

    param df        DataFrame whose column names have the form <prefix>_<suffix>
    param var       column-name prefix to select, e.g. "rch"
    param k         number of top entries per column
    param as_table  if True, print a table holding the value, the two ``frq_``
                    counts and the ``gch_``/``mccc_``/``stdc_`` values of the
                    same transition instead of a plain list.
                    Note: only for transition variables (suffix "ti:tj")!
    """
    # Defined once, outside the loops, so that an empty selection (k == 0 or
    # an empty frame) prints an empty table instead of raising NameError.
    columns = ["Word", var.upper(), "n_i", "n_j", "GCH", "Mctrl", "Sctrl"]

    top_k = [
        (col, list(df[col].sort_values(ascending=False)[:k].index))
        for col in df.columns
        if col.startswith(var)
    ]

    for i, (col, words) in enumerate(top_k):
        # Overlap with the previous column's top-k list; undefined for the first.
        jac = round(jaccard(set(words), top_k[i - 1][-1]), 2) if i > 0 else None

        print()
        print(col, "jaccard =", jac)

        if not as_table:
            print(words)
            continue

        trans = col.split("_")[-1]
        ti, tj = trans.split(":")
        table = []
        for word in words:
            row = df.loc[word]  # one row lookup instead of six
            table.append([
                word,
                row[col],
                int(row[f"frq_{ti}"]),
                int(row[f"frq_{tj}"]),
                row[f"gch_{trans}"],
                row[f"mccc_{trans}"],
                row[f"stdc_{trans}"],
            ])
        print(pd.DataFrame(table, columns=columns))
            
    
    

In [4]:
def change_show(df, var, targets, th=4.781):
    """Print, for each ``var`` column, the target words with their frequencies.

    Columns whose name starts with ``var`` are visited in sorted order.  For
    each one a table of (word, value, frequency at ti, frequency at tj) is
    built from ``targets``.  When ``th`` is given, only rows whose value is
    strictly greater than ``th`` are kept.  A column with no rows left prints
    nothing.

    param df       DataFrame indexed by word, with <var>_ti:tj and frq_<t> columns
    param var      column-name prefix, e.g. "rch"
    param targets  iterable of index labels to report
    param th       exclusive lower bound on the value, or None to list every target
    """
    selected = [name for name in df.columns if name.startswith(var)]
    selected.sort()

    for col in selected:
        trans = col.split("_")[-1]
        ti, tj = trans.split(":")

        rows = []
        for word in targets:
            record = df.loc[word]
            value = record[col]
            entry = [word, value, int(record[f"frq_{ti}"]), int(record[f"frq_{tj}"])]
            if th is None or value > th:
                rows.append(entry)

        if not rows:
            continue

        print()
        print(col)
        print(pd.DataFrame(rows, columns=["Word", "Value", f"n_{ti}", f"n_{tj}"]))
                

In [5]:
def read_csv(path):
    """Load a semicolon-separated CSV file, using its first column as the index."""
    options = {"sep": ";", "index_col": 0}
    frame = pd.read_csv(path, **options)
    return frame

In [6]:
def get_dwts(df, path):
    """Return the index entries of ``df`` that contain any DWT root.

    param df    DataFrame whose index holds the words to search
    param path  text file with one root per line; each line becomes one
                alternative of a regular expression, so regex syntax in the
                file is interpreted, not escaped

    Returns a list of ``str`` index labels, in index order, for which
    ``re.search`` finds at least one root.  Returns an empty list when the
    file holds no roots.
    """
    # Explicit UTF-8: the words matched in this notebook are non-ASCII
    # (e.g. "återvandring") and the platform default encoding may differ.
    with open(Path(path), "r", encoding="utf-8") as f:
        dwt_roots = [line.rstrip("\r\n") for line in f]

    # Drop blank lines: an empty alternative would make the pattern match
    # every word in the index.
    dwt_roots = [root for root in dwt_roots if root.strip()]
    if not dwt_roots:
        return []

    dwt_regex = re.compile(f"({'|'.join(dwt_roots)})")
    return [str(w) for w in df.index if dwt_regex.search(str(w)) is not None]

In [7]:
def get_variables(df):
    """Collect the variable prefixes and time suffixes used in ``df.columns``.

    Each column name is split at its LAST underscore into prefix and suffix,
    the same way ``q_show`` and ``change_show`` extract the suffix.  A suffix
    containing ":" is treated as a transition ("ti:tj"), any other suffix as
    a year-like label.  Column names without an underscore are skipped.

    Returns a dict of four sets:
        "yr_prefix"    prefixes of the per-year columns
        "tr_prefix"    prefixes of the per-transition columns
        "years"        suffixes of the per-year columns
        "transitions"  suffixes of the per-transition columns
    """
    yr_prefix = set()
    tr_prefix = set()
    years = set()
    transitions = set()

    for v in df.columns:
        # rpartition never raises: prefixes that themselves contain "_" are
        # kept whole, and names without "_" come back with an empty separator.
        prefix, sep, suffix = str(v).rpartition("_")
        if not sep:
            continue
        if ":" in suffix:
            tr_prefix.add(prefix)
            transitions.add(suffix)
        else:
            yr_prefix.add(prefix)
            years.add(suffix)

    return {"yr_prefix": yr_prefix,
            "tr_prefix": tr_prefix,
            "years": years,
            "transitions": transitions
           }
            

In [8]:
def checker(word, transition, controls_dir, n_ctrl=10, variable="cosine_change"):
    """
    Print and collect the value of `word` in every control file of a transition.

    param word          key looked up in each loaded control file
    param transition    tuple of ti and tj
    param controls_dir  where to find controls
    param n_ctrl        number of control files; they are numbered 1..n_ctrl
    param variable      "cosine_change" or "cosine_sim"; sub-directory of controls_dir

    Returns the list of values, in control-file order.
    """
    ti, tj = transition
    folder = Path(controls_dir) / variable

    collected = []
    for n in range(1, n_ctrl + 1):
        file = f"{ti}_{tj}_control{n}.txt"
        value = load_metric(folder / file)[word]
        collected.append(value)
        print(file, value)

    return collected

In [9]:
def ncd(DATA, CORPUS, VAR, VAL): # No Change Detector
    """Print, per transition, which index entries have ``VAR`` equal to ``VAL``.

    param DATA    DataFrame with <VAR>_<yi>:<yj> columns
    param CORPUS  corpus directory; its "vocab" sub-directory defines the transitions
    param VAR     column-name prefix, e.g. "gsim"
    param VAL     the value that counts as "no change", e.g. 1.0

    For every transition the sizes of group A (value == VAL) and group B
    (value != VAL) are printed, followed by the first 100 entries of each,
    side by side.  The listing stops at the shorter of the two groups.
    Missing values compare unequal to VAL and therefore land in group B.
    """
    vocab_dir = Path(CORPUS) / "vocab"

    for yi, yj in find_transitions(vocab_dir):
        print()
        print(f"{yi}:{yj}")

        values = DATA[f"{VAR}_{yi}:{yj}"]
        same = list(DATA[values == VAL].index)
        other = list(DATA[values != VAL].index)

        print("No change (A):", len(same))
        print("Other (B):", len(other))
        print()
        print("{: <20} {}".format("A", "B"))
        print("{: <20} {}".format("---", "---"))
        for left, right in zip(same[:100], other[:100]):
            print(f"{left: <20} {right}")

In [10]:
def find_transitions(vocabs):
    """Return the consecutive year pairs for the vocabulary files in ``vocabs``.

    param vocabs  directory containing one ``<year>.txt`` file per year

    Only entries with a ``.txt`` extension are considered, so stray files
    such as ``.DS_Store`` or ``.ipynb_checkpoints`` are ignored.  The year is
    taken from the file name without its extension.

    Returns a list of ``(year_i, year_j)`` int tuples pairing each year with
    the next one in ascending order; empty when fewer than two years exist.

    Raises ValueError if a ``.txt`` file name is not an integer.
    """
    years = []
    for file in os.listdir(vocabs):
        # splitext removes the real suffix; str.strip(".txt") would strip any
        # leading/trailing '.', 't' or 'x' characters instead.
        stem, ext = os.path.splitext(file)
        if ext != ".txt":
            continue
        years.append(int(stem))
    years.sort()

    return list(zip(years, years[1:]))

In [11]:
def w_overlap_checker(corpus, th_c):
    """Print the vocabulary overlap between the two years of every transition.

    param corpus  corpus directory containing a "vocab" sub-directory
    param th_c    minimum count a word needs in a year to be included

    For each transition the function prints the number of words at or above
    the threshold in each year, then the sizes of their intersection, union
    and difference (first year minus second year).
    """
    root = Path(corpus)

    for yi, yj in find_transitions(root / "vocab"):
        print()
        print(f"{yi}:{yj}")

        counts_i = load_metric(root / f"vocab/{yi}.txt")
        counts_j = load_metric(root / f"vocab/{yj}.txt")

        # Only the words matter from here on, so keep sets of those that pass.
        words_i = {w for w, c in counts_i.items() if c >= th_c}
        words_j = {w for w, c in counts_j.items() if c >= th_c}

        print(f"{yi}:", len(words_i))
        print(f"{yj}:", len(words_j))
        print(f"{yi} and {yj}:", len(words_i & words_j))
        print(f"{yi} or {yj}:", len(words_i | words_j))
        print(f"{yi} - {yj}:", len(words_i - words_j))

In [13]:
def correlation(df, var1, var2, corpus, mode=1, metric="pearson"):
    """Correlate two transition variables across all transitions, word by word.

    param df      DataFrame indexed by word, with <var>_<ti>:<tj> columns
    param var1    prefix of the first variable, e.g. "diffpm"
    param var2    prefix of the second variable, e.g. "rch"
    param corpus  corpus directory; its "vocab" sub-directory defines the transitions
    param mode    1: row-wise ``DataFrame.corrwith``; returns a Series of
                     coefficients indexed like ``df``
                  2: per-word scipy test on the transitions where both values
                     are present; returns a DataFrame with the columns
                     Word, Correlation, p, N (N = number of valid pairs;
                     Correlation and p are NaN when N < 2)
    param metric  "pearson" or "spearman" (mode 1 passes it on to ``corrwith``)

    Raises ValueError for an unknown ``mode`` and, in mode 2, an unknown ``metric``.
    """
    if mode not in (1, 2):
        raise ValueError(f"mode must be 1 or 2, got {mode!r}")

    transitions = find_transitions(Path(corpus) / "vocab")
    labels = [f"{ti}:{tj}" for ti, tj in transitions]

    # Select both column blocks once instead of once per word.
    left = df[[f"{var1}_{label}" for label in labels]]
    right = df[[f"{var2}_{label}" for label in labels]]

    if mode == 1:
        # corrwith aligns the frames on their column labels; without common
        # labels ("var1_..." vs "var2_...") every coefficient would be NaN.
        left = left.copy()
        right = right.copy()
        left.columns = labels
        right.columns = labels
        return left.corrwith(right, axis=1, method=metric)

    tests = {"pearson": pearsonr, "spearman": spearmanr}
    if metric not in tests:
        # Previously v and p were left unbound (or stale from the previous word).
        raise ValueError(f"metric must be 'pearson' or 'spearman', got {metric!r}")
    test = tests[metric]

    table = []
    for w, xs, ys in zip(df.index, left.to_numpy(), right.to_numpy()):
        # Keep only the transitions where both variables are defined.
        valid = [(x, y) for x, y in zip(xs, ys) if not (pd.isna(x) or pd.isna(y))]
        N = len(valid)

        if N < 2:
            v = np.nan
            p = np.nan
        else:
            X, Y = zip(*valid)
            R_data = test(X, Y)
            v = R_data.statistic
            p = R_data.pvalue

        table.append([w, round(v, 2), round(p, 2), N])

    return pd.DataFrame(table, columns=["Word", "Correlation", "p", "N"])

## Files

In [14]:
# Earlier single-file locations, kept for reference:
#file_path = Path("../../dw_results/fb_pol-yearly-radical3.csv")
#file_path = Path("fb_pol-yearly-radical3.csv")
# Directory holding the result CSV files that the cells below load.
results_dir = Path("../../dw_results")

In [15]:
# List the files available in results_dir, one per line.
files = os.listdir(results_dir)
for file in files:
    # Plain loop: a list comprehension used only for printing builds a
    # throwaway list of None and rebinds `_`.
    print(file)

fb_pol-yearly-radical3-restricted.csv
fb_pol-yearly-radical3-full.csv


In [16]:
# Text file of DWT roots, one per line; get_dwts() joins the lines into a regex.
dwt_path = "../data/utils/dwts.txt"

In [17]:
# Corpus root; correlation() reads the transition years from its "vocab" sub-directory.
# NOTE(review): both alternatives are absolute, machine-specific paths — adjust before running elsewhere.
#corpus = Path("/srv/data/gusbohom/root/corpora/toypol/time_bin/radical3/")
corpus = Path("/home/max/Corpora/flashback-pol-time/yearly/fb-pt-radical3")

## Q-Show

In [18]:
# Full yearly results table (semicolon-separated, first column used as index).
df_yearly = read_csv(results_dir / "fb_pol-yearly-radical3-full.csv")

In [None]:
# Full time-bin results table.
# NOTE(review): this cell has no execution count and the file is not in the results_dir
# listing shown earlier in the notebook — confirm it exists before running.
df_time_bin = read_csv(results_dir / "fb_pol-time_bin-radical3-full.csv")

In [None]:
# Time-bin results for the "toypol" data, read from outside results_dir.
# NOTE(review): this cell has no execution count — confirm the file is available.
toypol = read_csv(Path("../../toypol-time_bin.csv"))

### Check

### Yearly

In [19]:
# Top-40 words by rch for every yearly transition, printed as tables.
q_show(df_yearly, "rch", as_table = True)
# Other variables to inspect the same way.
# NOTE: these calls predate the current signature; q_show now needs the DataFrame as first argument.
# q_show("rsim")
# q_show("gch")
# q_show("gsim")


rch_2000:2001 jaccard = None
           Word         RCH   n_i   n_j       GCH     Mctrl     Sctrl
0      bidragit  235.059543    11    14  0.167895  0.080191  0.000356
1   individuell   25.213364    10    11  0.161957  0.122159  0.001505
2    avslöjande   22.048312    11    10  0.076490  0.053672  0.000987
3             $   21.444180    17    22  0.205694  0.026920  0.007949
4     interests   17.946884    12    10  0.034739  0.023519  0.000596
5   självkänsla   14.758416    10    10  0.113900  0.100157  0.000888
6     gåsfonden   14.398690    13    93  0.227088  0.036601  0.012614
7            bo   13.568608   123   225  0.335645  0.080635  0.017919
8            re   12.918446    47   367  0.209023  0.078862  0.009607
9       gyllene   11.470559    11    10  0.113252  0.079837  0.002778
10        about   10.527548    93    44  0.046623  0.023362  0.002107
11      element   10.152353   109    35  0.403042  0.173649  0.021543
12        björn    9.599879    37  1472  0.289176  0.117116 

               Word        RCH   n_i    n_j       GCH     Mctrl     Sctrl
0             yngre  33.683267    27    132  0.218046  0.136273  0.002315
1            rights  25.305922    17    100  0.145049  0.044933  0.003772
2       palestinian  24.393496    30    171  0.103509  0.033486  0.002737
3      organization  22.747619    10     59  0.096866  0.029806  0.002811
4          reported  21.397675    12     81  0.083963  0.024755  0.002638
5              utav  21.145010   131    400  0.242711  0.077529  0.007448
6           israeli  19.225558    15    186  0.104923  0.033410  0.003547
7           utanför  19.185751   102    434  0.197012  0.096550  0.004993
8               his  18.704823    38    804  0.076839  0.032292  0.002271
9               has  18.667681    85    907  0.082116  0.028798  0.002723
10          weapons  18.625770    12    118  0.134679  0.040085  0.004842
11             home  18.294653    15     73  0.102950  0.031076  0.003746
12           killed  18.287275    11  

                   Word        RCH  n_i   n_j       GCH     Mctrl     Sctrl
0                  paul  26.678499  104  1129  0.319997  0.138756  0.006477
1                    rs  16.446705   86   723  0.218520  0.161146  0.003326
2            södertälje  12.878466   88   641  0.257833  0.181936  0.005619
3                srpska  12.383602   29   164  0.225571  0.136537  0.006855
4             bosniaker  12.162480   12   210  0.267937  0.144800  0.009653
5     anhöriginvandring  11.856039   23   127  0.241849  0.168430  0.005904
6             ahtisaari  11.771438   13   140  0.270352  0.168430  0.008255
7              executed  11.382694   10    63  0.128165  0.052554  0.006334
8              ignatius  11.198598   25    74  0.370176  0.174822  0.016633
9               bosnian  11.103880   20   377  0.157992  0.077176  0.006939
10             turkiska  10.633667   92   544  0.240987  0.195961  0.004037
11          antisvenska  10.494950   10    10  0.218287  0.188936  0.002667
12          

                  Word        RCH   n_i   n_j       GCH     Mctrl     Sctrl
0               joakim  31.818962    45  3345  0.402971  0.143524  0.007774
1                daddy  30.733513    82  7785  0.301955  0.120659  0.005624
2               burken  28.733384    23   222  0.317186  0.150707  0.005524
3             dotterns  28.295205    19   459  0.301752  0.160215  0.004769
4               promoe  27.617470    10    11  0.182009  0.136220  0.001581
5                 ship  21.027058    23   587  0.353783  0.186591  0.007581
6      dokumentationen  20.580278    15    72  0.255446  0.141874  0.005262
7         uthängningar  20.574244    20   151  0.313520  0.168807  0.006706
8        vårdnadstvist  20.322284    30   817  0.287689  0.162581  0.005870
9               mamman  20.172215   514  5894  0.260553  0.119300  0.006676
10               malmö  18.083135  3743  6173  0.210982  0.147189  0.003364
11            förhöret  17.922613    12   723  0.326444  0.153402  0.009206
12          

                     Word        RCH   n_i   n_j       GCH     Mctrl     Sctrl
0               morfologi  94.033470    10    11  0.107101  0.097916  0.000093
1              rullatorer  39.292112    10    10  0.155838  0.127021  0.000699
2              spökstäder  36.945656    10    10  0.156394  0.134149  0.000574
3                   ipsos  18.124646    28   380  0.288577  0.154350  0.007061
4                   sarin  16.531725    10   301  0.289497  0.160960  0.007413
5               megafonen  15.799792    11   142  0.323719  0.179602  0.008697
6               ringvägen  15.758049    10    12  0.154984  0.120209  0.002104
7                 svoboda  14.521501    14   118  0.277599  0.155253  0.008033
8             startbidrag  13.581912    11    98  0.288882  0.118748  0.011944
9                     hen  12.920451  3071  2220  0.239008  0.193697  0.003344
10               sjöström  12.713881    10   136  0.335166  0.168219  0.012520
11            tvmottagare  12.396997    20   121  0.

              Word        RCH   n_i    n_j       GCH     Mctrl     Sctrl
0        delegater  40.466097    19    761  0.314584  0.150520  0.003866
1          intyget  33.673439    14    366  0.353526  0.153970  0.005650
2          intygen  23.692381    10     56  0.320088  0.155055  0.006641
3        livvakten  21.350579    16    542  0.360183  0.169205  0.008529
4          armband  21.211514    16    281  0.356839  0.167965  0.008490
5      delegaterna  18.513997    17    355  0.275417  0.139505  0.006999
6             köln  17.728325    12   1062  0.365740  0.182242  0.009869
7             ohio  17.567471    24    967  0.286573  0.151830  0.007313
8            grand  17.129176   317   1165  0.310552  0.168126  0.007928
9           remain  16.754968    67    559  0.333683  0.178552  0.008828
10        giuliani  15.653091    10    216  0.293904  0.167143  0.007721
11          kasich  15.468569    25   1537  0.251837  0.157205  0.005833
12        pollarna  14.982637    11    139  0.29233

                   Word        RCH   n_i   n_j       GCH     Mctrl     Sctrl
0   ingenjörsutbildning  67.981436    10    11  0.166853  0.150573  0.000228
1               synfält  36.585079    10    11  0.231563  0.198359  0.000865
2        underanställda  29.901005    10    11  0.150337  0.138044  0.000392
3                nodeal  24.212817    10   159  0.283893  0.150307  0.005260
4             elräkning  24.035591    11    11  0.204776  0.152911  0.002057
5                 biden  23.656839    84  2682  0.258361  0.142006  0.004690
6                   aoc  23.638878    22   294  0.304234  0.195276  0.004395
7                  barr  17.381581    10   575  0.365253  0.164780  0.010997
8                   pro  16.515291   299   707  0.262443  0.162046  0.005796
9              talesmän  16.390192    19    95  0.326376  0.137749  0.010973
10             oslagbar  16.041123    11    10  0.255501  0.202351  0.003159
11         ocasiocortez  15.603030    29   155  0.272438  0.169963  0.006262

                   Word         RCH   n_i     n_j       GCH     Mctrl  \
0                   mai  136.024486    10      11  0.149252  0.106358   
1             styvpappa   53.836676    10      10  0.161905  0.144820   
2               dvärgen   38.402952   140    2917  0.321858  0.120450   
3              ukrainas   38.143985   583   16002  0.240016  0.122054   
4                  rysk   37.053960  1366   24143  0.208003  0.116815   
5             ukrainare   32.335195   642   11523  0.247353  0.130234   
6            friendship   30.363037    15     310  0.303508  0.080654   
7             ukrainska   29.939520  1192   27293  0.248200  0.114433   
8               clashes   27.429785    17     273  0.241740  0.088204   
9             bedövande   27.336465    13     160  0.365303  0.121507   
10                ghost   25.928649    15     380  0.375920  0.179283   
11              russian   23.796543   447    9095  0.200902  0.112774   
12            sabotaget   23.044929    15     279  

In [None]:
# Same top-40 rch tables for the toypol data.
q_show(toypol, "rch", as_table = True)

In [None]:
# Top-40 words by gsim per transition for the toypol data (value column is labelled "GSIM").
q_show(toypol, "gsim", as_table = True)

### Time bin

In [None]:
# Top-40 rch tables for the time-bin results.
q_show(df_time_bin, "rch", as_table = True)

## Change Show

In [20]:
# "Restricted" yearly results table, used for the DWT analyses below.
df_yearly_dwt = read_csv(results_dir / "fb_pol-yearly-radical3-restricted.csv")

In [None]:
# "Restricted" time-bin results table.
# NOTE(review): this cell has no execution count and the file is not in the results_dir
# listing shown earlier in the notebook — confirm it exists before running.
df_time_bin_dwt = read_csv(results_dir / "fb_pol-time_bin-radical3-restricted.csv")

### Yearly

In [21]:
# Signature reminder: change_show(df, var, targets, th=4.781)
# Per yearly transition, list the DWTs whose rch exceeds the default threshold.
change_show(df_yearly_dwt, "rch", get_dwts(df_yearly_dwt, dwt_path))


rch_2006:2007
                Word     Value  n_2006  n_2007
0  N1_kulturberikare  5.140348      51     255
1        N1_berikare  7.390323      14     185

rch_2007:2008
           Word     Value  n_2007  n_2008
0  N1_globalist  9.722278      26     294

rch_2009:2010
                Word     Value  n_2009  n_2010
0  N1_kulturberikare  8.116137     525     925

rch_2017:2018
              Word     Value  n_2017  n_2018
0  N1_återvandring  11.08269     165    3288

rch_2018:2019
              Word     Value  n_2018  n_2019
0  N1_återvandring  6.600887    3288    1510


In [22]:
# th=None disables the threshold: every DWT is listed for every transition, NaN values included.
change_show(df_yearly_dwt, "rch", get_dwts(df_yearly_dwt, dwt_path), th=None)


rch_2000:2001
                              Word     Value  n_2000  n_2001
0                        V1_berika  3.112465      41      20
1                N1_kulturberikare -1.504499      40      28
2                      N1_berikare       NaN      12       0
3                  V1_kulturberika       NaN       8       0
4                   N1_förortsgäng       NaN       8       0
5                     N1_globalist       NaN       0       0
6                  N1_återvandring       NaN       0       0
7                  A1_globalistisk       NaN       0       0
8                  antiglobalister       NaN       0       0
9                    V1_återvandra       NaN       0       0
10                        oberikat       NaN       0       0
11              V1_hjälpa_på_plats       NaN       0       0
12                       oberikade       NaN       0       0
13                        oberikad       NaN       0       0
14                 N2_återvandrare       NaN       0       0
15       


rch_2007:2008
                              Word     Value  n_2007  n_2008
0                        V1_berika  1.556598     611    1356
1                N1_kulturberikare  0.416677     255     689
2                      N1_berikare  2.030875     185     602
3                  V1_kulturberika  0.802065     121     213
4                   N1_förortsgäng       NaN       0       7
5                     N1_globalist  9.722278      26     294
6                  N1_återvandring  4.196763      47     112
7                  A1_globalistisk       NaN       7      33
8                  antiglobalister       NaN       5       0
9                    V1_återvandra -0.404192      19      23
10                        oberikat       NaN       0       5
11              V1_hjälpa_på_plats       NaN       0       0
12                       oberikade       NaN       0       0
13                        oberikad       NaN       0       0
14                 N2_återvandrare       NaN       0       0
15       


rch_2015:2016
                              Word     Value  n_2015  n_2016
0                        V1_berika -0.781521    1095    1247
1                N1_kulturberikare -0.206462     128     171
2                      N1_berikare -2.922248     198     145
3                  V1_kulturberika -2.600950     120      98
4                   N1_förortsgäng -2.541556      15      12
5                     N1_globalist  0.759366     676    1836
6                  N1_återvandring -2.478115     136     200
7                  A1_globalistisk -0.538985     132     261
8                  antiglobalister       NaN       0      14
9                    V1_återvandra -3.196311      28      19
10                        oberikat       NaN       0       0
11              V1_hjälpa_på_plats -8.184708     251      65
12                       oberikade       NaN       5       0
13                        oberikad       NaN       0       6
14                 N2_återvandrare       NaN       0       0
15       

In [23]:
# Unfiltered gsim values of all DWTs for every yearly transition.
change_show(df_yearly_dwt, "gsim", get_dwts(df_yearly_dwt, dwt_path), th=None)


gsim_2000:2001
                              Word     Value  n_2000  n_2001
0                        V1_berika  0.802447      41      20
1                N1_kulturberikare  0.904334      40      28
2                      N1_berikare       NaN      12       0
3                  V1_kulturberika       NaN       8       0
4                   N1_förortsgäng       NaN       8       0
5                     N1_globalist       NaN       0       0
6                  N1_återvandring       NaN       0       0
7                  A1_globalistisk       NaN       0       0
8                  antiglobalister       NaN       0       0
9                    V1_återvandra       NaN       0       0
10                        oberikat       NaN       0       0
11              V1_hjälpa_på_plats       NaN       0       0
12                       oberikade       NaN       0       0
13                        oberikad       NaN       0       0
14                 N2_återvandrare       NaN       0       0
15      


gsim_2008:2009
                              Word     Value  n_2008  n_2009
0                        V1_berika  0.813770    1356    1860
1                N1_kulturberikare  0.720900     689     525
2                      N1_berikare  0.721555     602     611
3                  V1_kulturberika  0.782498     213     247
4                   N1_förortsgäng       NaN       7      18
5                     N1_globalist  0.839476     294     204
6                  N1_återvandring  0.935985     112      51
7                  A1_globalistisk  0.885135      33      41
8                  antiglobalister       NaN       0       0
9                    V1_återvandra  0.907361      23      26
10                        oberikat       NaN       5       8
11              V1_hjälpa_på_plats       NaN       0      14
12                       oberikade       NaN       0      11
13                        oberikad       NaN       0       5
14                 N2_återvandrare       NaN       0       0
15      


gsim_2015:2016
                              Word     Value  n_2015  n_2016
0                        V1_berika  0.824113    1095    1247
1                N1_kulturberikare  0.734825     128     171
2                      N1_berikare  0.782705     198     145
3                  V1_kulturberika  0.872261     120      98
4                   N1_förortsgäng  0.927844      15      12
5                     N1_globalist  0.800942     676    1836
6                  N1_återvandring  0.889528     136     200
7                  A1_globalistisk  0.785606     132     261
8                  antiglobalister       NaN       0      14
9                    V1_återvandra  0.962083      28      19
10                        oberikat       NaN       0       0
11              V1_hjälpa_på_plats  0.931879     251      65
12                       oberikade       NaN       5       0
13                        oberikad       NaN       0       6
14                 N2_återvandrare       NaN       0       0
15      

### Time bin

In [None]:
# Time-bin counterpart: DWTs whose rch exceeds the default threshold.
change_show(df_time_bin_dwt, "rch", get_dwts(df_time_bin_dwt, dwt_path))

In [None]:
# Time-bin counterpart: unfiltered gsim values of all DWTs.
change_show(df_time_bin_dwt, "gsim", get_dwts(df_time_bin_dwt, dwt_path), th=None)

In [None]:
# Look up the entry for "V1_berika" in the 2015 and 2019 time-bin vocab files and print both.
# NOTE(review): hard-coded absolute server paths, independent of the `corpus` variable defined above.
a=load_metric("/srv/data/gusbohom/root/corpora/fb_pol/time_bin/radical3/vocab/2015.txt")["V1_berika"]
b=load_metric("/srv/data/gusbohom/root/corpora/fb_pol/time_bin/radical3/vocab/2019.txt")["V1_berika"]
print(a, b)

## Correlation

In [24]:
# Inspect which variable prefixes, years and transitions the restricted yearly table contains.
get_variables(df_yearly_dwt)

{'yr_prefix': {'fpm', 'frq'},
 'tr_prefix': {'dif',
  'diffpm',
  'gch',
  'gsim',
  'mccc',
  'mcsim',
  'rch',
  'rsim',
  'stdc',
  'stdsim'},
 'years': {'2000',
  '2001',
  '2002',
  '2003',
  '2004',
  '2005',
  '2006',
  '2007',
  '2008',
  '2009',
  '2010',
  '2011',
  '2012',
  '2013',
  '2014',
  '2015',
  '2016',
  '2017',
  '2018',
  '2019',
  '2020',
  '2021',
  '2022',
  'doc',
  'tot'},
 'transitions': {'2000:2001',
  '2001:2002',
  '2002:2003',
  '2003:2004',
  '2004:2005',
  '2005:2006',
  '2006:2007',
  '2007:2008',
  '2008:2009',
  '2009:2010',
  '2010:2011',
  '2011:2012',
  '2012:2013',
  '2013:2014',
  '2014:2015',
  '2015:2016',
  '2016:2017',
  '2017:2018',
  '2018:2019',
  '2019:2020',
  '2020:2021',
  '2021:2022'}}

In [26]:
# Per-word Pearson correlation between diffpm and rch across the yearly transitions
# (mode 2 also reports the p-value and the number of valid pairs N).
correlation(df=df_yearly_dwt, var1="diffpm", var2="rch", corpus=corpus, mode=2, metric = "pearson")

Unnamed: 0,Word,Correlation,p,N
0,V1_berika,-0.13,0.56,22
1,N1_kulturberikare,-0.39,0.1,19
2,N1_berikare,-0.43,0.1,16
3,V1_kulturberika,-0.53,0.04,16
4,N1_förortsgäng,0.02,0.98,6
5,N1_globalist,-0.19,0.47,17
6,N1_återvandring,-0.4,0.14,15
7,A1_globalistisk,-0.32,0.27,14
8,antiglobalister,-0.88,0.02,6
9,V1_återvandra,-0.56,0.03,15


In [29]:
# Per-word Pearson correlation between stdc and rch across the yearly transitions.
correlation(df=df_yearly_dwt, var1="stdc", var2="rch", corpus=corpus, mode=2, metric = "pearson")

Unnamed: 0,Word,Correlation,p,N
0,V1_berika,0.21,0.35,22
1,N1_kulturberikare,-0.29,0.23,19
2,N1_berikare,-0.11,0.69,16
3,V1_kulturberika,0.04,0.88,16
4,N1_förortsgäng,0.36,0.49,6
5,N1_globalist,-0.13,0.63,17
6,N1_återvandring,-0.39,0.15,15
7,A1_globalistisk,0.26,0.36,14
8,antiglobalister,0.17,0.75,6
9,V1_återvandra,-0.01,0.97,15


In [30]:
# Same diffpm/rch comparison using Spearman rank correlation.
correlation(df=df_yearly_dwt, var1="diffpm", var2="rch", corpus=corpus, mode=2, metric = "spearman")

Unnamed: 0,Word,Correlation,p,N
0,V1_berika,-0.03,0.9,22
1,N1_kulturberikare,-0.38,0.11,19
2,N1_berikare,-0.14,0.6,16
3,V1_kulturberika,-0.08,0.78,16
4,N1_förortsgäng,0.09,0.87,6
5,N1_globalist,-0.47,0.06,17
6,N1_återvandring,-0.21,0.44,15
7,A1_globalistisk,-0.3,0.3,14
8,antiglobalister,-0.6,0.21,6
9,V1_återvandra,-0.19,0.51,15
