In [19]:
from collections import Counter
import pandas as pd
from datetime import datetime
import wikipedia as wp
import json
import re

def check_lemma_by_hand(lemma, entity_type, named_entities):
        # mark meaningless entities
        if lemma in ['сми', 'суббота', 'организация', 'правительство', 'посольство', 'мид', 
                    'министерство иностранный дело', 'министерство', 'обратить', 'комиссия', 'вы', 'администрация',
                    'совет', 'министр иностранный дело', 'председатель', 'министр', 'премьер - министр', 'ваш',
                    'юго - восток', 'север', 'рад', 'смид', 'совет министр иностранный дело', 'восток', 
                     'госдепартамент', 'конгресс', 'скупщина', 'миссия', 'президент']:
            return 'no_count'
        
        # keep only last names of PER
        if entity_type == 'PER':
            last_name = ''
            if re.match('\w+\.\w*\.?.+', lemma):
                last_name = lemma.split('.')[-1].strip()
            elif re.match('.+\w+\.\w*\.?', lemma):
                last_name = lemma.split('.')[0].strip()
            elif ' ' in lemma:
                last_name = lemma.split()[-1].strip()
            if last_name == 'лавр':
                return 'лавров'
            elif last_name != '':
                return last_name
        
        # replace synonyms with one word
        if (lemma == 'содружество' and '|снг|' in named_entities) or lemma == 'содружество независимый государство':
            return 'снг'
        if 'озхий' in lemma:
            return 'озхо'
        if 'скрипал' in lemma:
            return 'скрипаль'
        synonyms = {
            'атр':['азиатско - тихоокеанский регион'],
            'ссср':['советский союз', 'союз советский социалистический республика'],
            'оон':['организация объединить нация'],
            'иг':['исламский государство', 'игил', 'исламский государство ирак и леванта'],
            'сирия':['сара'],
            'евразэс':['евразийский экономический союз'],
            'ес':['евросоюз', 'европейский союз'],
            'шос':['шанхайский организация сотрудничество', 'шанхайский организация сотрудничество ( шос )'],
            'россия':['российский федерация', 'рф'],
            'асеан':['асеана'],
            'астана':['астан'],
            'джабхат ан - нусра':['джабхата ан - нуср'],
            'северный африка':['север африка'],
            'фрг':['германия'],
            'сша':['соединить штат америка', 'соединить штат'],
            'китай':['кнр', 'китайский народный республика'],
            'обсе':['организация по безопасность и сотрудничество в европа'],
            'йемен':['йеменский республика ( йр )'],
            'евроатлантика':['евро - атлантика'],
            'африка':['африканский континент'],
            'нато':['североатлантический альянс'],
            'лаг':['лига арабский государство', 'лига арабский государство ( лаг )'],
            'литва':['литовский республика'],
            'совет европа':['се'],
            'южный корея':['республика корея', 'рк'],
            'северный корея':['кндр', 'корейский народный демократический республика'],
            'тунис':['тунисский республика'],
            'генеральный ассамблея':['га']
        }
        for keyword, synonyms in synonyms.items():
            if lemma in synonyms:
                return keyword
        return 'nan'
    
def check_lemma_auto(lemma, named_entities, all_entities):
    synonyms = {}
    if '(' in lemma:
        lemma_elements = lemma.split('(')
        full_name = lemma_elements[0].strip()
        abbr = lemma_elements[1].strip(' )')
        if full_name in all_entities or abbr in all_entities:
            synonyms[full_name] = [abbr, lemma]
    if '«' in lemma:
        lemma_elements = lemma.split('«')
        name = lemma_elements[1].strip(' »')
        if name in all_entities:
            synonyms[name] = [lemma]
    for keyword, synonyms in synonyms.items():
        if lemma in synonyms:
            return keyword
    return 'nan'
    
def clean_named_entities(named_entities):
    if type(named_entities) != float:
        entities = named_entities.split('\n')
        entities_upd = []
        all_entities = open('all_entities.txt', 'r', encoding='utf-8').read().split('\n')
        for entity in entities:
            entity_data = entity.split('|')
            lemma = entity_data[3]
            entity_type = entity_data[-1]
            new_lemma = check_lemma_auto(lemma, named_entities, all_entities)
            if new_lemma == 'nan':
                new_lemma = check_lemma_by_hand(lemma, entity_type, named_entities)
            if new_lemma == 'nan':
                new_lemma = lemma
            if new_lemma != 'no_count':
                entity_data_upd = '|'.join(entity_data[:3] + [new_lemma] + entity_data[4:])
            else:
                entity_data_upd = 'no_count'
            entities_upd.append(entity_data_upd)
        entities = '\n'.join(entities_upd)
        return entities
    else:
        return float('nan')
    
# join entities that were incorrectly split up
def join_split_entities(df):
    for indx, row in df.iterrows():
        nes = row['named_entities_preprocessed']
        if type(nes) != float:
            nes = nes.split('\n')
            nes_upd = []
            next_lemma = ''
            for i in range(len(nes)):
                ne = nes[i]
                if ne != 'no_count' and next_lemma != 'no_count':
                    curr_ne_data = ne.split('|')
                    end_char_current = int(curr_ne_data[2])
                    curr_lemma = curr_ne_data[3]
                    ent_type_curr = curr_ne_data[4]
                    if i != len(nes)-1 and nes[i+1] != 'no_count':
                        next_ne_data = nes[i+1].split('|')
                        start_char_next = int(next_ne_data[1]) 
                        ent_type_next = next_ne_data[4]
                        if (end_char_current == start_char_next - 1 and ent_type_next != 'PER' 
                        and (ent_type_curr == 'LOC' and ent_type_next == 'LOC') == False):
                            curr_lemma = curr_ne_data[3] + ' ' + next_ne_data[3]
                            next_lemma == 'no_count'
                        else:
                            next_lemma = ''
                    ne_data_str = '|'.join(curr_ne_data[:3] + [curr_lemma] + curr_ne_data[4:])
                    nes_upd.append(ne_data_str)
                else:
                    nes_upd.append('no_count')
                    next_lemma = ''
            nes_upd_str = '\n'.join(nes_upd)
            df.at[indx, 'named_entities_preprocessed'] = nes_upd_str  
    return df

def sort_by_official_dates(df):
    df1 = pd.DataFrame() # < 2008
    df2 = pd.DataFrame() # 2008 - 2013
    df3 = pd.DataFrame() # 2013 - 2016
    df4 = pd.DataFrame() # > 2016
    date1 = datetime.strptime('15-07-2008', '%d-%m-%Y').date()
    date2 = datetime.strptime('20-02-2013', '%d-%m-%Y').date()
    date3 = datetime.strptime('30-11-2016', '%d-%m-%Y').date()
    for index, row in df.iterrows():
        if int(row['Unnamed: 0']) not in [10995, 11494]:
            try:
                date = datetime.strptime(row['date'].strip(), '%d-%m-%Y').date()
            except ValueError:
                date = datetime.strptime(row['date'], '%m-%Y').date()
            if date <= date1:
                df1 = df1.append(row, ignore_index=True)
            if date > date1 and date <= date2:
                df2 = df2.append(row, ignore_index=True)
            if date > date2 and date <= date3:
                df3 = df3.append(row, ignore_index=True)
            if date > date3:
                df4 = df4.append(row, ignore_index=True)
    return df1, df2, df3, df4

# count most freq entities
def get_top_x_ne(df):
    all_entities = []
    for index, row in df.iterrows():
        named_entities = row['named_entities_preprocessed']
        if type(named_entities) != float:
            all_lemmas = []
            named_entities = named_entities.split('\n')
            for named_entity in named_entities:
                if named_entity != 'no_count':
                    all_lemmas.append(named_entity.split('|')[3])
            unique_lemmas = list(set(all_lemmas))
            all_entities.extend(unique_lemmas)

    freq_dict = dict(Counter(all_entities))
    top = sorted(freq_dict.items(), key = lambda kv: kv[1], reverse=True)
    return top

def show_top_x_ne(df, x):
    top = get_top_x_ne(df)
    print(top[:x])

In [5]:
df = pd.read_csv('mfa_texts_df.csv', sep = '\t', encoding = 'utf-8')

In [6]:
# clean named_entities
df['named_entities_preprocessed'] = df['named_entities'].apply(clean_named_entities)
df = join_split_entities(df)

In [4]:
# collect data from wikipedia
wp.set_lang('ru')
synonyms = {}
total = len(df)
n = 0
for nes in df['named_entities_preprocessed']:
    print('Working with line', n, 'out of', total)
    if type(nes) != float:
        nes = nes.split('\n')
        for ne in nes:
            try:
                if ne != 'no_count':
                    lemma = ne.split('|')[3]
                    search_results = wp.search(lemma, results=1)
                    if search_results != []:
                        synonyms[lemma] = search_results[0].lower()
            except:
                print('exception')
                pass
    n += 1

print('DONE WITH PART I')

# aggregate all synonyms
values = list(synonyms.values())
num_occur = dict(Counter(values))
values_with_synonyms = [occur for occur, num in list(num_occur.items()) if num > 1]

synonyms_upd = {}
for occur, synonym in synonyms.items():
    if synonym in values_with_synonyms: 
        synonyms_upd[synonym] = []
        for occur1, synonym1 in synonyms.items():
            if synonym == synonym1:
                synonyms_upd[synonym].append(occur1)
                
print('DONE WITH PART II')

# save synonyms dict to json-file
with open('wikipedia_synonyms.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(synonyms_upd, ensure_ascii=False, indent=4)) 

Working with line 0 out of 12374
Working with line 1 out of 12374
Working with line 2 out of 12374
Working with line 3 out of 12374
Working with line 4 out of 12374
Working with line 5 out of 12374
Working with line 6 out of 12374
Working with line 7 out of 12374
Working with line 8 out of 12374
Working with line 9 out of 12374
Working with line 10 out of 12374
Working with line 11 out of 12374
Working with line 12 out of 12374
Working with line 13 out of 12374
Working with line 14 out of 12374
Working with line 15 out of 12374
Working with line 16 out of 12374
Working with line 17 out of 12374
Working with line 18 out of 12374
Working with line 19 out of 12374
Working with line 20 out of 12374
Working with line 21 out of 12374
Working with line 22 out of 12374
Working with line 23 out of 12374
Working with line 24 out of 12374
Working with line 25 out of 12374
Working with line 26 out of 12374
Working with line 27 out of 12374
Working with line 28 out of 12374
Working with line 29 out

Working with line 237 out of 12374
Working with line 238 out of 12374
Working with line 239 out of 12374
Working with line 240 out of 12374
Working with line 241 out of 12374
Working with line 242 out of 12374
Working with line 243 out of 12374
Working with line 244 out of 12374
Working with line 245 out of 12374
Working with line 246 out of 12374
Working with line 247 out of 12374
Working with line 248 out of 12374
Working with line 249 out of 12374
Working with line 250 out of 12374
Working with line 251 out of 12374
Working with line 252 out of 12374
Working with line 253 out of 12374
Working with line 254 out of 12374
Working with line 255 out of 12374
Working with line 256 out of 12374
Working with line 257 out of 12374
Working with line 258 out of 12374
Working with line 259 out of 12374
Working with line 260 out of 12374
Working with line 261 out of 12374
Working with line 262 out of 12374
Working with line 263 out of 12374
Working with line 264 out of 12374
Working with line 26

Working with line 473 out of 12374
Working with line 474 out of 12374
Working with line 475 out of 12374
Working with line 476 out of 12374
Working with line 477 out of 12374
Working with line 478 out of 12374
Working with line 479 out of 12374
Working with line 480 out of 12374
Working with line 481 out of 12374
Working with line 482 out of 12374
Working with line 483 out of 12374
Working with line 484 out of 12374
Working with line 485 out of 12374
Working with line 486 out of 12374
Working with line 487 out of 12374
Working with line 488 out of 12374
Working with line 489 out of 12374
Working with line 490 out of 12374
Working with line 491 out of 12374
Working with line 492 out of 12374
Working with line 493 out of 12374
Working with line 494 out of 12374
Working with line 495 out of 12374
Working with line 496 out of 12374
Working with line 497 out of 12374
Working with line 498 out of 12374
Working with line 499 out of 12374
Working with line 500 out of 12374
Working with line 50

Working with line 707 out of 12374
Working with line 708 out of 12374
Working with line 709 out of 12374
Working with line 710 out of 12374
Working with line 711 out of 12374
Working with line 712 out of 12374
Working with line 713 out of 12374
Working with line 714 out of 12374
Working with line 715 out of 12374
Working with line 716 out of 12374
Working with line 717 out of 12374
Working with line 718 out of 12374
Working with line 719 out of 12374
Working with line 720 out of 12374
Working with line 721 out of 12374
Working with line 722 out of 12374
Working with line 723 out of 12374
Working with line 724 out of 12374
Working with line 725 out of 12374
Working with line 726 out of 12374
Working with line 727 out of 12374
Working with line 728 out of 12374
Working with line 729 out of 12374
Working with line 730 out of 12374
Working with line 731 out of 12374
Working with line 732 out of 12374
Working with line 733 out of 12374
Working with line 734 out of 12374
Working with line 73

Working with line 941 out of 12374
Working with line 942 out of 12374
Working with line 943 out of 12374
Working with line 944 out of 12374
Working with line 945 out of 12374
Working with line 946 out of 12374
Working with line 947 out of 12374
Working with line 948 out of 12374
Working with line 949 out of 12374
Working with line 950 out of 12374
Working with line 951 out of 12374
Working with line 952 out of 12374
Working with line 953 out of 12374
Working with line 954 out of 12374
Working with line 955 out of 12374
Working with line 956 out of 12374
Working with line 957 out of 12374
Working with line 958 out of 12374
Working with line 959 out of 12374
Working with line 960 out of 12374
Working with line 961 out of 12374
Working with line 962 out of 12374
Working with line 963 out of 12374
Working with line 964 out of 12374
Working with line 965 out of 12374
Working with line 966 out of 12374
Working with line 967 out of 12374
Working with line 968 out of 12374
Working with line 96

Working with line 1171 out of 12374
Working with line 1172 out of 12374
Working with line 1173 out of 12374
Working with line 1174 out of 12374
Working with line 1175 out of 12374
Working with line 1176 out of 12374
Working with line 1177 out of 12374
Working with line 1178 out of 12374
Working with line 1179 out of 12374
Working with line 1180 out of 12374
Working with line 1181 out of 12374
Working with line 1182 out of 12374
Working with line 1183 out of 12374
Working with line 1184 out of 12374
Working with line 1185 out of 12374
Working with line 1186 out of 12374
Working with line 1187 out of 12374
Working with line 1188 out of 12374
Working with line 1189 out of 12374
Working with line 1190 out of 12374
Working with line 1191 out of 12374
Working with line 1192 out of 12374
Working with line 1193 out of 12374
Working with line 1194 out of 12374
Working with line 1195 out of 12374
Working with line 1196 out of 12374
Working with line 1197 out of 12374
Working with line 1198 out o

Working with line 1399 out of 12374
Working with line 1400 out of 12374
Working with line 1401 out of 12374
Working with line 1402 out of 12374
Working with line 1403 out of 12374
Working with line 1404 out of 12374
Working with line 1405 out of 12374
Working with line 1406 out of 12374
Working with line 1407 out of 12374
Working with line 1408 out of 12374
Working with line 1409 out of 12374
Working with line 1410 out of 12374
Working with line 1411 out of 12374
Working with line 1412 out of 12374
Working with line 1413 out of 12374
Working with line 1414 out of 12374
Working with line 1415 out of 12374
Working with line 1416 out of 12374
Working with line 1417 out of 12374
Working with line 1418 out of 12374
Working with line 1419 out of 12374
Working with line 1420 out of 12374
Working with line 1421 out of 12374
Working with line 1422 out of 12374
Working with line 1423 out of 12374
Working with line 1424 out of 12374
Working with line 1425 out of 12374
Working with line 1426 out o

Working with line 1627 out of 12374
Working with line 1628 out of 12374
Working with line 1629 out of 12374
Working with line 1630 out of 12374
Working with line 1631 out of 12374
Working with line 1632 out of 12374
Working with line 1633 out of 12374
Working with line 1634 out of 12374
Working with line 1635 out of 12374
Working with line 1636 out of 12374
Working with line 1637 out of 12374
Working with line 1638 out of 12374
Working with line 1639 out of 12374
Working with line 1640 out of 12374
Working with line 1641 out of 12374
Working with line 1642 out of 12374
Working with line 1643 out of 12374
Working with line 1644 out of 12374
Working with line 1645 out of 12374
Working with line 1646 out of 12374
Working with line 1647 out of 12374
Working with line 1648 out of 12374
Working with line 1649 out of 12374
Working with line 1650 out of 12374
Working with line 1651 out of 12374
Working with line 1652 out of 12374
Working with line 1653 out of 12374
Working with line 1654 out o

Working with line 1855 out of 12374
Working with line 1856 out of 12374
Working with line 1857 out of 12374
Working with line 1858 out of 12374
Working with line 1859 out of 12374
Working with line 1860 out of 12374
Working with line 1861 out of 12374
Working with line 1862 out of 12374
Working with line 1863 out of 12374
Working with line 1864 out of 12374
Working with line 1865 out of 12374
Working with line 1866 out of 12374
Working with line 1867 out of 12374
Working with line 1868 out of 12374
Working with line 1869 out of 12374
Working with line 1870 out of 12374
Working with line 1871 out of 12374
Working with line 1872 out of 12374
Working with line 1873 out of 12374
Working with line 1874 out of 12374
Working with line 1875 out of 12374
Working with line 1876 out of 12374
Working with line 1877 out of 12374
Working with line 1878 out of 12374
Working with line 1879 out of 12374
Working with line 1880 out of 12374
Working with line 1881 out of 12374
Working with line 1882 out o

Working with line 2083 out of 12374
Working with line 2084 out of 12374
Working with line 2085 out of 12374
Working with line 2086 out of 12374
Working with line 2087 out of 12374
Working with line 2088 out of 12374
Working with line 2089 out of 12374
Working with line 2090 out of 12374
Working with line 2091 out of 12374
Working with line 2092 out of 12374
Working with line 2093 out of 12374
Working with line 2094 out of 12374
Working with line 2095 out of 12374
Working with line 2096 out of 12374
Working with line 2097 out of 12374
Working with line 2098 out of 12374
Working with line 2099 out of 12374
Working with line 2100 out of 12374
Working with line 2101 out of 12374
Working with line 2102 out of 12374
Working with line 2103 out of 12374
Working with line 2104 out of 12374
Working with line 2105 out of 12374
Working with line 2106 out of 12374
Working with line 2107 out of 12374
Working with line 2108 out of 12374
Working with line 2109 out of 12374
Working with line 2110 out o

Working with line 2314 out of 12374
Working with line 2315 out of 12374
Working with line 2316 out of 12374
Working with line 2317 out of 12374
Working with line 2318 out of 12374
Working with line 2319 out of 12374
Working with line 2320 out of 12374
Working with line 2321 out of 12374
Working with line 2322 out of 12374
Working with line 2323 out of 12374
Working with line 2324 out of 12374
Working with line 2325 out of 12374
Working with line 2326 out of 12374
Working with line 2327 out of 12374
Working with line 2328 out of 12374
Working with line 2329 out of 12374
Working with line 2330 out of 12374
Working with line 2331 out of 12374
Working with line 2332 out of 12374
Working with line 2333 out of 12374
Working with line 2334 out of 12374
Working with line 2335 out of 12374
Working with line 2336 out of 12374
Working with line 2337 out of 12374
Working with line 2338 out of 12374
Working with line 2339 out of 12374
Working with line 2340 out of 12374
Working with line 2341 out o

Working with line 2542 out of 12374
Working with line 2543 out of 12374
Working with line 2544 out of 12374
Working with line 2545 out of 12374
Working with line 2546 out of 12374
Working with line 2547 out of 12374
Working with line 2548 out of 12374
Working with line 2549 out of 12374
Working with line 2550 out of 12374
Working with line 2551 out of 12374
Working with line 2552 out of 12374
Working with line 2553 out of 12374
Working with line 2554 out of 12374
Working with line 2555 out of 12374
Working with line 2556 out of 12374
Working with line 2557 out of 12374
Working with line 2558 out of 12374
Working with line 2559 out of 12374
Working with line 2560 out of 12374
Working with line 2561 out of 12374
Working with line 2562 out of 12374
Working with line 2563 out of 12374
Working with line 2564 out of 12374
Working with line 2565 out of 12374
Working with line 2566 out of 12374
Working with line 2567 out of 12374
Working with line 2568 out of 12374
Working with line 2569 out o

Working with line 2770 out of 12374
Working with line 2771 out of 12374
Working with line 2772 out of 12374
Working with line 2773 out of 12374
Working with line 2774 out of 12374
Working with line 2775 out of 12374
Working with line 2776 out of 12374
Working with line 2777 out of 12374
Working with line 2778 out of 12374
Working with line 2779 out of 12374
Working with line 2780 out of 12374
Working with line 2781 out of 12374
Working with line 2782 out of 12374
Working with line 2783 out of 12374
Working with line 2784 out of 12374
Working with line 2785 out of 12374
Working with line 2786 out of 12374
Working with line 2787 out of 12374
Working with line 2788 out of 12374
Working with line 2789 out of 12374
Working with line 2790 out of 12374
Working with line 2791 out of 12374
Working with line 2792 out of 12374
Working with line 2793 out of 12374
Working with line 2794 out of 12374
Working with line 2795 out of 12374
Working with line 2796 out of 12374
Working with line 2797 out o

Working with line 3000 out of 12374
Working with line 3001 out of 12374
Working with line 3002 out of 12374
Working with line 3003 out of 12374
Working with line 3004 out of 12374
Working with line 3005 out of 12374
Working with line 3006 out of 12374
Working with line 3007 out of 12374
Working with line 3008 out of 12374
Working with line 3009 out of 12374
Working with line 3010 out of 12374
Working with line 3011 out of 12374
Working with line 3012 out of 12374
Working with line 3013 out of 12374
Working with line 3014 out of 12374
Working with line 3015 out of 12374
Working with line 3016 out of 12374
Working with line 3017 out of 12374
Working with line 3018 out of 12374
Working with line 3019 out of 12374
Working with line 3020 out of 12374
Working with line 3021 out of 12374
Working with line 3022 out of 12374
Working with line 3023 out of 12374
Working with line 3024 out of 12374
Working with line 3025 out of 12374
Working with line 3026 out of 12374
Working with line 3027 out o

Working with line 3228 out of 12374
Working with line 3229 out of 12374
Working with line 3230 out of 12374
Working with line 3231 out of 12374
Working with line 3232 out of 12374
Working with line 3233 out of 12374
Working with line 3234 out of 12374
Working with line 3235 out of 12374
Working with line 3236 out of 12374
Working with line 3237 out of 12374
Working with line 3238 out of 12374
Working with line 3239 out of 12374
Working with line 3240 out of 12374
Working with line 3241 out of 12374
Working with line 3242 out of 12374
Working with line 3243 out of 12374
Working with line 3244 out of 12374
Working with line 3245 out of 12374
Working with line 3246 out of 12374
Working with line 3247 out of 12374
Working with line 3248 out of 12374
Working with line 3249 out of 12374
Working with line 3250 out of 12374
Working with line 3251 out of 12374
Working with line 3252 out of 12374
Working with line 3253 out of 12374
Working with line 3254 out of 12374
Working with line 3255 out o

Working with line 3456 out of 12374
Working with line 3457 out of 12374
Working with line 3458 out of 12374
Working with line 3459 out of 12374
Working with line 3460 out of 12374
Working with line 3461 out of 12374
Working with line 3462 out of 12374
Working with line 3463 out of 12374
Working with line 3464 out of 12374
Working with line 3465 out of 12374
Working with line 3466 out of 12374
Working with line 3467 out of 12374
Working with line 3468 out of 12374
Working with line 3469 out of 12374
Working with line 3470 out of 12374
Working with line 3471 out of 12374
Working with line 3472 out of 12374
Working with line 3473 out of 12374
Working with line 3474 out of 12374
Working with line 3475 out of 12374
Working with line 3476 out of 12374
Working with line 3477 out of 12374
Working with line 3478 out of 12374
Working with line 3479 out of 12374
Working with line 3480 out of 12374
Working with line 3481 out of 12374
Working with line 3482 out of 12374
Working with line 3483 out o

Working with line 3685 out of 12374
Working with line 3686 out of 12374
Working with line 3687 out of 12374
Working with line 3688 out of 12374
Working with line 3689 out of 12374
Working with line 3690 out of 12374
Working with line 3691 out of 12374
Working with line 3692 out of 12374
Working with line 3693 out of 12374
Working with line 3694 out of 12374
Working with line 3695 out of 12374
Working with line 3696 out of 12374
Working with line 3697 out of 12374
Working with line 3698 out of 12374
Working with line 3699 out of 12374
Working with line 3700 out of 12374
Working with line 3701 out of 12374
Working with line 3702 out of 12374
Working with line 3703 out of 12374
Working with line 3704 out of 12374
Working with line 3705 out of 12374
Working with line 3706 out of 12374
Working with line 3707 out of 12374
Working with line 3708 out of 12374
Working with line 3709 out of 12374
Working with line 3710 out of 12374
Working with line 3711 out of 12374
Working with line 3712 out o

Working with line 3913 out of 12374
Working with line 3914 out of 12374
Working with line 3915 out of 12374
Working with line 3916 out of 12374
Working with line 3917 out of 12374
Working with line 3918 out of 12374
Working with line 3919 out of 12374
Working with line 3920 out of 12374
Working with line 3921 out of 12374
Working with line 3922 out of 12374
Working with line 3923 out of 12374
Working with line 3924 out of 12374
Working with line 3925 out of 12374
Working with line 3926 out of 12374
Working with line 3927 out of 12374
Working with line 3928 out of 12374
Working with line 3929 out of 12374
Working with line 3930 out of 12374
Working with line 3931 out of 12374
Working with line 3932 out of 12374
Working with line 3933 out of 12374
Working with line 3934 out of 12374
Working with line 3935 out of 12374
Working with line 3936 out of 12374
Working with line 3937 out of 12374
Working with line 3938 out of 12374
Working with line 3939 out of 12374
Working with line 3940 out o

Working with line 4142 out of 12374
Working with line 4143 out of 12374
Working with line 4144 out of 12374
Working with line 4145 out of 12374
Working with line 4146 out of 12374
Working with line 4147 out of 12374
Working with line 4148 out of 12374
Working with line 4149 out of 12374
Working with line 4150 out of 12374
Working with line 4151 out of 12374
Working with line 4152 out of 12374
Working with line 4153 out of 12374
Working with line 4154 out of 12374
Working with line 4155 out of 12374
Working with line 4156 out of 12374
Working with line 4157 out of 12374
Working with line 4158 out of 12374
Working with line 4159 out of 12374
Working with line 4160 out of 12374
Working with line 4161 out of 12374
Working with line 4162 out of 12374
Working with line 4163 out of 12374
Working with line 4164 out of 12374
Working with line 4165 out of 12374
Working with line 4166 out of 12374
Working with line 4167 out of 12374
Working with line 4168 out of 12374
Working with line 4169 out o

Working with line 4370 out of 12374
Working with line 4371 out of 12374
Working with line 4372 out of 12374
Working with line 4373 out of 12374
Working with line 4374 out of 12374
Working with line 4375 out of 12374
Working with line 4376 out of 12374
Working with line 4377 out of 12374
Working with line 4378 out of 12374
Working with line 4379 out of 12374
Working with line 4380 out of 12374
Working with line 4381 out of 12374
Working with line 4382 out of 12374
Working with line 4383 out of 12374
Working with line 4384 out of 12374
Working with line 4385 out of 12374
Working with line 4386 out of 12374
Working with line 4387 out of 12374
Working with line 4388 out of 12374
Working with line 4389 out of 12374
Working with line 4390 out of 12374
Working with line 4391 out of 12374
Working with line 4392 out of 12374
Working with line 4393 out of 12374
Working with line 4394 out of 12374
Working with line 4395 out of 12374
Working with line 4396 out of 12374
Working with line 4397 out o

Working with line 4598 out of 12374
Working with line 4599 out of 12374
Working with line 4600 out of 12374
Working with line 4601 out of 12374
Working with line 4602 out of 12374
Working with line 4603 out of 12374
Working with line 4604 out of 12374
Working with line 4605 out of 12374
Working with line 4606 out of 12374
Working with line 4607 out of 12374
Working with line 4608 out of 12374
Working with line 4609 out of 12374
Working with line 4610 out of 12374
Working with line 4611 out of 12374
Working with line 4612 out of 12374
Working with line 4613 out of 12374
Working with line 4614 out of 12374
Working with line 4615 out of 12374
Working with line 4616 out of 12374
Working with line 4617 out of 12374
Working with line 4618 out of 12374
Working with line 4619 out of 12374
Working with line 4620 out of 12374
Working with line 4621 out of 12374
Working with line 4622 out of 12374
Working with line 4623 out of 12374
Working with line 4624 out of 12374
Working with line 4625 out o

Working with line 4827 out of 12374
Working with line 4828 out of 12374
Working with line 4829 out of 12374
Working with line 4830 out of 12374
Working with line 4831 out of 12374
Working with line 4832 out of 12374
Working with line 4833 out of 12374
Working with line 4834 out of 12374
Working with line 4835 out of 12374
Working with line 4836 out of 12374
Working with line 4837 out of 12374
Working with line 4838 out of 12374
Working with line 4839 out of 12374
Working with line 4840 out of 12374
Working with line 4841 out of 12374
Working with line 4842 out of 12374
Working with line 4843 out of 12374
Working with line 4844 out of 12374
Working with line 4845 out of 12374
Working with line 4846 out of 12374
Working with line 4847 out of 12374
Working with line 4848 out of 12374
Working with line 4849 out of 12374
Working with line 4850 out of 12374
Working with line 4851 out of 12374
Working with line 4852 out of 12374
Working with line 4853 out of 12374
Working with line 4854 out o

Working with line 5056 out of 12374
Working with line 5057 out of 12374
Working with line 5058 out of 12374
Working with line 5059 out of 12374
Working with line 5060 out of 12374
Working with line 5061 out of 12374
Working with line 5062 out of 12374
Working with line 5063 out of 12374
Working with line 5064 out of 12374
Working with line 5065 out of 12374
Working with line 5066 out of 12374
Working with line 5067 out of 12374
Working with line 5068 out of 12374
Working with line 5069 out of 12374
Working with line 5070 out of 12374
Working with line 5071 out of 12374
Working with line 5072 out of 12374
Working with line 5073 out of 12374
Working with line 5074 out of 12374
Working with line 5075 out of 12374
Working with line 5076 out of 12374
Working with line 5077 out of 12374
Working with line 5078 out of 12374
Working with line 5079 out of 12374
Working with line 5080 out of 12374
Working with line 5081 out of 12374
Working with line 5082 out of 12374
Working with line 5083 out o

Working with line 5284 out of 12374
Working with line 5285 out of 12374
Working with line 5286 out of 12374
Working with line 5287 out of 12374
Working with line 5288 out of 12374
Working with line 5289 out of 12374
Working with line 5290 out of 12374
Working with line 5291 out of 12374
Working with line 5292 out of 12374
Working with line 5293 out of 12374
Working with line 5294 out of 12374
Working with line 5295 out of 12374
Working with line 5296 out of 12374
Working with line 5297 out of 12374
Working with line 5298 out of 12374
Working with line 5299 out of 12374
Working with line 5300 out of 12374
Working with line 5301 out of 12374
Working with line 5302 out of 12374
Working with line 5303 out of 12374
Working with line 5304 out of 12374
Working with line 5305 out of 12374
Working with line 5306 out of 12374
Working with line 5307 out of 12374
Working with line 5308 out of 12374
Working with line 5309 out of 12374
Working with line 5310 out of 12374
Working with line 5311 out o

Working with line 5514 out of 12374
Working with line 5515 out of 12374
Working with line 5516 out of 12374
Working with line 5517 out of 12374
Working with line 5518 out of 12374
Working with line 5519 out of 12374
Working with line 5520 out of 12374
Working with line 5521 out of 12374
Working with line 5522 out of 12374
Working with line 5523 out of 12374
Working with line 5524 out of 12374
Working with line 5525 out of 12374
Working with line 5526 out of 12374
Working with line 5527 out of 12374
Working with line 5528 out of 12374
Working with line 5529 out of 12374
Working with line 5530 out of 12374
Working with line 5531 out of 12374
Working with line 5532 out of 12374
Working with line 5533 out of 12374
Working with line 5534 out of 12374
Working with line 5535 out of 12374
Working with line 5536 out of 12374
Working with line 5537 out of 12374
Working with line 5538 out of 12374
Working with line 5539 out of 12374
Working with line 5540 out of 12374
Working with line 5541 out o

Working with line 5742 out of 12374
Working with line 5743 out of 12374
Working with line 5744 out of 12374
Working with line 5745 out of 12374
Working with line 5746 out of 12374
Working with line 5747 out of 12374
Working with line 5748 out of 12374
Working with line 5749 out of 12374
Working with line 5750 out of 12374
Working with line 5751 out of 12374
Working with line 5752 out of 12374
Working with line 5753 out of 12374
Working with line 5754 out of 12374
Working with line 5755 out of 12374
Working with line 5756 out of 12374
Working with line 5757 out of 12374
Working with line 5758 out of 12374
Working with line 5759 out of 12374
Working with line 5760 out of 12374
Working with line 5761 out of 12374
Working with line 5762 out of 12374
Working with line 5763 out of 12374
Working with line 5764 out of 12374
Working with line 5765 out of 12374
Working with line 5766 out of 12374
Working with line 5767 out of 12374
Working with line 5768 out of 12374
Working with line 5769 out o

Working with line 5973 out of 12374
Working with line 5974 out of 12374
Working with line 5975 out of 12374
Working with line 5976 out of 12374
Working with line 5977 out of 12374
Working with line 5978 out of 12374
Working with line 5979 out of 12374
Working with line 5980 out of 12374
Working with line 5981 out of 12374
Working with line 5982 out of 12374
Working with line 5983 out of 12374
Working with line 5984 out of 12374
Working with line 5985 out of 12374
Working with line 5986 out of 12374
Working with line 5987 out of 12374
Working with line 5988 out of 12374
Working with line 5989 out of 12374
Working with line 5990 out of 12374
Working with line 5991 out of 12374
Working with line 5992 out of 12374
Working with line 5993 out of 12374
Working with line 5994 out of 12374
Working with line 5995 out of 12374
Working with line 5996 out of 12374
Working with line 5997 out of 12374
Working with line 5998 out of 12374
Working with line 5999 out of 12374
Working with line 6000 out o

Working with line 6202 out of 12374
Working with line 6203 out of 12374
Working with line 6204 out of 12374
Working with line 6205 out of 12374
Working with line 6206 out of 12374
Working with line 6207 out of 12374
Working with line 6208 out of 12374
Working with line 6209 out of 12374
Working with line 6210 out of 12374
Working with line 6211 out of 12374
Working with line 6212 out of 12374
Working with line 6213 out of 12374
Working with line 6214 out of 12374
Working with line 6215 out of 12374
Working with line 6216 out of 12374
Working with line 6217 out of 12374
Working with line 6218 out of 12374
Working with line 6219 out of 12374
Working with line 6220 out of 12374
Working with line 6221 out of 12374
Working with line 6222 out of 12374
Working with line 6223 out of 12374
Working with line 6224 out of 12374
Working with line 6225 out of 12374
Working with line 6226 out of 12374
Working with line 6227 out of 12374
Working with line 6228 out of 12374
Working with line 6229 out o

Working with line 6433 out of 12374
Working with line 6434 out of 12374
Working with line 6435 out of 12374
Working with line 6436 out of 12374
Working with line 6437 out of 12374
Working with line 6438 out of 12374
Working with line 6439 out of 12374
Working with line 6440 out of 12374
Working with line 6441 out of 12374
Working with line 6442 out of 12374
Working with line 6443 out of 12374
Working with line 6444 out of 12374
Working with line 6445 out of 12374
Working with line 6446 out of 12374
Working with line 6447 out of 12374
Working with line 6448 out of 12374
Working with line 6449 out of 12374
Working with line 6450 out of 12374
Working with line 6451 out of 12374
Working with line 6452 out of 12374
Working with line 6453 out of 12374
Working with line 6454 out of 12374
Working with line 6455 out of 12374
Working with line 6456 out of 12374
Working with line 6457 out of 12374
Working with line 6458 out of 12374
Working with line 6459 out of 12374
Working with line 6460 out o

Working with line 6663 out of 12374
Working with line 6664 out of 12374
Working with line 6665 out of 12374
Working with line 6666 out of 12374
Working with line 6667 out of 12374
Working with line 6668 out of 12374
Working with line 6669 out of 12374
Working with line 6670 out of 12374
Working with line 6671 out of 12374
Working with line 6672 out of 12374
Working with line 6673 out of 12374
Working with line 6674 out of 12374
Working with line 6675 out of 12374
Working with line 6676 out of 12374
Working with line 6677 out of 12374
Working with line 6678 out of 12374
Working with line 6679 out of 12374
Working with line 6680 out of 12374
Working with line 6681 out of 12374
Working with line 6682 out of 12374
Working with line 6683 out of 12374
Working with line 6684 out of 12374
Working with line 6685 out of 12374
Working with line 6686 out of 12374
Working with line 6687 out of 12374
Working with line 6688 out of 12374
Working with line 6689 out of 12374
Working with line 6690 out o

Working with line 6891 out of 12374
Working with line 6892 out of 12374
Working with line 6893 out of 12374
Working with line 6894 out of 12374
Working with line 6895 out of 12374
Working with line 6896 out of 12374
Working with line 6897 out of 12374
Working with line 6898 out of 12374
Working with line 6899 out of 12374
Working with line 6900 out of 12374
Working with line 6901 out of 12374
Working with line 6902 out of 12374
Working with line 6903 out of 12374
Working with line 6904 out of 12374
Working with line 6905 out of 12374
Working with line 6906 out of 12374
Working with line 6907 out of 12374
Working with line 6908 out of 12374
Working with line 6909 out of 12374
Working with line 6910 out of 12374
Working with line 6911 out of 12374
Working with line 6912 out of 12374
Working with line 6913 out of 12374
Working with line 6914 out of 12374
Working with line 6915 out of 12374
Working with line 6916 out of 12374
Working with line 6917 out of 12374
Working with line 6918 out o

Working with line 7119 out of 12374
Working with line 7120 out of 12374
Working with line 7121 out of 12374
Working with line 7122 out of 12374
Working with line 7123 out of 12374
Working with line 7124 out of 12374
Working with line 7125 out of 12374
Working with line 7126 out of 12374
Working with line 7127 out of 12374
Working with line 7128 out of 12374
Working with line 7129 out of 12374
Working with line 7130 out of 12374
Working with line 7131 out of 12374
Working with line 7132 out of 12374
Working with line 7133 out of 12374
Working with line 7134 out of 12374
Working with line 7135 out of 12374
Working with line 7136 out of 12374
Working with line 7137 out of 12374
Working with line 7138 out of 12374
Working with line 7139 out of 12374
Working with line 7140 out of 12374
Working with line 7141 out of 12374
Working with line 7142 out of 12374
Working with line 7143 out of 12374
Working with line 7144 out of 12374
Working with line 7145 out of 12374
Working with line 7146 out o

Working with line 7347 out of 12374
Working with line 7348 out of 12374
Working with line 7349 out of 12374
Working with line 7350 out of 12374
Working with line 7351 out of 12374
Working with line 7352 out of 12374
Working with line 7353 out of 12374
Working with line 7354 out of 12374
Working with line 7355 out of 12374
Working with line 7356 out of 12374
Working with line 7357 out of 12374
Working with line 7358 out of 12374
Working with line 7359 out of 12374
Working with line 7360 out of 12374
Working with line 7361 out of 12374
Working with line 7362 out of 12374
Working with line 7363 out of 12374
Working with line 7364 out of 12374
Working with line 7365 out of 12374
Working with line 7366 out of 12374
Working with line 7367 out of 12374
Working with line 7368 out of 12374
Working with line 7369 out of 12374
Working with line 7370 out of 12374
Working with line 7371 out of 12374
Working with line 7372 out of 12374
Working with line 7373 out of 12374
Working with line 7374 out o

Working with line 7575 out of 12374
Working with line 7576 out of 12374
Working with line 7577 out of 12374
Working with line 7578 out of 12374
Working with line 7579 out of 12374
Working with line 7580 out of 12374
Working with line 7581 out of 12374
Working with line 7582 out of 12374
Working with line 7583 out of 12374
Working with line 7584 out of 12374
Working with line 7585 out of 12374
Working with line 7586 out of 12374
Working with line 7587 out of 12374
Working with line 7588 out of 12374
Working with line 7589 out of 12374
Working with line 7590 out of 12374
Working with line 7591 out of 12374
Working with line 7592 out of 12374
Working with line 7593 out of 12374
Working with line 7594 out of 12374
Working with line 7595 out of 12374
Working with line 7596 out of 12374
Working with line 7597 out of 12374
Working with line 7598 out of 12374
Working with line 7599 out of 12374
Working with line 7600 out of 12374
Working with line 7601 out of 12374
Working with line 7602 out o

Working with line 7804 out of 12374
Working with line 7805 out of 12374
Working with line 7806 out of 12374
Working with line 7807 out of 12374
Working with line 7808 out of 12374
Working with line 7809 out of 12374
Working with line 7810 out of 12374
Working with line 7811 out of 12374
Working with line 7812 out of 12374
Working with line 7813 out of 12374
Working with line 7814 out of 12374
Working with line 7815 out of 12374
Working with line 7816 out of 12374
Working with line 7817 out of 12374
Working with line 7818 out of 12374
Working with line 7819 out of 12374
Working with line 7820 out of 12374
Working with line 7821 out of 12374
Working with line 7822 out of 12374
Working with line 7823 out of 12374
Working with line 7824 out of 12374
Working with line 7825 out of 12374
Working with line 7826 out of 12374
Working with line 7827 out of 12374
Working with line 7828 out of 12374
Working with line 7829 out of 12374
Working with line 7830 out of 12374
Working with line 7831 out o

Working with line 8032 out of 12374
Working with line 8033 out of 12374
Working with line 8034 out of 12374
Working with line 8035 out of 12374
Working with line 8036 out of 12374
Working with line 8037 out of 12374
Working with line 8038 out of 12374
Working with line 8039 out of 12374
Working with line 8040 out of 12374
Working with line 8041 out of 12374
Working with line 8042 out of 12374
Working with line 8043 out of 12374
Working with line 8044 out of 12374
Working with line 8045 out of 12374
Working with line 8046 out of 12374
Working with line 8047 out of 12374
Working with line 8048 out of 12374
Working with line 8049 out of 12374
Working with line 8050 out of 12374
Working with line 8051 out of 12374
Working with line 8052 out of 12374
Working with line 8053 out of 12374
Working with line 8054 out of 12374
Working with line 8055 out of 12374
Working with line 8056 out of 12374
Working with line 8057 out of 12374
Working with line 8058 out of 12374
Working with line 8059 out o

Working with line 8260 out of 12374
Working with line 8261 out of 12374
Working with line 8262 out of 12374
Working with line 8263 out of 12374
Working with line 8264 out of 12374
Working with line 8265 out of 12374
Working with line 8266 out of 12374
Working with line 8267 out of 12374
Working with line 8268 out of 12374
Working with line 8269 out of 12374
Working with line 8270 out of 12374
Working with line 8271 out of 12374
Working with line 8272 out of 12374
Working with line 8273 out of 12374
Working with line 8274 out of 12374
Working with line 8275 out of 12374
Working with line 8276 out of 12374
Working with line 8277 out of 12374
Working with line 8278 out of 12374
Working with line 8279 out of 12374
Working with line 8280 out of 12374
Working with line 8281 out of 12374
Working with line 8282 out of 12374
Working with line 8283 out of 12374
Working with line 8284 out of 12374
Working with line 8285 out of 12374
Working with line 8286 out of 12374
Working with line 8287 out o

Working with line 8488 out of 12374
Working with line 8489 out of 12374
Working with line 8490 out of 12374
Working with line 8491 out of 12374
Working with line 8492 out of 12374
Working with line 8493 out of 12374
Working with line 8494 out of 12374
Working with line 8495 out of 12374
Working with line 8496 out of 12374
Working with line 8497 out of 12374
Working with line 8498 out of 12374
Working with line 8499 out of 12374
Working with line 8500 out of 12374
Working with line 8501 out of 12374
Working with line 8502 out of 12374
Working with line 8503 out of 12374
Working with line 8504 out of 12374
Working with line 8505 out of 12374
Working with line 8506 out of 12374
Working with line 8507 out of 12374
Working with line 8508 out of 12374
Working with line 8509 out of 12374
Working with line 8510 out of 12374
Working with line 8511 out of 12374
Working with line 8512 out of 12374
Working with line 8513 out of 12374
Working with line 8514 out of 12374
Working with line 8515 out o

Working with line 8717 out of 12374
Working with line 8718 out of 12374
Working with line 8719 out of 12374
Working with line 8720 out of 12374
Working with line 8721 out of 12374
Working with line 8722 out of 12374
Working with line 8723 out of 12374
Working with line 8724 out of 12374
Working with line 8725 out of 12374
Working with line 8726 out of 12374
Working with line 8727 out of 12374
Working with line 8728 out of 12374
Working with line 8729 out of 12374
Working with line 8730 out of 12374
Working with line 8731 out of 12374
Working with line 8732 out of 12374
Working with line 8733 out of 12374
Working with line 8734 out of 12374
Working with line 8735 out of 12374
Working with line 8736 out of 12374
Working with line 8737 out of 12374
Working with line 8738 out of 12374
Working with line 8739 out of 12374
Working with line 8740 out of 12374
Working with line 8741 out of 12374
Working with line 8742 out of 12374
Working with line 8743 out of 12374
Working with line 8744 out o

Working with line 8945 out of 12374
Working with line 8946 out of 12374
Working with line 8947 out of 12374
Working with line 8948 out of 12374
Working with line 8949 out of 12374
Working with line 8950 out of 12374
Working with line 8951 out of 12374
Working with line 8952 out of 12374
Working with line 8953 out of 12374
Working with line 8954 out of 12374
Working with line 8955 out of 12374
Working with line 8956 out of 12374
Working with line 8957 out of 12374
Working with line 8958 out of 12374
Working with line 8959 out of 12374
Working with line 8960 out of 12374
Working with line 8961 out of 12374
Working with line 8962 out of 12374
Working with line 8963 out of 12374
Working with line 8964 out of 12374
Working with line 8965 out of 12374
Working with line 8966 out of 12374
Working with line 8967 out of 12374
Working with line 8968 out of 12374
Working with line 8969 out of 12374
Working with line 8970 out of 12374
Working with line 8971 out of 12374
Working with line 8972 out o

Working with line 9173 out of 12374
Working with line 9174 out of 12374
Working with line 9175 out of 12374
Working with line 9176 out of 12374
Working with line 9177 out of 12374
Working with line 9178 out of 12374
Working with line 9179 out of 12374
Working with line 9180 out of 12374
Working with line 9181 out of 12374
Working with line 9182 out of 12374
Working with line 9183 out of 12374
Working with line 9184 out of 12374
Working with line 9185 out of 12374
Working with line 9186 out of 12374
Working with line 9187 out of 12374
Working with line 9188 out of 12374
Working with line 9189 out of 12374
Working with line 9190 out of 12374
Working with line 9191 out of 12374
Working with line 9192 out of 12374
Working with line 9193 out of 12374
Working with line 9194 out of 12374
Working with line 9195 out of 12374
Working with line 9196 out of 12374
Working with line 9197 out of 12374
Working with line 9198 out of 12374
Working with line 9199 out of 12374
Working with line 9200 out o

Working with line 9402 out of 12374
Working with line 9403 out of 12374
Working with line 9404 out of 12374
Working with line 9405 out of 12374
Working with line 9406 out of 12374
Working with line 9407 out of 12374
Working with line 9408 out of 12374
Working with line 9409 out of 12374
Working with line 9410 out of 12374
Working with line 9411 out of 12374
Working with line 9412 out of 12374
Working with line 9413 out of 12374
Working with line 9414 out of 12374
Working with line 9415 out of 12374
Working with line 9416 out of 12374
Working with line 9417 out of 12374
Working with line 9418 out of 12374
Working with line 9419 out of 12374
Working with line 9420 out of 12374
Working with line 9421 out of 12374
Working with line 9422 out of 12374
Working with line 9423 out of 12374
Working with line 9424 out of 12374
Working with line 9425 out of 12374
Working with line 9426 out of 12374
Working with line 9427 out of 12374
Working with line 9428 out of 12374
Working with line 9429 out o

Working with line 9632 out of 12374
Working with line 9633 out of 12374
Working with line 9634 out of 12374
Working with line 9635 out of 12374
Working with line 9636 out of 12374
Working with line 9637 out of 12374
Working with line 9638 out of 12374
Working with line 9639 out of 12374
Working with line 9640 out of 12374
Working with line 9641 out of 12374
Working with line 9642 out of 12374
Working with line 9643 out of 12374
Working with line 9644 out of 12374
Working with line 9645 out of 12374
Working with line 9646 out of 12374
Working with line 9647 out of 12374
Working with line 9648 out of 12374
Working with line 9649 out of 12374
Working with line 9650 out of 12374
Working with line 9651 out of 12374
Working with line 9652 out of 12374
Working with line 9653 out of 12374
Working with line 9654 out of 12374
Working with line 9655 out of 12374
Working with line 9656 out of 12374
Working with line 9657 out of 12374
Working with line 9658 out of 12374
Working with line 9659 out o

Working with line 9862 out of 12374
Working with line 9863 out of 12374
Working with line 9864 out of 12374
Working with line 9865 out of 12374
Working with line 9866 out of 12374
Working with line 9867 out of 12374
Working with line 9868 out of 12374
Working with line 9869 out of 12374
Working with line 9870 out of 12374
Working with line 9871 out of 12374
Working with line 9872 out of 12374
Working with line 9873 out of 12374
Working with line 9874 out of 12374
Working with line 9875 out of 12374
Working with line 9876 out of 12374
Working with line 9877 out of 12374
Working with line 9878 out of 12374
Working with line 9879 out of 12374
Working with line 9880 out of 12374
Working with line 9881 out of 12374
Working with line 9882 out of 12374
Working with line 9883 out of 12374
Working with line 9884 out of 12374
Working with line 9885 out of 12374
Working with line 9886 out of 12374
Working with line 9887 out of 12374
Working with line 9888 out of 12374
Working with line 9889 out o

Working with line 10087 out of 12374
Working with line 10088 out of 12374
Working with line 10089 out of 12374
Working with line 10090 out of 12374
Working with line 10091 out of 12374
Working with line 10092 out of 12374
Working with line 10093 out of 12374
Working with line 10094 out of 12374
Working with line 10095 out of 12374
Working with line 10096 out of 12374
Working with line 10097 out of 12374
Working with line 10098 out of 12374
Working with line 10099 out of 12374
Working with line 10100 out of 12374
Working with line 10101 out of 12374
Working with line 10102 out of 12374
Working with line 10103 out of 12374
Working with line 10104 out of 12374
Working with line 10105 out of 12374
Working with line 10106 out of 12374
Working with line 10107 out of 12374
Working with line 10108 out of 12374
Working with line 10109 out of 12374
Working with line 10110 out of 12374
Working with line 10111 out of 12374
Working with line 10112 out of 12374
Working with line 10113 out of 12374
W

Working with line 10309 out of 12374
Working with line 10310 out of 12374
Working with line 10311 out of 12374
Working with line 10312 out of 12374
Working with line 10313 out of 12374
Working with line 10314 out of 12374
Working with line 10315 out of 12374
Working with line 10316 out of 12374
Working with line 10317 out of 12374
Working with line 10318 out of 12374
Working with line 10319 out of 12374
Working with line 10320 out of 12374
Working with line 10321 out of 12374
Working with line 10322 out of 12374
Working with line 10323 out of 12374
Working with line 10324 out of 12374
Working with line 10325 out of 12374
Working with line 10326 out of 12374
Working with line 10327 out of 12374
Working with line 10328 out of 12374
Working with line 10329 out of 12374
Working with line 10330 out of 12374
Working with line 10331 out of 12374
Working with line 10332 out of 12374
Working with line 10333 out of 12374
Working with line 10334 out of 12374
Working with line 10335 out of 12374
W

Working with line 10531 out of 12374
Working with line 10532 out of 12374
Working with line 10533 out of 12374
Working with line 10534 out of 12374
Working with line 10535 out of 12374
Working with line 10536 out of 12374
Working with line 10537 out of 12374
Working with line 10538 out of 12374
Working with line 10539 out of 12374
Working with line 10540 out of 12374
Working with line 10541 out of 12374
Working with line 10542 out of 12374
Working with line 10543 out of 12374
Working with line 10544 out of 12374
Working with line 10545 out of 12374
Working with line 10546 out of 12374
Working with line 10547 out of 12374
Working with line 10548 out of 12374
Working with line 10549 out of 12374
Working with line 10550 out of 12374
Working with line 10551 out of 12374
Working with line 10552 out of 12374
Working with line 10553 out of 12374
Working with line 10554 out of 12374
Working with line 10555 out of 12374
Working with line 10556 out of 12374
Working with line 10557 out of 12374
W

Working with line 10755 out of 12374
Working with line 10756 out of 12374
Working with line 10757 out of 12374
Working with line 10758 out of 12374
Working with line 10759 out of 12374
Working with line 10760 out of 12374
Working with line 10761 out of 12374
Working with line 10762 out of 12374
Working with line 10763 out of 12374
Working with line 10764 out of 12374
Working with line 10765 out of 12374
Working with line 10766 out of 12374
Working with line 10767 out of 12374
Working with line 10768 out of 12374
Working with line 10769 out of 12374
Working with line 10770 out of 12374
Working with line 10771 out of 12374
Working with line 10772 out of 12374
Working with line 10773 out of 12374
Working with line 10774 out of 12374
Working with line 10775 out of 12374
Working with line 10776 out of 12374
Working with line 10777 out of 12374
Working with line 10778 out of 12374
Working with line 10779 out of 12374
Working with line 10780 out of 12374
Working with line 10781 out of 12374
W

Working with line 10979 out of 12374
Working with line 10980 out of 12374
Working with line 10981 out of 12374
Working with line 10982 out of 12374
Working with line 10983 out of 12374
Working with line 10984 out of 12374
Working with line 10985 out of 12374
Working with line 10986 out of 12374
Working with line 10987 out of 12374
Working with line 10988 out of 12374
Working with line 10989 out of 12374
Working with line 10990 out of 12374
Working with line 10991 out of 12374
Working with line 10992 out of 12374
Working with line 10993 out of 12374
Working with line 10994 out of 12374
Working with line 10995 out of 12374
Working with line 10996 out of 12374
Working with line 10997 out of 12374
Working with line 10998 out of 12374
Working with line 10999 out of 12374
Working with line 11000 out of 12374
Working with line 11001 out of 12374
Working with line 11002 out of 12374
Working with line 11003 out of 12374
Working with line 11004 out of 12374
Working with line 11005 out of 12374
W

Working with line 11201 out of 12374
Working with line 11202 out of 12374
Working with line 11203 out of 12374
Working with line 11204 out of 12374
Working with line 11205 out of 12374
Working with line 11206 out of 12374
Working with line 11207 out of 12374
Working with line 11208 out of 12374
Working with line 11209 out of 12374
Working with line 11210 out of 12374
Working with line 11211 out of 12374
Working with line 11212 out of 12374
Working with line 11213 out of 12374
Working with line 11214 out of 12374
Working with line 11215 out of 12374
Working with line 11216 out of 12374
Working with line 11217 out of 12374
Working with line 11218 out of 12374
Working with line 11219 out of 12374
Working with line 11220 out of 12374
Working with line 11221 out of 12374
Working with line 11222 out of 12374
Working with line 11223 out of 12374
Working with line 11224 out of 12374
Working with line 11225 out of 12374
Working with line 11226 out of 12374
Working with line 11227 out of 12374
W

Working with line 11423 out of 12374
Working with line 11424 out of 12374
Working with line 11425 out of 12374
Working with line 11426 out of 12374
Working with line 11427 out of 12374
Working with line 11428 out of 12374
Working with line 11429 out of 12374
Working with line 11430 out of 12374
Working with line 11431 out of 12374
Working with line 11432 out of 12374
Working with line 11433 out of 12374
Working with line 11434 out of 12374
Working with line 11435 out of 12374
Working with line 11436 out of 12374
Working with line 11437 out of 12374
Working with line 11438 out of 12374
Working with line 11439 out of 12374
Working with line 11440 out of 12374
Working with line 11441 out of 12374
Working with line 11442 out of 12374
Working with line 11443 out of 12374
Working with line 11444 out of 12374
Working with line 11445 out of 12374
Working with line 11446 out of 12374
Working with line 11447 out of 12374
Working with line 11448 out of 12374
Working with line 11449 out of 12374
W

Working with line 11646 out of 12374
Working with line 11647 out of 12374
Working with line 11648 out of 12374
Working with line 11649 out of 12374
Working with line 11650 out of 12374
Working with line 11651 out of 12374
Working with line 11652 out of 12374
Working with line 11653 out of 12374
Working with line 11654 out of 12374
Working with line 11655 out of 12374
Working with line 11656 out of 12374
Working with line 11657 out of 12374
Working with line 11658 out of 12374
Working with line 11659 out of 12374
Working with line 11660 out of 12374
Working with line 11661 out of 12374
Working with line 11662 out of 12374
Working with line 11663 out of 12374
Working with line 11664 out of 12374
Working with line 11665 out of 12374
Working with line 11666 out of 12374
Working with line 11667 out of 12374
Working with line 11668 out of 12374
Working with line 11669 out of 12374
Working with line 11670 out of 12374
Working with line 11671 out of 12374
Working with line 11672 out of 12374
W

Working with line 11868 out of 12374
Working with line 11869 out of 12374
Working with line 11870 out of 12374
Working with line 11871 out of 12374
Working with line 11872 out of 12374
Working with line 11873 out of 12374
Working with line 11874 out of 12374
Working with line 11875 out of 12374
Working with line 11876 out of 12374
Working with line 11877 out of 12374
Working with line 11878 out of 12374
Working with line 11879 out of 12374
Working with line 11880 out of 12374
Working with line 11881 out of 12374
Working with line 11882 out of 12374
Working with line 11883 out of 12374
Working with line 11884 out of 12374
Working with line 11885 out of 12374
Working with line 11886 out of 12374
Working with line 11887 out of 12374
Working with line 11888 out of 12374
Working with line 11889 out of 12374
Working with line 11890 out of 12374
Working with line 11891 out of 12374
Working with line 11892 out of 12374
Working with line 11893 out of 12374
Working with line 11894 out of 12374
W

Working with line 12090 out of 12374
Working with line 12091 out of 12374
Working with line 12092 out of 12374
Working with line 12093 out of 12374
Working with line 12094 out of 12374
Working with line 12095 out of 12374
Working with line 12096 out of 12374
Working with line 12097 out of 12374
Working with line 12098 out of 12374
Working with line 12099 out of 12374
Working with line 12100 out of 12374
Working with line 12101 out of 12374
Working with line 12102 out of 12374
Working with line 12103 out of 12374
Working with line 12104 out of 12374
Working with line 12105 out of 12374
Working with line 12106 out of 12374
Working with line 12107 out of 12374
Working with line 12108 out of 12374
Working with line 12109 out of 12374
Working with line 12110 out of 12374
Working with line 12111 out of 12374
Working with line 12112 out of 12374
Working with line 12113 out of 12374
Working with line 12114 out of 12374
Working with line 12115 out of 12374
Working with line 12116 out of 12374
W

Working with line 12313 out of 12374
Working with line 12314 out of 12374
Working with line 12315 out of 12374
Working with line 12316 out of 12374
Working with line 12317 out of 12374
Working with line 12318 out of 12374
Working with line 12319 out of 12374
Working with line 12320 out of 12374
Working with line 12321 out of 12374
Working with line 12322 out of 12374
Working with line 12323 out of 12374
Working with line 12324 out of 12374
Working with line 12325 out of 12374
Working with line 12326 out of 12374
Working with line 12327 out of 12374
Working with line 12328 out of 12374
Working with line 12329 out of 12374
Working with line 12330 out of 12374
Working with line 12331 out of 12374
Working with line 12332 out of 12374
Working with line 12333 out of 12374
Working with line 12334 out of 12374
Working with line 12335 out of 12374
Working with line 12336 out of 12374
Working with line 12337 out of 12374
Working with line 12338 out of 12374
Working with line 12339 out of 12374
W

TypeError: write() argument must be str, not dict

In [44]:
# drop values appearing in a corpus less than 2 times 
with open('wikipedia_synonyms.json', 'r', encoding='utf-8') as f:
    wiki_synonyms = json.loads(f.read())

top = dict(get_top_x_ne(df))

wiki_synonyms_clean = {}
for keyword, synonyms in wiki_synonyms.items():
    new_synonyms_lst = []
    for synonym in synonyms:
        if synonym in top and top[synonym] >= 2:
            new_synonyms_lst.append(synonym)
    if (len(new_synonyms_lst) > 2 or 
        (len(new_synonyms_lst) == 2 and new_synonyms_lst[0].strip() != new_synonyms_lst[1].strip())):
        wiki_synonyms_clean[keyword.split(',')[0]] = new_synonyms_lst

print('Found', len(wiki_synonyms_clean), 'matching entities')

with open('wikipedia_synonyms_clean.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(wiki_synonyms_clean, ensure_ascii=False, indent=4)) 

Found 979 matching entities


In [25]:
top = show_top_x_ne(df, 100)

[('россия', 9985), ('оон', 4515), ('москва', 3854), ('сша', 3052), ('лавров', 1929), ('сирия', 1858), ('украина', 1680), ('ес', 1653), ('путин', 1648), ('европа', 1397), ('обсе', 1393), ('ближний восток', 1267), ('совет безопасность оон', 1132), ('вашингтон', 976), ('афганистан', 911), ('снг', 879), ('нато', 859), ('ирак', 800), ('киев', 734), ('китай', 728), ('медведев', 702), ('иран', 668), ('шос', 628), ('франция', 625), ('северный африка', 624), ('фрг', 614), ('иг', 603), ('ливия', 594), ('генеральный ассамблея оон', 548), ('совет европа', 535), ('великобритания', 529), ('нью - йорк', 520), ('санкт - петербург', 517), ('африка', 513), ('ссср', 513), ('одкб', 508), ('турция', 500), ('атр', 494), ('запад', 492), ('женева', 463), ('дамаск', 453), ('сочи', 444), ('совет безопасность', 440), ('израиль', 434), ('грузия', 396), ('япония', 393), ('индия', 372), ('корейский полуостров', 370), ('казахстан', 369), ('брюссель', 363), ('латинский америка', 344), ('белоруссия', 343), ('польша', 

In [29]:
show_top_x_ne(df, 3000)

[('россия', 9982), ('оон', 4515), ('москва', 3849), ('сша', 3050), ('сирия', 1856), ('лавров', 1843), ('украина', 1679), ('путин', 1674), ('ес', 1652), ('европа', 1397), ('обсе', 1393), ('ближний восток', 1265), ('совет безопасность оон', 1132), ('вашингтон', 975), ('афганистан', 910), ('снг', 879), ('нато', 859), ('ирак', 799), ('киев', 734), ('медведев', 732), ('китай', 728), ('иран', 668), ('шос', 628), ('франция', 624), ('северный африка', 624), ('фрг', 612), ('иг', 603), ('ливия', 593), ('совет европа', 535), ('великобритания', 528), ('нью - йорк', 520), ('санкт - петербург', 515), ('африка', 513), ('ссср', 512), ('одкб', 508), ('турция', 500), ('атр', 494), ('запад', 489), ('женева', 460), ('дамаск', 446), ('сочи', 442), ('совет безопасность', 439), ('израиль', 433), ('генеральный ассамблея оон', 421), ('грузия', 396), ('япония', 393), ('корейский полуостров', 370), ('индия', 369), ('казахстан', 367), ('брюссель', 361), ('латинский америка', 344), ('польша', 341), ('белоруссия', 

[('россия', 9982),
 ('оон', 4515),
 ('москва', 3849),
 ('сша', 3050),
 ('сирия', 1856),
 ('лавров', 1843),
 ('украина', 1679),
 ('путин', 1674),
 ('ес', 1652),
 ('европа', 1397),
 ('обсе', 1393),
 ('ближний восток', 1265),
 ('совет безопасность оон', 1132),
 ('вашингтон', 975),
 ('афганистан', 910),
 ('снг', 879),
 ('нато', 859),
 ('ирак', 799),
 ('киев', 734),
 ('медведев', 732),
 ('китай', 728),
 ('иран', 668),
 ('шос', 628),
 ('франция', 624),
 ('северный африка', 624),
 ('фрг', 612),
 ('иг', 603),
 ('ливия', 593),
 ('совет европа', 535),
 ('великобритания', 528),
 ('нью - йорк', 520),
 ('санкт - петербург', 515),
 ('африка', 513),
 ('ссср', 512),
 ('одкб', 508),
 ('турция', 500),
 ('атр', 494),
 ('запад', 489),
 ('женева', 460),
 ('дамаск', 446),
 ('сочи', 442),
 ('совет безопасность', 439),
 ('израиль', 433),
 ('генеральный ассамблея оон', 421),
 ('грузия', 396),
 ('япония', 393),
 ('корейский полуостров', 370),
 ('индия', 369),
 ('казахстан', 367),
 ('брюссель', 361),
 ('латински

In [172]:
sorted_dfs = sort_by_official_dates(df)

In [181]:
show_top_x_ne(sorted_dfs[0], 100) # > 2008

[('россия', 1396), ('оон', 716), ('москва', 591), ('ес', 358), ('в.в.путин', 322), ('сша', 312), ('европа', 301), ('совет безопасность', 249), ('снг', 235), ('ирак', 224), ('обсе', 202), ('ближний восток', 172), ('санкт - петербург', 147), ('афганистан', 146), ('нато', 144), ('совет европа', 143), ('китай', 132), ('иран', 132), ('косово', 129), ('ссср', 116), ('израиль', 110), ('украина', 99), ('атр', 97), ('с.в. лавров', 91), ('шос', 86), ('грузия', 84), ('азия', 84), ('индия', 82), ('нью - йорк', 81), ('франция', 79), ('группа восемь', 79), ('генеральный ассамблея', 76), ('одкб', 76), ('фрг', 71), ('центральный азия', 71), ('ливан', 70), ('африка', 70), ('федеральный собрание', 69), ('с.в.лавров', 65), ('белоруссия', 65), ('казахстан', 65), ('эстония', 64), ('магатэ', 63), ('запад', 63), ('великобритания', 62), ('латвия', 61), ('балканы', 60), ('вто', 60), ('пна', 56), ('лаг', 55), ('корейский полуостров', 54), ('государственный дума', 53), ('нпо', 52), ('евразэс', 51), ('асеан', 50)

In [182]:
show_top_x_ne(sorted_dfs[1], 100) # 2008 - 2013

[('россия', 2694), ('оон', 1031), ('москва', 981), ('сша', 620), ('д.а.медведев', 531), ('с.в.лавров', 485), ('ес', 440), ('совет безопасность', 407), ('обсе', 355), ('европа', 319), ('афганистан', 305), ('ближний восток', 292), ('снг', 266), ('грузия', 214), ('нато', 208), ('сирия', 207), ('южный осетия', 194), ('шос', 188), ('китай', 182), ('украина', 173), ('иран', 166), ('одкб', 161), ('франция', 153), ('фрг', 152), ('атр', 151), ('вашингтон', 146), ('в.в.путин', 146), ('абхазия', 145), ('япония', 145), ('нью - йорк', 145), ('ссср', 133), ('израиль', 131), ('белоруссия', 130), ('казахстан', 128), ('совет европа', 126), ('тбилиси', 121), ('лаг', 121), ('северный африка', 121), ('санкт - петербург', 121), ('женева', 118), ('совет россия - нато', 116), ('ливия', 115), ('евроатлантика', 115), ('корейский полуостров', 109), ('индия', 102), ('египет', 101), ('африка', 98), ('великобритания', 96), ('генеральный ассамблея', 96), ('евразэс', 96), ('брюссель', 94), ('польша', 92), ('закавказ

In [183]:
show_top_x_ne(sorted_dfs[2], 100) # 2013 - 2016

[('россия', 2537), ('оон', 1226), ('москва', 1045), ('сша', 807), ('сирия', 796), ('украина', 787), ('с.в.лавров', 516), ('в.в.путин', 495), ('ближний восток', 442), ('ес', 431), ('обсе', 410), ('европа', 367), ('киев', 365), ('совет безопасность', 351), ('иг', 308), ('северный африка', 293), ('ирак', 257), ('вашингтон', 224), ('ливия', 222), ('нато', 219), ('афганистан', 216), ('дамаск', 192), ('китай', 176), ('женева', 168), ('снг', 166), ('фрг', 163), ('франция', 162), ('шос', 157), ('турция', 155), ('запад', 143), ('африка', 140), ('джабхат ан - нусра', 137), ('брикс', 133), ('брюссель', 128), ('йемен', 124), ('атр', 123), ('алеппо', 122), ('совет европа', 121), ('донбасс', 115), ('нью - йорк', 115), ('генеральный ассамблея', 114), ('крым', 112), ('дж . керри', 109), ('польша', 108), ('великобритания', 108), ('одкб', 108), ('иран', 107), ('санкт - петербург', 99), ('минск', 97), ('сочи', 97), ('индия', 95), ('латинский америка', 95), ('донецкий', 94), ('донецк', 94), ('ссср', 94), 

In [184]:
get_top_x_ne(sorted_dfs[3], 100) # > 2016

[('россия', 3365), ('оон', 1556), ('сша', 1316), ('москва', 1249), ('сирия', 811), ('с.в.лавров', 765), ('в.в.путин', 624), ('украина', 621), ('вашингтон', 559), ('обсе', 427), ('ес', 425), ('европа', 404), ('совет безопасность', 390), ('ближний восток', 362), ('киев', 300), ('нато', 293), ('иг', 283), ('сочи', 279), ('великобритания', 267), ('иран', 263), ('ливия', 254), ('афганистан', 244), ('франция', 232), ('ирак', 230), ('фрг', 228), ('запад', 226), ('турция', 222), ('китай', 220), ('снг', 215), ('шос', 214), ('африка', 205), ('дамаск', 194), ('генеральный ассамблея', 190), ('северный африка', 187), ('нью - йорк', 181), ('ссср', 167), ('еаэс', 163), ('одкб', 163), ('озхо', 162), ('женева', 162), ('венесуэла', 155), ('санкт - петербург', 151), ('брикс', 148), ('лондон', 141), ('астана', 141), ('йемен', 140), ('япония', 138), ('идлиб', 137), ('совет европа', 133), ('корейский полуостров', 132), ('донбасс', 130), ('латинский америка', 126), ('польша', 124), ('федеральный собрание', 1

In [73]:
n = 0
weirdos = []
for ne in top:
    if ne[1] == 1:
        n += 1
        weirdos.append(ne[0])
print('total: ', n)
print(weirdos)

total:  12553
['межпарламентский ассамблея православие', 'оак', 'кукут', 'маркес', 'диас - баларт', 'экстремистски', 'организация самый внимательный образ', 'вызывать', 'озхий по установление факт', 'дорожный улица', 'славяносербский район', 'вакуум', 'авдеевский коксохимический комбинат', 'римский статут государство африканский континент', 'роман', 'плотницкий', 'армянска', 'российский мотоциклист', 'мараназ', 'дакм', 'инаб', 'миннига', 'марьямейн', 'джималя', 'избранный пхеньян', 'нимр', 'мешхед', 'федерация профсоюз', 'к.пелчиньский - наленч', 'подляский воеводство', 'д.к.белый официальный представитель', 'браневский повёнок', 'химанена', 'талиб', 'аль - катыф восточный провинция', 'всемирный культурно - исторический наследие', 'джиср аш - шугур', 'совместный центр', 'совместный центр по контроль и координация украинский сторона', 'российский общество красный крест', 'украинский армия', 'лорак', 'первомайск', 'емпримяна', 'кыдыр', 'совет по положение', 'соглашение принцип партнерств