### Sampling based on SemEval + keywords extraction

In [928]:
import pandas as pd
import configparser
from newspaper import Article, Config
import time
import random
import requests
import json
import re
import deepl

config = configparser.ConfigParser()
config.read("keys.config")

hf_key = config['credentials']['hf_key']
openai_key = config['credentials']['openai_key']
key_deepL = config['credentials']['deepl_key']

In [61]:
classes = {4: 'Very dissimilar', 3: 'Somewhat dissimilar', 2: 'Somewhat similar', 1: 'Very similar', 0: '-'}

data = pd.read_csv("final_evaluation_data.csv")

data = data[(data.url1_lang == 'ru') | (data.url1_lang == 'pl') | (data.url1_lang == 'en')]
data["Overall_Class"] = data['Overall'].apply(lambda x: round(x)).apply(lambda x: classes[x])

VS = data[data.Overall_Class == 'Very similar'].sample(n=175)
SS = data[data.Overall_Class == 'Somewhat similar'].sample(n=175)
SD = data[data.Overall_Class == 'Somewhat dissimilar'].sample(n=175)
VD = data[data.Overall_Class == 'Very dissimilar'].sample(n=175)

sample = pd.concat([VS, SS, SD, VD], ignore_index=True)

### Newspaper3k

In [234]:
pattern_mistake = r"Article `download\(\)` failed with 403 Client Error: Forbidden for url: (.*) on URL .*"

def download_newspaper_3l(link, lang, max_retries=5, set_config=False):
    config = Config()
    if set_config:
        config.headers = { #https://iplogger.org/useragents/
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:124.0) Gecko/20100101 Firefox/124.0',
            'Accept-Language': 'uk-UA,uk;q=0.8',
            'Accept': 'test/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Referer': 'https://www.google.com/',
            'DNT': '1',
            'Connection': 'keep-alive'
        }
    if link != "Not found":
        for attempt in range(max_retries):
            try:
                config.request_timeout = random.randint(5, 14)
                article = Article(link, language=lang, config=config)
                article.download()
                article.parse()
                article.nlp()
                print("success")
                print(" ".join(article.keywords))
                return article.url, article.text, article.publish_date, article.summary, " ".join(article.keywords)
            except Exception as e:
                print(e)
                if not set_config:
                    match = re.match(pattern_mistake, str(e))
                    if match: #403 
                        url = match.group(1)
                        print('Now instead trying with url:', url)
                        url, text, date, summary, keywords = download_newspaper_3l(url, lang, 5, True)
                        return url, text, date, summary, keywords
                print(f"Attempt {attempt+1} of {max_retries}")
                sleep_time = 2 ** attempt + 10
                print(f"Retrying in {sleep_time} seconds.")
                time.sleep(sleep_time)
    else:
        print('Not found')
        return "","","","",""
    print("fail")
    return "","","","",""

In [None]:
def try_both_links(link1, link2, lang):
    url, text, publish_date, summary, keywords = download_newspaper_3l(link1, lang)
    if text == "":
        print('Archive failed')
        url, text, publish_date, summary, keywords = download_newspaper_3l(link2, lang)
        if text == "":
            print('Main link also failed')
    return url, text, publish_date, summary, keywords

new_columns1 = sample.apply(lambda row: try_both_links(row.ia_link1, row.link1, row.url1_lang),
                                                      axis=1, result_type='expand')
new_columns1.columns = ['source1', 'text1', 'date1', 'extractive_summary1', 'keywords1']

sample = pd.concat([new_columns1, sample], axis=1)


new_columns2 = sample.apply(lambda row: try_both_links(row.ia_link2, row.link2, row.url2_lang),
                                                      axis=1, result_type='expand')
new_columns2.columns = ['source2', 'text2', 'date2', 'extractive_summary2', 'keywords2']
sample = pd.concat([new_columns2, sample], axis=1)

In [46]:
sample = sample[(sample.text1 != "") & (sample.text2 != "")]
VS = sample[sample.Overall_Class == 'Very similar'].sample(n=160)
SS = sample[sample.Overall_Class == 'Somewhat similar'].sample(n=160)
SD = sample[sample.Overall_Class == 'Somewhat dissimilar'].sample(n=160)
VD = sample[sample.Overall_Class == 'Very dissimilar'].sample(n=160)

sample = pd.concat([VS, SS, SD, VD], ignore_index=True)

In [47]:
keywords_adjusting = lambda cell: ";".join(cell.split(" "))

sample['keywords1'] = sample['keywords1'].apply(keywords_adjusting)
sample['keywords2'] = sample['keywords2'].apply(keywords_adjusting)

### Keywords

- https://huggingface.co/docs/api-inference/detailed_parameters
- https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
- https://www.promptingguide.ai/models/mistral-7b
- https://docs.mistral.ai/guides/prompting-capabilities/

In [48]:
news_example = "В Киеве 13 марта внезапно изменится погода. В столице Украины поднимется сильный ветер.\n\nОб этом написала пресс служба ГУ ГСЧС Украины в г. Киеве со ссылкой на Укргидрометцентр. В городе объявили І уровень опасности – желтый.\n\n\"В Киеве в ближайший час и до конца суток 13 марта порывы ветра 15-18 м/с\", – говорится в сообщении.\n\nВ Киеве резко изменится погода. Иллюстрация\n\nКак сообщал OBOZREVATEL, в Киев в марте придет настоящая весна – потеплеет, вместо снега пойдет разве что дождь, чаще будет солнечная погода.\n\nНе надоедаем! Только самое важное - подписывайся на наш Telegram-канал"
mistral_prompt = lambda cell: "<s>[INST] Extract 10 keywords from a news article. The news article: \\" + news_example + "\\ Order them from the most informative to the least, in the following format: 1. <top-1 keyword>; 2. <top-2 keyword>; ... ;  10. <top-10 keyword>; [/INST] 1. погода; 2. Украина; 3. опасность; 4. Киев; 5. ветер; 6. Укргидрометцентр; 7. март; 8. изменится; 9. весна; 10. внезапно; </s> [INST] Extract 10 keywords from a news article. The news article: \\" + cell + "\\ Order them from the most informative to the least, in the following format: 1. <top-1 keyword>; 2. <top-2 keyword>; ... ;  10. <top-10 keyword>; [/INST]"

sample["mistral_prompt1"] = sample["text1"].apply(mistral_prompt)
sample["mistral_prompt2"] = sample["text2"].apply(mistral_prompt)

In [289]:
headers = {'Content-type': 'application/json', "Authorization": (f"Bearer " + hf_key)}

def query_mistral(prompt,
                  max_new_tokens=200,
                  do_sample=False, 
                  temperature=0.001,
                  top_p=0.999,
                  return_full_text=False):
    parameters = {'max_new_tokens':max_new_tokens, 
                  'return_full_text': return_full_text, 
                  'do_sample': True, 
                  'top_p':top_p, 
                  'temperature':temperature}
    options = {'use_cache': False}
    payload = {'inputs': prompt,
               'parameters': parameters,
               'options': options}
    data = json.dumps(payload)
    response = requests.request("POST",
                                "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
                                headers=headers,
                                data=data)
    try:
        return json.loads(response.content.decode("utf-8"))[0]['generated_text']
    except Exception as e:
        print(json.loads(response.content.decode("utf-8")))
    return 'Model error'

In [52]:
sample["mistral_keywords1"] = sample["mistral_prompt1"].apply(query_mistral)

[{'generated_text': ' 1. World War II veteran; 2. coronavirus; 3. hospital; 4. guard of honor; 5. nurses; 6. Tickhill Road Hospital; 7. Doncaster; 8. COVID-19; 9. recovery; 10. 99-year-old.'}]
[{'generated_text': ' 1. Parlament Europejski; 2. Polska; 3. praworządności; 4. rezolucja; 5. Europarlament; 6. Węgry; 7. sytuacja; 8. sędziów; 9. procedura; 10. Unia Europejska;\n\n1. European Parliament; 2. Poland; 3. resolution; 4. righteousness; 5. Europarliament; 6. Hungary; 7. situation; 8. judges; 9. procedure; 10. European Union.'}]
[{'generated_text': ' 1. przebudowa; 2. ul. Bohaterów Getta Warszawskiego; 3. Elbud Szczecin; 4. umowa; 5. realizacja; 6. Zarząd Dróg i Transportu Miejskiego; 7. remont; 8. nawierzchnia; 9. zieleń; 10. inwestycja;\n\n1. Elbud Szczecin wins bid for road reconstruction in Szczecin city center;\n2. Ul. Bohaterów Getta Warszawskiego, Królowej Jadwigi, and Ściegiennego to be reconstructed;\n3. New contract for Elbud Szczecin in the ongoing road improvement project;

[{'generated_text': ' 1. парламентские выборы; 2. Израиль; 3. Биньямин Нетаньяху; 4. Ликуд; 5. победа; 6. Кнессет; 7. Правительственная коалиция; 8. Бени Гантц; 9. Кахоль-Лаван; 10. выборы;\n\n1. Израиль; 2. парламентские выборы; 3. Биньямин Нетаньяху; 4. Ликуд; 5. победа; 6. Кнессет; 7. Правительственная коалиция; 8. Бени Гантц; 9. Кахоль-Лаван; 10. выборы; 11. голова; 1'}]
[{'generated_text': " 1. wyborcy; 2. wyborcza; 3. komisja; 4. prezydenckie; 5. kandydaci; 6. podpisy; 7. zmarli; 8. PKW; 9. prokuratura; 10. wypadki nieprawidłowe;\n\nExplanation:\n\n1. wyborcy (voters)\n2. komisja (commission)\n3. prezydenckie (presidential)\n4. kandydaci (candidates)\n5. podpisy (signatures)\n6. zmarli (dead people)\n7. PKW (State Electoral Commission)\n8. prokuratura (prosecutor's office)\n9. wypadki nieprawidłowe (irregularities)\n"}]
[{'generated_text': " 1. coronavirus; 2. New York City; 3. first case; 4. woman; 5. Iran; 6. self-quarantined; 7. respiratory symptoms; 8. New York's Wadsworth La

[{'generated_text': " 1. Star Wars: The Clone Wars; 2. Disney+; 3. premiere; 4. Dave Filoni; 5. Anakin Skywalker; 6. Clone Wars; 7. prequel trilogy; 8. animated series; 9. Netflix; 10. season seven.\n\nAdditional keywords:\n\n11. Obi-Wan Kenobi\n12. Padmé Amidala\n13. Jedi\n14. Republic\n15. Separatists\n16. Animated episodes\n17. Ahsoka Tano\n18. Jedi Council\n19. Clones\n20. Droids\n21. Star Wars: The Last Jedi\n22. Luke Skywalker\n23. Disney's streaming service\n24. Critical reception\n25. Fan support."}]
[{'generated_text': ' 1. Manowska; 2. Sąd Najwyższy; 3. plan; 4. problemy; 5. odebranie; 6. przywilejów; 7. Sąd Najwyższy (SN); 8. Konstytucja; 9. zasady; 10. pracownicy\n\nor\n\n1. Manowska; 2. Sąd Najwyższy; 3. plan; 4. problems; 5. odebranie; 6. niezasadnych przywilejów; 7. SN; 8. Konstytucja; 9. zasady; 10. pracownicy, sędziowie.'}]
[{'generated_text': ' 1. Kinga Rusin; 2. Wiadomości; 3. Maciej Sawicki; 4. TVP; 5. relacja; 6. skrytykowano; 7. niedawna impreza; 8. pozwy; 9. mani

[{'generated_text': ' 1. Абхазия; 2. выборы; 3. президентские; 4. Аслан Бжания; 5. победа; 6. Центральная избирательная комиссия; 7. март, 23; 8. Итоги; 9. Рауль Хаджимба; 10. Верховный суд;\n\n1. Абхазия;\n2. выборы;\n3. президентские;\n4. Аслан Бжания;\n5. победа;\n6. Центральная избирательная комиссия;\n7. март, 23;\n8. итоги;\n9. Рауль Хаджимба;\n10. Верховный суд;\n11. презид'}]
[{'generated_text': ' 1. hostage; 2. India; 3. Uttar Pradesh; 4. armed man; 5. Subhash Batham; 6. birthday party; 7. police action; 8. villagers; 9. murder case; 10. Farukkhabad;\n\n1. Children; 11. house; 12. rescue; 13. gunshots; 14. crude bomb; 15. authorities; 16. criminal charges; 17. shoot-out; 18. injuries; 19. stand-off; 20. security personnel;\n\n21. murder; 22. framed; 23. pressurise; 24. wife; 25. killed; 26. beaten; 27. stones; 28. attempted; 29. scene'}]
[{'generated_text': ' 1. influenza; 2. antigenic imprinting; 3. flu season; 4. H1N1; 5. H3N2; 6. McMaster University; 7. susceptibility; 8. a

[{'generated_text': ' 1. $1,200 payment; 2. U.S. government; 3. COVID-19 pandemic; 4. rescue package; 5. eligibility; 6. adjusted gross income; 7. accessing funds; 8. IRS; 9. tax return; 10. economic impact payment.\n\nExplanation: The article primarily focuses on the upcoming $1,200 payment from the U.S. government, which is part of a larger rescue package. The article discusses the eligibility criteria for receiving the payment and how to access it. The article also mentions the COVID-19 pandemic and the impact it has had on the economy. The IRS and tax returns are mentioned in the context of accessing the payment.'}]
[{'generated_text': " 1. Монголия; 2. Великая Отечественная война; 3. Фашистская Германия; 4. Декларация; 5. Советский Союз; 6. Помощь; 7. Монгольская Народная Республика; 8. Халхин-Гол; 9. Поставки; 10. Советская армия.\n\nExplanation:\n\n1. Mongolia is the main keyword in the article as it is the subject of the news.\n2. The Great Patriotic War (Великая Отечественная 

[{'generated_text': ' 1. ветераны; 2. Второй мировой войны; 3. Россия; 4. США; 5. парад Победы; 6. Тимоти Дэвис; 7. ТАСС; 8. Поездка; 9. Москва; 10. Санкт-Петербург;\n\n1. ветераны Второй мировой войны; 2. США; 3. Россия; 4. парад Победы; 5. Тимоти Дэвис; 6. ТАСS; 7. поездка; 8. Москва; 9. Санкт-Петербург; 10. ветераны в возрасте; \n\n1. ветераны; 2. Второй мировой войны; 3. Россия;'}]
[{'generated_text': ' 1. V4; 2. premiery; 3. Koronawirus; 4. spotkanie; 5. gospodarka; 6. rozwoju; 7. Polska; 8. Czech; 9. Węgier; 10. Słowacja;\n11. Radzie Europejskiej; 12. prezydencja; 13. Rozmawiałem; 14. Andrej Plenković; 15. Chorwacja; 16. forum UE; 17. walce; 18. ministrów zdrowia; 19. dostawy; 20. leków; 21. sprzętu; 22. uniezależnienie; 23. dostawców.'}]
[{'generated_text': ' 1. Oktoberfest; 2. pandemia koronawirusa; 3. Markus Soeder; 4. Dieter Reiter; 5. Bawaria; 6. koronawirus; 7. masowe wydarzenie; 8. decyzja; 9. ryzyko infekcji; 10. Oktoberfest 2020;\n\n1. Oktoberfest cancelled; 2. coronavir

[{'generated_text': ' 1. zakażenie koronawirusem; 2. Ministerstwo Zdrowia; 3. nowe przypadki; 4. COVID-19; 5. 104 osoby; 6. Poland; 7. raporty; 8. śmiertelny; 9. Województwa; 10. zgon;\n\n1. koronawirus; 2. Ministerstwo Health; 3. new cases; 4. Poland; 5. 104 persons; 6. reports; 7. fatal; 8. voivodeships; 9. deaths; 10. confirmed.'}]
[{'generated_text': ' 1. maseczki; 2. zakup; 3. Chiny; 4. bezpieczeństwo; 5. Sejm; 6. Ryszard Terlecki; 7. Ministerstwo Zdrowia; 8. Polska; 9. Komisja Europejska; 10. UE.\n\nExplanation: The article discusses the issue of defective masks purchased from China for Polish healthcare workers, with a focus on the Polish Vice Marshal of the Sejm, Ryszard Terlecki, demanding explanations and the return of costs. The article also mentions the involvement of the European Commission (EC) and other EU countries in similar issues with the masks. The keywords reflect the main topics of the article, including the masks, the country of origin, safety concerns, the Polis

[{'generated_text': ' 1. Minneapolis; 2. police officer; 3. George Floyd; 4. murder; 5. protests; 6. African American; 7. Derek Chauvin; 8. curfew; 9. investigation; 10. charges.\n\nAdditional keywords:\n3. violence; 4. troops; 5. demonstrators; 6. Autopsy; 7. underlying health conditions; 8. coronary artery disease; 9. hypertensive heart disease; 10. federal troops; 11. Barack Obama; 12. Joe Biden; 13. justice; 14. systemic racism; 15. former president.'}]
[{'generated_text': ' 1. UE; 2. fundusz; 3. odbudowa; 4. pandemia; 5. Polska; 6. gospodarka; 7. szczyt; 8. Komisja Europejska; 9. budżet; 10. Unia Europejska;\n\nAdditional keywords:\n11. rekordowa pomoc; 12. kryzys; 13. Mark Rutte; 14. Holandia; 15. bilateralne konsultacje; 16. Komisja Europejska; 17. Parlament Europejski; 18. nowy projekt; 19. struktura; 20. polityka spójności.'}]
[{'generated_text': ' 1. УFC; 2. Алексей Олейник; 3. боец MMA; 4. победа; 5. Фабрисиу Вердум; 6. тяжеловесы; 7. рамках турнира; 8. три раунда; 9. рейтин

[{'generated_text': ' 1. beaches; 2. Orange County; 3. closure; 4. California; 5. Gavin Newsom; 6. reopening; 7. active recreation; 8. Huntington Beach; 9. Dana Point; 10. Seal Beach.'}]
[{'generated_text': ' 1. pandemia; 2. koronawirus; 3. Jadwiga Emilewicz; 4. gospodarka; 5. Polska; 6. ograniczenia; 7. rząd; 8. koszty; 9. wybory; 10. wyborczy;\n\n1. Jadwiga Emilewicz; 2. pandemia; 3. koronawirus; 4. gospodarcze skutki; 5. Polska; 6. ograniczenia gospodarki; 7. koszty; 8. wyborów; 9. wyborczy; 10. nowe możliwości.\n\nOr I could do it like this:\n\n1. Jadwiga Emilewicz; 2. pand'}]
[{'generated_text': ' 1. Małgorzata Kidawa-Błońska; 2. rezygnacja; 3. Platforma Obywatelska; 4. Rafał Trzaskowski; 5. Radosław Sikorski; 6. zarząd; 7. wyłonienia następcy; 8. Tomasz Siemoniak; 9. kampania; 10. wiceprzewodniczący;\n11. sytuacja;\n12. komentował;\n13. odegrała rolę;\n14. porażka;\n15. aparat partyjny;\n16. trudny moment;\n17. wyjdziemy;\n18. wzmocnieni;\n19. najważniejsze;\n20'}]
[{'generated_t

[{'generated_text': ' 1. Netanjahu; 2. Izrael; 3. Trump; 4. USA; 5. Sulejmani; 6. prezydent; 7. Irán; 8. atak; 9. generał; 10. Al-Kuds;\n\n1. Netanjahu congratulates Trump;\n2. Israeli PM praises US president;\n3. Death of Iranian general Sulejmani;\n4. US attack in Bagdad;\n5. Netanjahu warns response to attack;\n6. Benjamin Netanjahu;\n7. Iran;\n8. Acknowledgement of US decision;\n9. Al-Kuds commander;\n10. Iranian second-in-command.'}]
[{'generated_text': ' 1. koronawirus; 2. Polska; 3. pochówki; urnowe; 4. ofiary; 5. Robert Czyżak; 6. Polska Izba Branży Pogrzebowej; 7. Włochy; 8. procedura; 9. chowanie ofiar; 10. główny inspektor sanitarny.\n\nExplanation: The article is about the recommendation of Robert Czyżak, the president of the Polish Funeral Industry Chamber, that almost all victims of COVID-19 in Poland have been cremated and that this procedure is recommended by experts. The article also mentions special guidelines for the burial of COVID-19 victims issued by the chief san

[{'generated_text': ' 1. Американский союз гражданских свобод (ACLU); 2. Миннесота; 3. полиция; 4. журналисты; 5. иск; 6. протесты; 7. нападения; 8. правоохранительные органы; 9. первая поправка; 10. Конституция США.\n\nAdditionally, some other relevant keywords could be:\n\n11. Мединаполис\n12. мэрия\n13. шеф полиции\n14. полицейский профсоюз\n15. глава управления общественной безопасности\n16. полковник полиции штата\n17. резиновые пули\n18. слезоточивый газ\n19. четверта'}]
[{'generated_text': " 1. COVID-19; 2. people; 3. hospitals; 4. state; 5. Vijayan; 6. virus; 7. response; 8. religious leaders; 9. corona care centers; 10. essential items.\n\nExplanation:\n\n1. COVID-19: The article is about the current situation of COVID-19 in a particular state.\n2. people: The number of people under observation and in hospitals due to COVID-19 is mentioned.\n3. hospitals: The number of people in hospitals due to COVID-19 is mentioned.\n4. state: The state's current situation regarding COVID-19

[{'generated_text': ' 1. Министр иностранных дел Нидерландов; 2. Стеф Блок; 3. Грузия; 4. встреча; 5. ЕС; 6. миссия наблюдателей; 7. МНЕС; 8. Нидерланды; 9. региональная безопасность; 10. парламентские выборы;\n\nExplanation: The article is about a visit of the Dutch Foreign Minister to Georgia and his meeting with the head of the European Union Monitoring Mission in Georgia. The keywords reflect the main topics of the article, including the names of the people and organizations involved, the purpose of the visit, and the context of the mission.'}]
[{'generated_text': ' 1. Polska; 2. prezydent; 3. Andrzej Duda; 4. Małgorzata Kidawa-Błońska; 5. sondaż; 6. marketing; 7. wizerunek; 8. onet.pl; 9. ekspert; 10. kampania;\n11. kandydat;\n12. obiad;\n13. sztab;\n14. krytykował;\n15. czarną polewką;\n16. nieetycznie;\n17. zaprosili;\n18. ekspert od wizerunku.'}]
[{'generated_text': ' 1. wybory; 2. Polska; 3. opozycja; 4. rządząca koalicja; 5. prezydenckie; 6. Donald Tusk; 7. wyborczy; 8. konst

[{'generated_text': ' 1. sondaż; 2. wyborczy; 3. Andrzej Duda; 4. Urzędujący prezydent; 5. 40 proc. głosów; 6. Rafał Trzaskowski; 7. 29 proc. respondentów; 8. Szymon Hołownia; 9. 15 proc. poparcia; 10. Krzysztof Bosak;\n11. 7 proc. uczestników sondażu;\n12. Robert Biedroń;\n13. 4 proc. poparcia;\n14. Marek Jakubiak;\n15. 1 proc. głosów;\n16. frekwencja;\n17. 70 proc. uczestników sondażu;\n18'}]
[{'generated_text': ' 1. COVID-19; 2. Fort Bend County; 3. residents; 4. cases; 5. symptoms; 6. isolation; 7. travel; 8. Dr. Minter; 9. social distancing; 10. hygiene.\n\nExplanation: The article primarily focuses on the new cases of COVID-19 in Fort Bend County and the precautions being taken to prevent the spread of the virus. The keywords reflect the main topics of the article.'}]
[{'generated_text': ' 1. Украина, МИД; 2. Россия, МИД; 3. ситуация на востоке; 4. сбитый самолет; 5. Малайзийские авиалинии; 6. рейс MH17; 7. судебный процесс; 8. Гаага; 9. Северный поток-2; 10. санкции ЕС.\n\nExpla

[{'generated_text': ' 1. Kamienica Żelazko; 2. Poznań; 3. budynek; 4. Stare Miasto; 5. 1900; 6. renowacja; 7. Constructa Plus; 8. odbudowa; 9. CDF Architekci; 10. mieszkania;\n\n1. Kamienica; 2. Poznań; 3. Żelazko; 4. budowa; 5. 1900; 6. renesansowy; 7. trójkątna parcela; 8. konstrukcja; 9. rozbiórka; 10. nowoczesne skrzydło.'}]
[{'generated_text': ' 1. Ukraińcy; 2. Polska; 3. migranci zarobkowi; 4. epidemia COVID-19; 5. praca; 6. wjazd; 7. Polski rząd; 8. ukraińskie agencja Ukrinform; 9. migracja; 10. pracownicy sezonowi.\n\nExplanation:\n\n1. Ukraińcy - the main subject of the article\n2. Polska - the country where the Ukraińcy were working and are now returning from or trying to go back to\n3. migranci zarobkowi - working migrants\n4. epidemia COVID-19 - the reason for the Ukraińcy leaving Poland and the current situation\n5. praca - work, employment\n6. wjazd -'}]
[{'generated_text': ' 1. Sejm; 2. regulamin; 3. Izba; 4. zdalna praca; 5. Kompromis; 6. tradycyjna; 7. posiedzenie; 8. 

[{'generated_text': ' 1. КПВВ; 2. линия разграничения; 3. Украина; 4. пункты пропуска; 5. карантин; 6. Донбасс; 7. Государственная пограничная служба Украины; 8. открытие; 9. регион; 10. ОРДЛО.\n\nExplanation:\n\n1. KPVV - Checkpoint of the State Border Service of Ukraine (CBSU)\n2. линия разграничения - border line\n3. Украина - Ukraine\n4. пункты пропуска - border checkpoints\n5. карантин - quarantine\n6. Донбасс - Donetsk and Luhansk regions\n7. Государственная пограничная служба Украины - State Border Service of'}]
[{'generated_text': " 1. стаèveка (strike); 2. БХЕЛ (BHEL); 3. демонстрации (demonstrations); 4. центральное правительство (central government); 5. протесты (protests); 6. банки (banks); 10-point demands (12-point demands); 7. индустрии (sectors); 8. Bhopal; 9. союзы (trade unions); 10. анти-работникам (anti-employees) политики (policies).\n\nHere's the ordered list: 1. стаèveка (strike); 2. БХЕЛ (BHEL); 3. центральное правительство (central government); 4. протесты (pro

[{'generated_text': ' 1. Bayelsa; 2. Supreme Court; 3. election; 4. governor-elect; 5. APC; 6. irregularities; 7. judgment; 8. President Muhammadu Buhari; 9. INEC; 10. PDP;\n11. November 2019;\n12. certificates;\n13. candidates;\n14. sworn in;\n15. Courage;\n16. refusing;\n17. interfere;\n18. judiciary.'}]
[{'generated_text': ' 1. retail lending stock; 2. Hungarian banks; 3. November; 4. MNB; 5. HUF 7.022 trillion; 6. transactions; 7. revaluations; 8. retail loans; 9. retail deposits; 10. net inflows/withdrawals;\n\n1. retail lending stock of Hungarian banks;\n2. up HUF 77.4 billion;\n3. nearly HUF 7.022 trillion;\n4. November;\n5. National Bank of Hungary (MNB);\n6. retail loans;\n7. transactions;\n8. HUF 83.1 bln;\n9. revaluations;\n10. HUF 5.7 bln;\n11. retail deposits;\n12. HU'}]
[{'generated_text': ' 1. Bożego Ciała; 2. procesje; 3. Kościołów; 4. uroczystości; 5. duszpasterze; 6. zdrowie; 7. komunikaty; 8. diecezje; 9. uczestnicy; 10. SARS-CoV-2;\n\nKeywords related to the article

[{'generated_text': ' 1. Colway; 2. suplementy diety; 3. LunaCol; 4. lunazyna; 5. lizozym; 6. beta-glukany; 7. multimineralny; 8. Colamina; 9. ColDeKa; 10. DetoCol.\n\nExplanation: The article is about various supplements offered by Colway, including LunaCol, Colamina, ColDeKa, and DetoCol. The top keywords reflect the brand name, the type of supplements, and the specific names of the supplements mentioned in the article.'}]
[{'generated_text': ' 1. Birds of Prey; 2. digital release; 3. featurette; 4. DC flick; 5. Cathy Yan; 6. director; 7. script; 8. origins; 9. Warner Bros.; 10. digital retailers.\n\nAdditional keywords: Margot Robbie, Christina Hodson, Birds Eye View Mode, Birds of a Feather, Grime and Crime, Wild Nerds, Romanesque, Sanity is Sooo Last Season, A Love/Skate Relationship, Gag Reel, March 24, high definition, standard definition, Video On Demand, cable and satellite providers, gaming consoles.'}]
[{'generated_text': ' 1. Ostrołęka; 2. Rafał Trzaskowski; 3. Elektrownia 

[{'generated_text': ' 1. zakażenie; 2. koronawirus; 3. SARS-CoV-2; 4. województwo lubelskie; 5. Agnieszka Strzępka; 6. nowe przypadki; 7. laboratorium; 8. pozytywne testy; 9. śmierć; 10. Ministerstwo Zdrowia;\n11. epidemia;\n12. Polska;\n13. raport sanepidu;\n14. Warszawa;\n15. objawy kliniczne;\n16. 1438;\n17. zgon;\n18. 33 907;\n19. choroby współistniejące.'}]
[{'generated_text': ' 1. Archive Team; 2. rogue archivist collective; 3. saving websites; 4. digital heritage; 5. history; 6. projects; 7. volunteering; 8. data preservation; 9. Internet Archive; 10. Wayback Machine;\n\n1. Archiveteam.org; 2. projects; 3. manifestos; 4. plans; 5. sub-collections; 6. data acquisition; 7. multi-terabyte datasets; 8. emergency backups; 9. needed sites; 10. server failures.'}]
[{'generated_text': ' 1. Великая Победа; 2. День Победы; 3. Президент; 4. Содружество; 5. Великая Отечественная война; 6. Поздравления; 7. Ветераны; 8. Дружба; 9. Советский Союз; 10. История.\n\nExplanation: The article is ab

[{'generated_text': ' 1. Киев; 2. заболевших; 3. коронавирус; 4. Виталий Кличко; 5. число; 6. сутки; 7. медики; 8. Печерский район; 9. Дарницко; 10. COVID-19;\n\n1. Киев; 2. заболевших; 3. коронавирус; 4. Виталий Кличко; 5. сутки; 6. число; 7. госпитализированы; 8. излечились; 9. Украина; 10. COVID-19;\n\n1. Киев; 2. заболевших; 3. коронавирус; 4. Виталий Кличко; 5. чи'}]
[{'generated_text': ' 1. AdBlocka; 2. uBlocka; 3. wyłącz; 4. wyjątek; 5. nasze; 6. artykuły; 7. domena; 8. spokojnie; 9. dodaj; 10. blokowanie;\n\nExplanation: The article is asking readers to disable AdBlock or uBlock on our website by adding an exception for our domain. The keywords reflect the content of the request.'}]
[{'generated_text': ' 1. SpaceX; 2. космический корабль; 3. Starship; 4. Федеральное управление гражданской авиации; 5. лицензия; 6. испытательные полеты; 7. ракета-носитель; 8. космодром Бока-Чика; 9. Техас; 10. Илона Маска;\n\n1. SpaceX получает лицензию;\n2. Федеральное управление гражданской ави

[{'generated_text': ' 1. военные медики; 2. ЦВО; 3. эвакуация; 4. учения; 5. горы Урала; 6. реанимационные мероприятия; 7. вертолет; 8. медицинский модуль; 9. военно-воздушные силы; 10. противовоздушная оборона.\n\nExplanation: The article is about a military exercise where military medics from the Central Military District (ЦВО) practiced evacuating wounded soldiers using helicopters equipped with mobile medical modules. The article mentions the use of medical equipment, such as artificial lung ventilation and heart stimulation, to save lives during transportation. The article also mentions the participation of army aviation crews, ground equipment, and over 40 military medics. The'}]
[{'generated_text': ' 1. США; 2. санкции; 3. Иран; 4. ядерная программа; 5. Совместный всеобъемлющий план действий (СВПД); 6. Госсекретарь США Майкл Помпео; 7. исключения; 8. проекты; 9. Организация по атомной энергии; 10. штрафные меры;\n\n1. США; 2. санкции; 3. Иран; 4. ядерная программа; 5. Совместный

[{'generated_text': " 1. Europa; 2. budżet; 3. Mateusz Morawiecki; 4. Rada Europejska; 5. Polska; 6. Wielka Brytania; 7. siedmioletni budżet; 8. negocjacje; 9. środki unijne; 10. Charles Michel.\n\nExplanation: The article is about the European Union budget summit, with a focus on Polish Prime Minister Mateusz Morawiecki's stance on the need for a larger budget. The keywords reflect the main topics of the article, including the European Union, budget, Poland, negotiations, and the absence of the UK."}]
[{'generated_text': ' 1. Конституция; 2. Россия; 3. Владимир Путин; 4. Вячеслав Володин; 5. Госдума; 6. изменения; 7. политическая культура; 8. диалог; 9. обратная связь; 10. Конституционный строй.'}]
[{'generated_text': ' 1. Renata Beger; 2. PiS; 3. Samoobrona; 4. afera; 5. rząd; 6. Sejm; 7. Adam Lipiński; 8. nagrania; 9. poparcie; 10. polityka;\n\nExplanation:\n\n1. Renata Beger - the main subject of the article\n2. PiS - the political party in question\n3. Samoobrona - the political p

[{'generated_text': ' 1. Politycy; 2. Porozumienia; 3. Jan Strzeżek; 4. Jakub Drożdż; 5. prokuratura; 6. wykroczenie; 7. akcja dezinformacyjna; 8. LGBT; 9. znamiona wykroczenia; 10. Barta Staszewski;\n11. zdjęcia; 12. tablice informacyjne; 13. nazwy polskich miejscowości; 14. Guy Verhofstadt; 15. PE; 16. fake news; 17. homofobiczne zachowania; 18. Polska; 19. aktywiści; 20. dezinformacja.'}]
[{'generated_text': ' 1. мост; 2. Дедовичский район; 3. река Шелонь; 4. капитальный ремонт; 5. опоры; 6. ООО «Строительная компания «Балтийский Берег»; 7. ремонт сооружения; 8. Псковское агентство информации; 9. федеральный центр; 10. 2021 год;\n11. автомобильная дорога\n12. Дубровка – Дедовичи\n13. график\n14. техника\n15. трудятся\n16. финансовая поддержка\n17. стоимость контракта\n18. завершение работ\n19. ростверк\n2'}]
[{'generated_text': ' 1. Louise Redknapp; 2. Lorraine; 3. interview; 4. UK tour; 5. pop beauty; 6. ageless beauty; 7. plunging denim shirt; 8. Strictly Come Dancing; 9. splittin

[{'generated_text': ' 1. грипп; 2. Киев; 3. заболеваемость; 4. ОРВИ; 5. Николай Поворозник; 6. УНН; 7. КГГА; 8. рост; 9. дети; 10. взрослые;\n11. болезни; 12. повысился; 13. эпидемический процесс; 14. госпитализация; 15. Валентина Гинзбург;\n16. гриппом; 17. лабораторно подтвержденный; 18. реанимация; 19. дети; 20. прививка.'}]
[{'generated_text': " 1. rozwój duchowy; 2. Kaznodziejów; 3. entuzjazm; 4. starość; 5. młodość; 6. Pismo Święte; 7. rozwój; 8. dojrzałość; 9. mądrość; 10. akceptacja;\n\nExplanation:\nThe article discusses the importance of spiritual growth and development throughout one's life, contrasting the initial enthusiasm and idealism of youth with the deeper understanding and wisdom of old age. The keywords reflect the main themes of the text, including spiritual growth, youth, old age, wisdom, and acceptance."}]
[{'generated_text': ' 1. покушение; 2. убийство; 3. мужчина; 4. женщина; 5. колодец; 6. конфликт; 7. драка; 8. Одесская область; 9. Лиманский район; 10. правоо

[{'generated_text': ' 1. переговоры; 2. торговые отношения; 3. Брюссель; 4. Великобритания; 5. ЕС; 6. вторая фаза; 7. выход из Евросоюза; 8. Мишель Барнье; 9. Дэвид Фрост; 10. диалог;\n\n1. переговоры о торговых отношениях; 2. Брюссель; 3. Великобритания; 4. ЕС; 5. вторая фаза; 6. Лондон; 7. Мишель Барнье; 8. Дэвид Фрост; 9. торговые отношения; 10. переходный период.'}]
[{'generated_text': ' 1. Секретарь Совбеза; 2. Николай Патрушев; 3. ограничительные меры; 4. санитарные режимы; 5. коронавирус; 6. продовольствие; 7. лекарства; 8. регионы Урала; 9. спекуляции; 10. Совет безопасности РФ.\n\nExplanation: The article is about Russian Secretary of the Security Council, Nikolai Patrushev, urging the regions of the Urals to enforce all established restrictive measures and sanitary regimes to save lives and prevent speculation on food and medicine markets during the coronavirus pandemic. The keywords reflect the main topics of the article, including the secretary, the Security Council, the re

[{'generated_text': ' 1. student account; 2. theft; 3. BYU-Idaho; 4. Pell Grant; 5. account not belonging to the student; 6. police; 7. security incident; 8. investigation; 9. data security; 10. forensic firm;\n\n1. BYU-Idaho; 2. student accounts; 3. theft; 4. less than 100; 5. compromised; 6. passwords; 7. notification; 8. investigation; 9. security measures; 10. forensic firm;\n\n1. student; 2. account; 3. theft; 4. BYU-Idaho; 5. Pell Grant; 6. $900.16; 7. account not belonging to the student; 8. investigation; 9. security; '}]
[{'generated_text': " 1. weight loss; 2. New Year's Resolutions; 3. healthy habits; 4. permanent; 5. realistic goals; 6. calorie deficit; 7. daily goals; 8. activity; 9. food intake; 10. HealthMetric.\n\nExplanation:\n\n1. weight loss: The main topic of the article.\n2. New Year's Resolutions: The context in which the weight loss topic is presented.\n3. healthy habits: The recommended approach to weight loss.\n4. permanent: The desired outcome of the weight lo

[{'generated_text': ' 1. Ростов-на-Дону; 2. Роспотребнадзор; 3. госпитализированные; 4. ОРВИ; 5. грипп; 6. Ростовская область; 7. период; 8. увеличилось; 9. диагноз; 10. вирус.\n\nExplanation: The article is about an increase in the number of hospitalizations due to ORVI (acute respiratory viral infection) in the Rostov region. The keywords reflect the main topics of the article, including the location, the organization responsible for the information, the number of hospitalized people, the specific diagnosis, and the time period.'}]
[{'generated_text': ' 1. медведи; 2. видео; 3. распространение; 4. Бурятия; 5. расстрел; 6. артель; 7. Сининда-1; 8. Северобайкальский район; 9. прокуратура Бурятии; 10. правоохранительные органы;\n11. дикие животные (optional)\n12. стрельба (optional)\n13. люди (optional)\n14. артельный работник (optional)\n15. отходы (optional)\n16. столовая (optional)\n17. убийство (optional)\n18. медвежат (optional)\n19. обращение (optional)\n20. заявление (optional)'}

[{'generated_text': ' 1. Польша; 2. коронавирус; 3. эпидемия; 4. Матеуш Моравецкий; 5. карантин; 6. премьер-министр; 7. число зараженных; 8. проверки; 9. солдаты; 10. борьба с коронавирусом;\n\n1. Польша; 2. коронавирус; 3. эпидемия; 4. коронавирусная инфекция; 5. Матеуш Моравецкий; 6. премьер-министр; 7. число зараженных (десять тысяч); 8. прогноз; 9. готовятся; 10. Лука'}]
[{'generated_text': ' 1. koronawirus; 2. Włochy; 3. lekarze; 4. zmarli; 5. federacja lekarska; 6. służba zdrowia; 7. Italia; 8. zakażeni; 9. śmierć; 10. reanimacja;\n\n1. koronawirus (COVID-19);\n2. Włochy;\n3. lekarze;\n4. zmarli;\n5. federacja;\n6. służba zdrowia;\n7. Italia;\n8. zakażeni;\n9. śmierć;\n10. reanimacja;\n\nor\n\n1. coronavirus;\n2. Italy;\n3. doctors;\n4. deaths;\n5. Italian Federation of Doctors;\n6.'}]
[{'generated_text': ' 1. Wuhan; 2. epidemia; 3. Chiny; 4. szpitale; 5. Li Keqiang; 6. koronawirus; 7. wizyta; 8. nowe ośrodki; 9. szpitaly; 10. rząd centralny;\n11. przeciążenie; 12. obszary zamies

[{'generated_text': ' 1. культура; 2. работники; 3. бюджет; 4. зарплата; 5. лето; 2020 год; 6. Алтайский край; 7. Елена Безрукова; 8. минимальный размер оплаты труда; 9. урезана; 10. размера оплаты труда;\n\n1. культура; 2. работники; 3. бюджет; 4. зарплата; 5. лето; 6. 2020 год; 7. Алтайский край; 8. Елена Безрукова; 9. меньше минимального размера оплаты труда; 10. урезана;\n\n1. культура;'}]
[{'generated_text': ' 1. epidemia; 2. rząd; 3. zmienione rozporządzenie; 4. ministra zdrowia; 5. koronawirus; 6. Szpital Powiatowy w Skarżysko-Kamiennej; 7. oddział neurologiczny; 8. pielęgniarka; 9. zarażona; 10. COVID-19.\n\nAdditional context: In Poland, a hospital ward was closed due to a nurse being infected with coronavirus. The nurse had also worked at another hospital. All patients and staff on the ward were quarantined and swab samples were taken for testing. The director of the hospital expressed concern about the potential impact on the hospital if the test results were positive. Keywo

[{'generated_text': ' 1. Hopsquad; 2. brewery; 3. opening; 4. August 2018; 5. delays; 6. taproom; 7. North Austin; 8. beers; 9. food truck; 10. Tsuke Honten;\n\n1. Hopsquad Brewing; 2. opening; 3. taproom; 4. North Austin; 5. beers; 6. delays; 7. August 2018; 8. food truck; 9. Tsuke Honten; 10. brewery opening.'}]
[{'generated_text': ' 1. piłkarki; 2. reprezentacja Stanów Zjednoczonych; 3. żądania; 4. sąd; 5. wynagrodzenie; 6. Megan Rapinoe; 7. federacja; 8. FIFA; 9. turniej kobiecy; 10. równość;\n\n1. piłkarki; 2. reprezentacja; 3. żądania; 4. sąd; 5. wynagrodzenie; 6. USA; 7. Megan Rapinoe; 8. federacja; 9. FIFA; 10. równość;\n\n1. piłkarki; 2. reprezentacja; 3. żądania; 4. sąd; 5. płeć; 6. USA; '}]
[{'generated_text': ' 1. Германия; 2. ограничение социальных контактов; 3. COVID-19; 4. власти; 5. продлили; 6. коронавирус; 7. кабмин ФРГ; 8. социальные контакты; 9. решение; 10. июня.'}]
[{'generated_text': ' 1. Россия; 2. брак; 3. законопроект; 4. правительство; 5. возраст; 6. корректи

[{'generated_text': ' 1. карантинные меры; 2. школьники; 3. коронавирус; 4. весенние каникулы; 5. городские власти; 6. подростки; 7. торговые центры; 8. вай-фай; 9. ликвидировать; 10. рейды.\n\nExplanation: The article discusses the challenge of keeping teenagers at home during extended school holidays due to coronavirus measures, and the proposed solution of blocking Wi-Fi in large shopping malls. The keywords reflect the main topics and themes of the article.'}]
[{'generated_text': ' 1. Polska; 2. wyborczy; 3. prezydent; 4. Szymon Hołownia; 5. partia polityczna; 6. PiS; 7. PO; 8. wybory; 9. marketing polityczny; 10. władza.\n\nExplanation: The article discusses the idea of a "bezelpartyjny" (bezeless, or non-partisan) candidate for the Polish presidency, specifically referring to Szymon Hołownia. The author argues that the belief in a non-partisan president being effective is a myth, as the president needs the support of a political party to pass legislation through parliament. The a

[{'generated_text': ' 1. utwory; 2. Gremi Media SA; 3. prawa autorskie; 4. rozpowszechnianie; 5. część; 6. całość; 7. zakaz; 8. wszelkie pola eksploatacji; 9. Interneet; 10. zgoda;\n\nExplanation: The article is about copyright protection for works published in a diary by Gremi Media SA. The keywords reflect the main ideas of the text, including the works in question, the entity responsible for their copyright, the prohibition on reproducing or distributing the works without permission, and the various forms of media through which the works may be distributed.'}]
[{'generated_text': ' 1. encephalitis; 2. Borna disease virus 1 (BoDV-1); 3. Germany; 4. people; 5. deaths; 6. shrews; 7. neurological disease; 8. fatal; 9. researchers; 10. The Lancet Infectious Diseases;\n11. brain-swelling;\n12. virus;\n13. horses;\n14. sheep;\n15. rural areas;\n16. medical professionals;\n17. unclear cause;\n18. endemic regions;\n19. spillover infections;\n20. immunocompromised.'}]
[{'generated_text': ' 1.

[{'generated_text': ' 1. коронавирус; 2. мир; 3. Китай; 4. Европа; 5. США; 6. Россия; 7. Польша; 8. Литва; 9. Норвегия; 10. Эквадор;\n\nKeywords related to the article: coronavirus, world, China, Europe, USA, Russia, Poland, Lithuania, Norway, Equatorial Guinea, Uzbekistan, Africa, cases, deaths, Spain, Italy.'}]
[{'generated_text': ' 1. Emerging Asia; 2. fuel price inflation; 3. interest rates; 4. activity; 5. underlying price pressures; 6. headline inflation; 7. COVID-19; 8. restrictions; 9. economic activity; 10. stimulus measures.\n\nExplanation:\n\n1. Emerging Asia: The article is about the economic situation in Emerging Asia.\n2. Fuel price inflation: The article discusses the drop in fuel price inflation in Emerging Asia.\n3. Interest rates: The article mentions that central banks will keep interest rates low to support activity.\n4. Activity: The article talks about economic activity in Emerging Asia.\n5. Underlying price pressures: The article states that underlying price pres

[{'generated_text': ' 1. Чечня; 2. памятник; 3. медицинские работники; 4. погибшие; 5. Великая Отечественная война; 6. контртеррористическая операция; 7. установлен; 8. Республиканская клиническая больница; 9. Эпендиева; 10. оргкомитет;\n\n1. Чечня; 2. памятник; 3. медицинские работники; 4. погибшие; 5. Великая Отечественная война; 6. контртеррористическая операция; 7. увековечивание; 8. Республиканская клиническая больница; 9. Эпенди'}]
[{'generated_text': ' 1. Ани Лорак; 2. украинская певица; 3. реклама; 4. маски; 5. сториз; 6. фото; 7. критика; 8. Алла Крутая; 9. вечеринка; 10. Лев Лещенко;\n\n1. Ани Лорак; 2. певица; 3. реклама; 4. маска; 5. сториз; 6. фото; 7. удивила; 8. поклонники; 9. Алла Крутая; 10. вечеринка;\n\n1. Ани Лорак; 2. певица; 3. реклама; 4. маска; 5. сториз; 6. фото; '}]
[{'generated_text': ' 1. Татария; 2. строительство; 3. республиканская клиническая инфекционная больница; 4. корпус; 5. Кизань; 6. республиканский бюджет; 7. нового корпуса; 8. койки; 9. реанимацио

[{'generated_text': ' 1. цены; 2. вторичные квартиры; 3. российские города; 4. увеличение; 5. стоимость; 6. ипотечные ставки; 7. активизация; 8. покупатели; 9. инвесторы; 10. регионы\n\nExplanation: The article discusses the increase in prices for second-hand apartments in Russian cities with populations over 300,000 people. The keywords reflect the main topics of the article, including the increase in prices, second-hand apartments, Russian cities, and the reasons for the price rise, such as activating buyers and investors.'}]
[{'generated_text': ' 1. COVID-19; 2. Poland; 3. Mariusz Kamiński; 4. Ministerstwo Spraw Wewnętrznych i Administracji; 5. organizatorzy imprez masowych; 6. decyzje; 7. mecze piłkarskie; 8. PKO Bank Polski Ekstraklasy; 9. zakaz; 10. organizowane;\n\n1. COVID-19 outbreak; 2. Poland government; 3. Mariusz Kamiński (Interior Minister); 4. Decisions on mass events; 5. Organizers; 6. Football matches; 7. PKO Bank Polski Ekstraliga; 8. Ban; 9. No public attendance; 10.

[{'generated_text': ' 1. Челябинская область; 2. ФСБ России; 3. Анаболические препараты; 4. Пограничники; 5. Пункт пропуска "Бугристое"; 6. Обнаружен; 7. Крупная партия; 8. Документы; 9. Запасные части; 10. Компьютерная техника.'}]
[{'generated_text': ' 1. валюты; 2. Узбекистан; 3. курсы; 4. доллар; 5. евро; 6. Центральный банк; 7. рубль; 8. Ташкент; 9. рост; 10. июня; \n\nExplanation: The article is about the changing exchange rates of the dollar, euro, and ruble in Uzbekistan, specifically in the city of Tashkent. The most important keywords are the names of the currencies and the location, as well as the fact that the exchange rates are changing and the specific amounts of the new exchange rates.'}]
[{'generated_text': ' 1. авиарейсы; 2. Тибетские авиалинии; 3. Лхаса; 4. Тибетский автономный район; 5. Китай; 6. развитие туризма; 7. новое лето; 8. города: Куньмин, Чанша, Шицзячжуан; 9. обслуживать; 10. 66 авиарейсов; 11. статистика; 12. туристы; 13. превысил; 14. 40 млн; 15. рост.'}]

[{'generated_text': " 1. курс доллара; 2. Украина; 3. гривня; 4. НБУ; 5. Минфин; 6. Владимир Мазуренко; 7. пороговая цифра; 8. инфляция; 9. МВФ; 10. прогноз.\n\nExplanation: The article discusses the potential increase in the dollar's value in Ukraine, reaching a psychological threshold of 25 UAH/$ by the end of February. The expert, Vladimir Mazurenko, explains the reasons for this trend, including import gas prices, low season for major exporters, and high demand for the dollar. The article also mentions the impact of this situation on inflation and the budget, as well as the MVF's predictions for the value of the hryvnia in the coming years."}]
[{'generated_text': ' 1. fire; 2. story; 3. colors; 4. remission; 5. old tree; 6. vortex; 7. elision; 8. realities; 9. human world; 10. bones.\n\nNote: This text is a poem and not a news article, but I have tried to extract the most relevant keywords based on the given context.'}]
[{'generated_text': " 1. ecommerce industry; 2. India; 3. regu

[{'generated_text': ' 1. Polska; 2. UE; 3. Brexit; 4. Nigel Farage; 5. Europejski parlament; 6. Polexit; 7. decyzje; 8. system rządowy; 9. Brukseli; 10. Radosław Sikorski.\n\nExplanation: The article discusses Nigel Farage\'s statement that Poland may be the next country to leave the EU, a potential "Polexit," and the reactions to this statement from various sources. The most informative keywords are those directly related to the topic of the article.'}]
[{'generated_text': ' 1. Василий Белоусов; 2. Мурманская область; 3. книга; 4. журналист; 5. второе издание; 6. ТАСС; 7. Почетный гражданин; 8. Мурманское Заполярье; 9. изданию; 10. встреча;\n\n1. Василий Белоусов; 2. книга; 3. Мурманская область; 4. журналист; 5. второе издание; 6. ТАСС; 7. Почетный гражданин; 8. Мурманское Заполярье; 9. изданию; 10. встреча с Виктором Сайгиным;\n\n1. Василий Белоусов; 2. книга; 3'}]
[{'generated_text': ' 1. Россия; 2. Роспотребнадзор; 3. коронавирус; 4. показатели; 5. заражение; 6. лаборатории; 7. те

In [None]:
sample["mistral_keywords2"] = sample["mistral_prompt2"].apply(query_mistral)

In [54]:
pattern_mistral = r"^(1|2|3|4|5|6|7|8|9|(10)){1}\.\s*(.*)"

def answer_engineering_mistral(cell):
    new_cell = cell.split('\n\n')[0].strip()
    new_list = []
    for word in new_cell.split(';'):
        match = re.match(pattern_mistral, word.strip(' \n.'))
        if match:
            new_list.append(match.group(3).strip('.'))
    return ";".join(new_list)

sample["mistral_keywords1"] = sample["mistral_keywords1"].apply(answer_engineering_mistral)
sample["mistral_keywords2"] = sample["mistral_keywords2"].apply(answer_engineering_mistral)

In [953]:
def translate_to_ukr(keywords, language, target_lang='uk'):
    try:
        translator = deepl.Translator(key_deepL)
        result = translator.translate_text(keywords, source_lang=language, target_lang=target_lang)
        print(str(result))
        return str(result)
    except Exception as e:
        print(e)
    return keywords

In [65]:
sample["mistral_keywords1_translated"] = sample.apply(lambda row: translate_to_ukr(row.mistral_keywords1, row.url1_lang),
                                                      axis=1)

Ветеран Другої світової війни;коронавірус;лікарня;почесна варта;медсестри;лікарня Тікхілл Роуд;Донкастер;COVID-19;одужання;99-річний
Європейський парламент;Польща;верховенство права;резолюція;Європарламент;Угорщина;становище суддів;процедура;Європейський Союз
Реконструкція;вулиця Богатерова Ґетта Варшавського;Elbud Szczecin;контракт;реалізація;Міське управління доріг і транспорту;ремонт;дорожнє покриття;озеленення;інвестиції
ФАПи;Олександр Котов;Стругокрасненський район;оглянув;нове;медичне обслуговування;села;Молоді, Лудоні, Рівне;водогін;участь
Заняття;перенесено;березень;Школи;Матеуш Моравецький;Прем'єр-міністр;Лукаш Шумовський;Міністр охорони здоров'я;Діти;Марлена Малонґ
Тверська вулиця;Москва;фестиваль "Кримська весна";пішохідна;програма;інтелектуальні ігри; "Що? Де? Коли?";BrainZona;майстер-класи;Тверська вулиця (пішохідна)
Канцлер;Ріші Сунак;самозайняті;£9 млрд;коронавірус;підтримка;£2 500;прибуток;податкова декларація;червень
помер;коронавірус;ускладнення;чоловік;лікарня Тихи;с

мир;глобальний світ;злагоди;толерантності;міжнаціональні відносини;конфлікти;Казахстан;міжетнічна злагода;Нурсултан Назарбаєв;Асамблея народу Казахстану
коронавірус;Росія;Путін;Михайло Мішустін;Тетяна Голікова;Анна Попова;Михайло Мурашко;Євген Зінічев;SARS;MERS
Португалія;інфекція;коронавірус;двоє пацієнтів;лікарні;Порту;новий вірус;Європа;міністр охорони здоров'я;Граса Фрейташ
вбивство;ненавмисне вбивство;Гектор;Ентоні Теббен;Елізабет П. Лінч;стрілянина;судмедексперт округу Хеннепін;домашнє насильство;округ Ренвілл;розслідування
COVID-19;світ;зараження;коронавірус;ранок;число;підтверджених випадків;світове;летальні;Укрінформ;коронавірус;світ;зараження;4,8 мільйона;летальні;Укрінформ;Worldometer;316 тисяч;померлі;число випадків
відкриття, виробництв, торгових, торгових центрів, готелів, епідеміологічних, критеріїв, економіст, роботодавців, Польщі, розморожування, економіки
Виплата 1200 доларів США;уряд США;пандемія COVID-19;пакет допомоги;право на отримання;скоригований валовий дохід;д

Кладовища;Уряд;рішення;Право і справедливість;Ярослав Качинський;адміністратор;в'їзд;права;Повазькі кладовища;процедури
оренда житла;Флорида;правила;місцеве самоврядування;уряд штату;виборні лідери;Сент-Армандс-Кі;Лонгбот-Кі;острів Анна-Марія;законодавство;острів Анна-Марія
Прем'єр-міністр;Аскар Мамін;Казахстан;Республіка;Нур-Султан;Торгівля;Промисловість;Аль-Куварі;Катар;Зустрічі
Анна Ведута;Олексій Венедіков;домагання;Еха Москви;звинувачення;редактор;2012 рік;вечеря;автомобіль;сексуальне harassment
Київ;батько;погода;зник;проведення пошуків;огорож allotment;Кwisy river;прокуратура;підозрюваний
Меган Маркл;принц Гаррі;Арчі;слова;улюблений;мама;тато;книга;собака;джерело
терористи;Шоп'ян;ЛеТ;зіткнення;Шабір Ахмад Малік;Кульгам;Амір Ахмад Дар;Вадіна Мелхура;вилікуваний;підготовлений PK
Конституційний трибунал;Президент ТК Юлія Пшиленбська;Ельжбета Вітек;президентські вибори;термін;Конституція;Виборчий кодекс;спікер Сейму;заява;Конституційний трибунал - провадження
Ломбардія;італійський р

Вибори;Польща;опозиція;правляча коаліція;президент;Дональд Туск;вибори;конституція;Ярослав Качинський;надзвичайний стан
COVID-19;Сіетл;соціальна дистанція;Вашингтон;губернатор Джей Інслі;наказ залишатися вдома;Інститут моделювання хвороб;округ Кінг;швидкість передачі;коронавірус
Конгрес;американський народ;законодавчий процес;Гамільтон;Капітолій США;Палата представників;дебати;президентство;Конституція;конкуруючі інтереси
Карлос Гон;Японія;біг;автомобілебудівний альянс;Renault - Nissan - Mitsubishi;Осака;швидкісний поїзд "Сінкансен";Сінагава;домашній арешт;Ліван
Санкт-Петербург;депутати;Законодавчі збори;дитячий омбудсмен;Світлана Агапітова;Анна Мітяніна;процедура;кандидати;рішення;парламентська комісія
Примас Вишинський;ізоляція;Ева Чачковська;Ясна Гура;беатифікація;цикл;віра;надія;любов;інтерновані
дані;ви;сайт;інформація;матеріал;реклама;обробка;конфіденційність;політика;згода;мета;файли cookie
інфекція;коронавірус;SARS-CoV-2;Малопольська;лабораторія;тестування;мешканці;госпіталізов

Байєльса;Верховний суд;вибори;обраний губернатор;АПК;порушення;рішення;президент Мухаммаду Бухарі;ІНЕК;НДП
роздрібний кредитний портфель;угорські банки;листопад;MNB;7,022 трлн форинтів;операції;переоцінки;роздрібні кредити;роздрібні депозити;чисті притоки/відтоки
Тіло Христове;процесії;церкви;святкування;душпастирство;здоров'я;послання;єпархії;учасники;SARS-CoV-2
Президентські. вибори. 2020. відбудуться. 28. червня,. кандидати. у. президенти. коротко. про. передвиборчі. програми. на. посаду. президента.
Павлас Адамович;смертельний напад;Гданськ;TVN;рік потому;фото;президент;Вугільний ринок;меморіальна дошка;трагічна смерть
коронавірус;пандемія;Всесвітня організація охорони здоров'я (ВООЗ);Маргарет Гарріс;Європа;США;спалах;зараження;тестування;вакцина
SpaceX;NASA;місія;запуск;ракета Falcon 9;погодні умови;скасовано;американська команда;російська;Міжнародна космічна станція
погода;Україна;температура;південь;сонячна;схід;дощі;град;північ;центр
іпотека;Асоціація компаній фінансового посер

Сербія;комендантська година;Александр Вучич;пандемія;вірус SARS-CoV-2;запроваджено;вихідні;заборона;збіговиська;заходи
Żabka Polska;Jolanta Bańczerowska;кадрова політика;2016;розвиток;сучасні;процеси рекрутингу;системи оплати праці та преміювання;змінена організаційна культура;відкрита та дружня
Нацизм;Третій Рейх;Гітлер;расизм;ідеологія;відродження;людство;антикомунізм;націонал-соціалізм;єврей
атомна електростанція;Рівненська;відключила;Енергоатом;енергоблок;автоматичний захист;циркуляційні насоси;подія;INES;відключений
Михайло Мішустін;Прем'єр-міністр;Леонід Зорін;сценарист;драматург;письменник;смерть;театр;кінематограф;твори
Ана Чері;фото;фітнес-модель;Instagram;вбрання;контент;карантин;шанувальники;розкішні форми;коментарі
Рада Федерації;Держдума;поправки;заборона;нікотиновмісні суміші;електронні сигарети;вейпи;кальяни;тютюнова продукція;Валерій Рязанський
Президентські вибори;Анджей Дуда;Рафал Тшасковський;Конфедерація;Артур Дзямбор;другий тур;ПіС;РО;виборці;вибір між гільйотиною 

опитування;поляки;виборці;парламентські;вибори;Шимон Головня;партія;Koalicja Obywatelska;Lewica;Polskie Stronnictwo Ludowe
Єкатеринбург;кримінальна справа;депутат;Костянтин Кисельов;погрози;поліція;ГУ МВС РФ;прокуратура;ст.119 КК РФ;погроза вбивством
економічна криза;коронавірус;капіталізм;Насім Талеб;смертність;ВООЗ;виробництво;пандемія;інституції;економіки
Росія;Президент;Дмитро Пєсков;Ідлібі;Сирія;загинули;військовослужбовці;Турецька республіка;безпека;кордон
Австрія;коронавірус;Італія;потяг;прикордонники;лихоманка;ТПВ;OeBB;Reuters
нафта;ОПЕК+;виробники;видобуток;угода;ціни;учасники;скорочення;зустріч;Ірак, Нігерія
Вартові Галактики;Кіновсесвіт Marvel;Netflix;Hulu;Amazon Prime;стрімінговий сервіс;Disney+;Вартові Галактики 2;фільми Marvel;прокат або купівля
Міністерство освіти;Ігор Карпенко;випускні вечори;навчальні заклади;проведення;республіканський "Останній дзвоник";Бобруйськ;атестати;адміністрація навчальних закладів;школи
Банк Росії;кредитні організації;банківські операції;безп

Вибори;уряд;місцева;адміністрація;конституційний;обов'язок;діяти;перший;термін;стан;катастрофи;конституційні;підстави;очікування;Ярослав Качинський
сказ;Крим;дикі тварини;лисиці;бродячі собаки;вакцинація;ветеринарна лікарня;Россільгоспнагляд;2020 рік;Судакське лісомисливське господарство
Росія;Єдина Росія;Крим;чиновники;блокада;погода;фотосесія;шуби;влада;історія
Коронавірус;Італія;пандемія;померлі;охорона здоров'я;служба;Італія;оголошення;Cesvi;Gofundme;лікарі
Польща;коронавірус;епідемія;Матеуш Моравецький;карантин;прем'єр-міністр;кількість заражених;перевірки;солдати;боротьба з коронавірусом
коронавірус;Італія;лікарі;померли;медична федерація;служба охорони здоров'я;Італія;інфіковані;смерть;реанімація
Ухань;епідемія;Китай;лікарні;Лі Кецян;коронавірус;візит;нові заклади;лікарні;центральний уряд
вірусологи;пандемія;COVID-19;правила;травневі свята;безпека;дача;соціальна дистанція;харчування;вірусолог
Якутськ;Вілюйський район;Якутія;кримінальна справа;вбивство;рецидивіст;морзі;центральна

Rossmann;OnlyBio;заправка;Косметика;миючі засоби;Виробництво;Лодзь;органічний;натуральний;автоматичний
Набережні Челни;Subaru;офіційний дилерський центр;ТрансТехСервіс;японська марка;автоцентр;нове обладнання;комплексне післяпродажне обслуговування;дилери Subaru;РФ
Excel;гіперпосилання;стовпець;перетягування;автоматичне заповнення;клітинки;MJN1001-MJN1100;папка;функція копіювання;наступна клітинка
Детройт Пістонс;НБА;сезон;рекорд;відновлення;травми;склад;молоді гравці;драфт;перспективи
автофіксація;порушення;ПДР;знизилося;Міністр;внутрішніх справ;Арсен Аваков;Київ;швидкість;дороги
погода;Україна;Крим;поліція;нарколабораторія;виробництво наркотиків;підпільна група;кримінальна справа;наркотики;Псковська область
твори;Gremi Media SA;авторське право;розповсюдження;частина;ціле;заборона;всі сфери використання;Інтернет;згода
енцефаліт;вірус хвороби Борна 1 (BoDV-1);Німеччина;люди;смерть;землерийки;неврологічне захворювання;смертельний;дослідники;The Lancet Infectious Diseases
Деві, штат Флор

погоня;крадіжка;Желєзногорськ;Красноярський край;смартфон;банківський рахунок;поліція;зловмисниця;гаджет;пароль
Мішонна;Ходячі мерці;10-й сезон;повернення;Данія Гуріра;Вергілій;зброя;Шептуни;Ізабель Керролл;перипетії
парад домашніх тварин;Національний центр кваліфікованої сестринської допомоги;Подяка персоналу;Центр здоров'я Доброго Пастиря;Cornerstone Assisted Living;кози;коти;собаки;хендлери;заходи
ігри;2020;дозволи;PlayStation 5;Xbox Series X;відставання;майстер;монетизація;Switch;нові
виробництво;кисневі вентилі;Дагестан;Каспійськ;завод "Дагдизель";кисень;медичне обладнання;конструкторська документація;пандемія;Володимир Васильєв
закохані;карантин;ситуація;відстань;переживати;любов;підсумків;страждають;кордони;вірус
Тольятті;Самарська область;університет;онлайн-навчання;нова коронавірусна інфекція;профілактика;пресслужба;Повідомляє;16 березня;Росдистант
Московська житлоінспекція;продовження строку;перепланування;Москва;дозволи;рішення;строку дії;Мій Дім Москва;будівельні роботи;вин

In [None]:
sample["mistral_keywords2_translated"] = sample.apply(lambda row: translate_to_ukr(row.mistral_keywords2, row.url2_lang),
                                                      axis=1)

In [None]:
sample.to_csv("sources.csv")