# Import Libraries

In [1]:
import pandas as pd
import re
import string
import nltk

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier


nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\karta\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\karta\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Dataset

In [4]:
df = pd.read_csv('tweets.csv')

In [5]:
df.shape

(25798, 12)

In [6]:
df.sample(3)

Unnamed: 0,id,id_str,created_at,text,name,screen_name,location,followers_count,geo,place,retweet_count,favorite_count
23039,1.516156e+18,1516155893410316298,Mon Apr 18 20:45:08 +0000 2022,RT @yoshkinkrot: ‚ùóÔ∏è –•—Ä–∏—Å—Ç–æ –ì—Ä–æ–∑–µ–≤: ¬´–°–æ–æ–±—â–∞–ª–æ—Å—å...,Djemal –î–∂–µ–º–ê–ª—å—Ç üá∑üá∫üá∫üá¶üá´üá∑,Djemalozieux,,356.0,,NoData,23.0,0.0
4147,1.51437e+18,1514370263705653256,Wed Apr 13 22:29:41 +0000 2022,RT @Vcex_naxui: 2 –∏—é–Ω—è 2014 –≥–æ–¥–∞. –£–∫—Ä–∞–∏–Ω–∞. –õ—É–≥...,–ï–≤–≥–µ–Ω–∏–π –ö—É—Å—Ç–æ–≤,eugene_kust,–†–æ—Å—Å–∏—è,549.0,,NoData,148.0,0.0
11645,1.516566e+18,1516565503942275077,Tue Apr 19 23:52:47 +0000 2022,RT @antiputler_news: –†–æ—Å—Å–∏—è —Ä–∞–∑–≤–µ—Ä–Ω—É–ª–∞ –ó–†–ö –°-4...,–í–∞–ª–µ—Äi–π,i01435933,–£–∫—Ä–∞—ó–Ω–∞,1071.0,,NoData,363.0,0.0


# Pre-processing

## Cleaning up tweets

In [7]:
df = df[df['text'].notna()]

Remove all mentions of users (@***)

In [8]:
df['text_clean'] = [re.sub(r'@\w+', r'', x) for x in df['text']]

Remove line breaks

In [9]:
df['text_clean'] = [x.replace('\n', '') for x in df['text_clean']]

Convert to lowercase

In [10]:
df['text_clean'] = [x.lower() for x in df['text_clean']]

Remove mentions rt

In [11]:
df['text_clean'] = [re.sub(r'rt :', r'', x) for x in df['text_clean']]
df['text_clean'] = [re.sub(r'rt', r'', x) for x in df['text_clean']]

Remove all links

In [12]:
df['text_clean'] = [re.sub(r'http\S+', r'', x) for x in df['text_clean']]

Remove emoji

In [13]:
emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)

In [14]:
df['text_clean'] = [emoji_pattern.sub(r'', x) for x in df['text_clean']]

In [15]:
df['text_clean'] = [re.sub(r'‚ùóÔ∏è', r'', x) for x in df['text_clean']]
df['text_clean'] = [re.sub(r'‚öîÔ∏è', r'', x) for x in df['text_clean']]
df['text_clean'] = [re.sub(r'‚ö°', r'', x) for x in df['text_clean']]

Remove extra spaces

In [16]:
df['text_clean'] = [x.strip() for x in df['text_clean']]

Remove partial punctuation

In [17]:
df['text_clean'] = [re.sub(r'"', r'', x) for x in df['text_clean']]
df['text_clean'] = [re.sub(r'¬´', r'', x) for x in df['text_clean']]
df['text_clean'] = [re.sub(r'¬ª', r'', x) for x in df['text_clean']]
df['text_clean'] = [re.sub(r'!', r'', x) for x in df['text_clean']]
#df['text_clean'] = [re.sub(r'...', r'', x) for x in df['text_clean']]

Remove numbers

In [18]:
df['text_clean'] = [re.sub('\d+', '', x) for x in df['text_clean']]

## What we have after cleaning

In [19]:
df[['text', 'text_clean']].sample(10)

Unnamed: 0,text,text_clean
4653,@ds107m6p @ron_ov @dw_russian –ú–Ω–µ —Ö–æ—Ä–æ—à–æ –∏–∑–≤–µ—Å...,"–º–Ω–µ —Ö–æ—Ä–æ—à–æ –∏–∑–≤–µ—Å—Ç–Ω–æ, —á—Ç–æ —Ç–∞—Ç–∞—Ä—ã –∫—Ä—ã–º—Å–∫–∏–µ –∏ —Ç–æ—Ç..."
24847,RT @antiputler_news: üò© –í–æ—Ç —Ç–∞–∫–æ–π —Å–µ–≥–æ–¥–Ω—è –±—ã–ª –ø...,–≤–æ—Ç —Ç–∞–∫–æ–π —Å–µ–≥–æ–¥–Ω—è –±—ã–ª –ø—Ä–∏–ª–µ—Ç –ø–æ –≥–∞—Ä–∞–∂–Ω—ã–º –∫–æ–æ–ø–µ...
22083,RT @EvaBorisovna: –Ø –ª–∏—á–Ω–æ —Å–æ–≥–ª–∞—Å–Ω–∞ –æ—Ç–∫–∞–∑–∞—Ç—å—Å—è ...,"—è –ª–∏—á–Ω–æ —Å–æ–≥–ª–∞—Å–Ω–∞ –æ—Ç–∫–∞–∑–∞—Ç—å—Å—è –æ—Ç —Ä—É—Å—Å–∫–æ–π –º—É–∑—ã–∫–∏,..."
5924,"RT @Grantoedov: ‚ö°Ô∏è–ù–æ–≤—ã–µ —Ç—Ä–æ—Ñ–µ–∏ –∏–∑ –ü–û–ü–ê–°–ù–û–ô, —É–∂...","Ô∏è–Ω–æ–≤—ã–µ —Ç—Ä–æ—Ñ–µ–∏ –∏–∑ –ø–æ–ø–∞—Å–Ω–æ–π, —É–∂–µ —É –∫–∞–∑–∞–∫–æ–≤ –Ω–∞ –≤–æ..."
11281,–ù–∞ –≤–æ–π–Ω–µ –ø–æ–≥–∏–±–ª–∏ –¥–æ 3000 —É–∫—Ä–∞–∏–Ω—Å–∫–∏—Ö —Å–æ–ª–¥–∞—Ç. –ü–æ...,–Ω–∞ –≤–æ–π–Ω–µ –ø–æ–≥–∏–±–ª–∏ –¥–æ —É–∫—Ä–∞–∏–Ω—Å–∫–∏—Ö —Å–æ–ª–¥–∞—Ç. –ø–æ—Ç–µ—Ä–∏...
6347,RT @wargonzoo: ‚ö°Ô∏è–ê–≤–¥–µ–µ–≤—Å–∫–∏–π –∫–æ—Ç—ë–ª‚ö°Ô∏è–û–±—Å—Ç–∞–Ω–æ–≤–∫–∞ ...,Ô∏è–∞–≤–¥–µ–µ–≤—Å–∫–∏–π –∫–æ—Ç—ë–ªÔ∏è–æ–±—Å—Ç–∞–Ω–æ–≤–∫–∞ –Ω–∞ –≤–µ—á–µ—Ä ..Ô∏è–ø–æ—Å–ª–µ...
17231,RT @leonidvolkov: –ù–µ —Å—É—â–µ—Å—Ç–≤—É–µ—Ç –Ω–∏ –æ–¥–Ω–æ–≥–æ –ø–µ—Ä–µ...,–Ω–µ —Å—É—â–µ—Å—Ç–≤—É–µ—Ç –Ω–∏ –æ–¥–Ω–æ–≥–æ –ø–µ—Ä–µ–≤–æ–¥–∞ –Ω–∏ –Ω–∞ –æ–¥–∏–Ω —Ä—É...
4548,RT @euro2012uaorg: –í–∏–¥–µ–æ –∏–∑ –†—É–±–µ–∂–Ω–æ–≥–æ –õ—É–≥–∞–Ω—Å–∫–æ...,–≤–∏–¥–µ–æ –∏–∑ —Ä—É–±–µ–∂–Ω–æ–≥–æ –ª—É–≥–∞–Ω—Å–∫–æ–π –æ–±–ª–∞—Å—Ç–∏. —Ç—è–∂–µ–ª–∞—è ...
14795,RT @dedzaebal: –õ–∞–≤—Ä–æ–≤ : –†–æ—Å—Å–∏—è –Ω–µ —Å–æ–±–∏—Ä–∞–µ—Ç—Å—è –º...,–ª–∞–≤—Ä–æ–≤ : —Ä–æ—Å—Å–∏—è –Ω–µ —Å–æ–±–∏—Ä–∞–µ—Ç—Å—è –º–µ–Ω—è—Ç—å —Ä–µ–∂–∏–º –≤ —É...
14533,RT @Geschichter: –ö—Ä–µ–º–ª—å: –ï—Å–ª–∏ –£–∫—Ä–∞–∏–Ω–∞ –Ω–µ —Å–¥–∞—Å—Ç...,"–∫—Ä–µ–º–ª—å: –µ—Å–ª–∏ —É–∫—Ä–∞–∏–Ω–∞ –Ω–µ —Å–¥–∞—Å—Ç—Å—è, –º—ã —è–¥–µ—Ä–Ω—É—é –±–æ..."


## Tokenization

In [20]:
russian_stop_words = stopwords.words("russian")
snowball = SnowballStemmer(language='russian')


def tokenize_text(x):
    tokens = word_tokenize(x, language='russian')
    tokens_no_punkt = [i for i in tokens if i not in string.punctuation]
    tokens_no_stopwords = [i for i in tokens_no_punkt if i not in russian_stop_words]
    stemmed_tokens = [snowball.stem(i) for i in tokens_no_stopwords]
    return stemmed_tokens

In [21]:
df['text_tokenized'] = [tokenize_text(x) for x in df['text_clean']]

In [22]:
df[['text', 'text_clean', 'text_tokenized']].sample(15)

Unnamed: 0,text,text_clean,text_tokenized
12025,"–í–°–£ —É–Ω–∏—á—Ç–æ–∂–∏–ª–∏ –æ–¥–Ω–æ–≥–æ –∏–∑ –∫–æ–º–∞–Ω–¥–∏—Ä–æ–≤ –ø–æ—Ç–µ—à–Ω–æ–π ""...",–≤—Å—É —É–Ω–∏—á—Ç–æ–∂–∏–ª–∏ –æ–¥–Ω–æ–≥–æ –∏–∑ –∫–æ–º–∞–Ω–¥–∏—Ä–æ–≤ –ø–æ—Ç–µ—à–Ω–æ–π –ª...,"[–≤—Å—É, —É–Ω–∏—á—Ç–æ–∂, –æ–¥–Ω, –∫–æ–º–∞–Ω–¥–∏—Ä, –ø–æ—Ç–µ—à–Ω, –ª–Ω—Ä, –º–∏—à..."
12072,RT @chuuyamyboy: —Ç—Ä–µ–¥!! –ø–µ—Ä—Å–æ–Ω–∞–∂–∏ –±—Å–¥ –∫–∞–∫ —É–∫—Ä–∞...,—Ç—Ä–µ–¥ –ø–µ—Ä—Å–æ–Ω–∞–∂–∏ –±—Å–¥ –∫–∞–∫ —É–∫—Ä–∞–∏–Ω—Å–∫–∏–µ –ø–æ–¥—Ä–æ—Å—Ç–∫–∏ –≤–æ...,"[—Ç—Ä–µ–¥, –ø–µ—Ä—Å–æ–Ω–∞–∂, –±—Å–¥, —É–∫—Ä–∞–∏–Ω—Å–∫, –ø–æ–¥—Ä–æ—Å—Ç–∫, –≤—Ä–µ–º..."
6374,RT @rianru: –ó–∞ –ø–æ—Å–ª–µ–¥–Ω–∏–µ —Å—É—Ç–∫–∏ –≤ –î–ù–† –ø–æ–≥–∏–±–ª–∏ —Ç...,–∑–∞ –ø–æ—Å–ª–µ–¥–Ω–∏–µ —Å—É—Ç–∫–∏ –≤ –¥–Ω—Ä –ø–æ–≥–∏–±–ª–∏ —Ç—Ä–æ–µ –≤–æ–µ–Ω–Ω–æ—Å–ª...,"[–ø–æ—Å–ª–µ–¥–Ω, —Å—É—Ç–∫, –¥–Ω—Ä, –ø–æ–≥–∏–±–ª, —Ç—Ä–æ, –≤–æ–µ–Ω–Ω–æ—Å–ª—É–∂–∞,..."
7075,RT @altangerelch1: –ê–ù–£ –£–∫—Ä–∞–π–Ω–¥ –¥–æ–Ω–±–∞—Å –±–æ–ª–æ–Ω –∑“Ø...,–∞–Ω—É —É–∫—Ä–∞–π–Ω–¥ –¥–æ–Ω–±–∞—Å –±–æ–ª–æ–Ω –∑“Ø“Ø–Ω —É–∫—Ä–∞–π–Ω—ã–≥ —á”©–ª”©”©–ª”©...,"[–∞–Ω, —É–∫—Ä–∞–π–Ω–¥, –¥–æ–Ω–±–∞—Å, –±–æ–ª–æ–Ω, –∑“Ø“Ø–Ω, —É–∫—Ä–∞–π–Ω—ã–≥, —á..."
4399,RT @GirkinGirkin: –õ—É–≥–∞–Ω—Å–∫ https://t.co/Ihku9y8UH3,–ª—É–≥–∞–Ω—Å–∫,[–ª—É–≥–∞–Ω—Å–∫]
6730,"RT @DonbassSegodnya: –ú–∞—Ä–∏—É–ø–æ–ª—å, –Ω–∞ —Ç—Ä–∞—Å—Å–µ —Å—Ç–æ–∏...","–º–∞—Ä–∏—É–ø–æ–ª—å, –Ω–∞ —Ç—Ä–∞—Å—Å–µ —Å—Ç–æ–∏—Ç –ø—Ä–µ—Å—Ç–∞—Ä–µ–ª–∞—è –∂–µ–Ω—â–∏–Ω–∞...","[–º–∞—Ä–∏—É–ø–æ–ª, —Ç—Ä–∞—Å—Å, —Å—Ç–æ, –ø—Ä–µ—Å—Ç–∞—Ä–µ–ª, –∂–µ–Ω—â–∏–Ω, —Å–≤–µ—á..."
11151,@abunin –ó–µ–ª–µ–Ω—Å–∫–∏–π –≤ –ø–∞–Ω–∏–∫–µ üëÄ,–∑–µ–ª–µ–Ω—Å–∫–∏–π –≤ –ø–∞–Ω–∏–∫–µ,"[–∑–µ–ª–µ–Ω—Å–∫, –ø–∞–Ω–∏–∫]"
8872,"RT @eskovoroda: –ù–∞–ø–∏—Å–∞–ª–∏, –∫–∞–∫ –ª—é–¥–∏ –ø–æ–∫–∏–¥–∞–ª–∏ –ú–∞...","–Ω–∞–ø–∏—Å–∞–ª–∏, –∫–∞–∫ –ª—é–¥–∏ –ø–æ–∫–∏–¥–∞–ª–∏ –º–∞—Ä–∏—É–ø–æ–ª—å ‚Äî –æ–¥–Ω–æ –∏...","[–Ω–∞–ø–∏—Å–∞, –ª—é–¥, –ø–æ–∫–∏–¥–∞, –º–∞—Ä–∏—É–ø–æ–ª, ‚Äî, –æ–¥–Ω, —Å–∞–º, —Å..."
21066,RT @RozovayaShayrma: –¥–∞ –±–ª—è—Ç—å —ç—Ç–∞ —Ö—É–π–Ω—è –ø—Ä–æ–∏—Å—Ö...,–¥–∞ –±–ª—è—Ç—å —ç—Ç–∞ —Ö—É–π–Ω—è –ø—Ä–æ–∏—Å—Ö–æ–¥–∏—Ç –≥–æ–¥–∞–º–∏–º—É–¥–ª–æ –æ—Ç—Ä—É...,"[–±–ª—è—Ç, —ç—Ç, —Ö—É–π–Ω, –ø—Ä–æ–∏—Å—Ö–æ–¥, –≥–æ–¥–∞–º–∏–º—É–¥–ª, –æ—Ç—Ä—É–±–∞,..."
6110,RT @Alla91748059: –†–æ—Å—Å–∏—è —Ç–µ—Ä—è–µ—Ç –ø–æ–¥–¥–µ—Ä–∂–∫—É –ö–∞–∑–∞...,—Ä–æ—Å—Å–∏—è —Ç–µ—Ä—è–µ—Ç –ø–æ–¥–¥–µ—Ä–∂–∫—É –∫–∞–∑–∞—Ö—Å—Ç–∞–Ω–∞ –ø–æ—Å–ª–µ –Ω–∞—á–∞–ª...,"[—Ä–æ—Å—Å, —Ç–µ—Ä—è, –ø–æ–¥–¥–µ—Ä–∂–∫, –∫–∞–∑–∞—Ö—Å—Ç–∞, –Ω–∞—á–∞, –≤–æ–π–Ω, —É..."


# Toxic Model

Russian Language Toxic Comments https://www.kaggle.com/datasets/blackmoon/russian-language-toxic-comments

In [23]:
df_train = pd.read_csv('labeled.csv')

In [24]:
df_train

Unnamed: 0,comment,toxic
0,"–í–µ—Ä–±–ª—é–¥–æ–≤-—Ç–æ –∑–∞ —á—Ç–æ? –î–µ–±–∏–ª—ã, –±–ª...\n",1.0
1,"–•–æ—Ö–ª—ã, —ç—Ç–æ –æ—Ç–¥—É—à–∏–Ω–∞ –∑–∞—Ç—é–∫–∞–Ω–æ–≥–æ —Ä–æ—Å—Å–∏—è–Ω–∏–Ω–∞, –º–æ–ª...",1.0
2,–°–æ–±–∞–∫–µ - —Å–æ–±–∞—á—å—è —Å–º–µ—Ä—Ç—å\n,1.0
3,"–°—Ç—Ä–∞–Ω–∏—Ü—É –æ–±–Ω–æ–≤–∏, –¥–µ–±–∏–ª. –≠—Ç–æ —Ç–æ–∂–µ –Ω–µ –æ—Å–∫–æ—Ä–±–ª–µ–Ω–∏...",1.0
4,"—Ç–µ–±—è –Ω–µ —É–±–µ–¥–∏–ª 6-—Å—Ç—Ä–∞–Ω–∏—á–Ω—ã–π –ø–¥—Ñ –≤ —Ç–æ–º, —á—Ç–æ –°–∫—Ä...",1.0
...,...,...
14407,–í–æ–Ω—é—á–∏–π —Å–æ–≤–∫–æ–≤—ã–π —Å–∫–æ—Ç –ø—Ä–∏–±–µ–∂–∞–ª –∏ –Ω–æ–µ—Ç. –ê –≤–æ—Ç –∏...,1.0
14408,–ê –∫–æ–≥–æ –ª—é–±–∏—Ç—å? –ì–æ–±–ª–∏–Ω–∞ —Ç—É–ø–æ—Ä—ã–ª–æ–≥–æ —á—Ç–æ-–ª–∏? –ò–ª–∏ ...,1.0
14409,"–ü–æ—Å–º–æ—Ç—Ä–µ–ª –£—Ç–æ–º–ª–µ–Ω–Ω—ã—Ö —Å–æ–ª–Ω—Ü–µ–º 2. –ò –æ–∫–∞–∑–∞–ª–æ—Å—å, —á...",0.0
14410,–ö–†–´–ú–û–¢–†–ï–î –ù–ê–†–£–®–ê–ï–¢ –ü–†–ê–í–ò–õ–ê –†–ê–ó–î–ï–õ–ê –¢.–ö –í –ù–ï–ú –ù...,1.0


In [25]:
df_train['toxic'] = df_train['toxic'].astype(int)

In [26]:
df_train['comment_clear'] = df_train['comment'].str.lower()
df_train['comment_clear'] = [re.sub('\d+', '', x) for x in df_train['comment_clear']]

In [27]:
df_train['comment_tokenized'] = [tokenize_text(x) for x in df_train['comment_clear']]

In [28]:
pipeline = Pipeline([
    ("vectorizer", TfidfVectorizer(tokenizer = lambda x: tokenize_text(x))),
    ("model", RandomForestClassifier(n_jobs=-1))
])

In [29]:
pipeline.fit(df_train['comment_clear'], df_train['toxic'])

Pipeline(steps=[('vectorizer',
                 TfidfVectorizer(tokenizer=<function <lambda> at 0x7fcf485c9af0>)),
                ('model', RandomForestClassifier(n_jobs=-1))])

## Apply on our tweets

In [31]:
df['toxic_proba'] = list(pipeline.predict_proba(df['text_clean']))
df['toxic'] = list(pipeline.predict(df['text_clean']))

In [33]:
df[['text', 'text_clean', 'toxic_proba', 'toxic']].sample(10)

Unnamed: 0,text,text_clean,toxic_proba,toxic
25299,"RT @sashatimofeevax: ""–ó–∞—á–µ–º –Ω–∞–º –Ω–∞—Å–∏–ª–æ–≤–∞—Ç—å —É–∫—Ä...",–∑–∞—á–µ–º –Ω–∞–º –Ω–∞—Å–∏–ª–æ–≤–∞—Ç—å —É–∫—Ä–∞–∏–Ω–æ–∫?- –≤–æ–∑–º—É—Ç–∏–ª–∏—Å—å —Ä—É...,"[0.4, 0.6]",1
8448,RT @i_army_org: –ü–æ—Å–ª–µ–¥—Å—Ç–≤–∏—è –≤—Å—Ç—É–ø–ª–µ–Ω–∏—è –≤ –ù–ê–¢–û ...,–ø–æ—Å–ª–µ–¥—Å—Ç–≤–∏—è –≤—Å—Ç—É–ø–ª–µ–Ω–∏—è –≤ –Ω–∞—Ç–æ –æ—á–µ–≤–∏–¥–Ω—ã - —ç—Ç–æ –±...,"[0.84, 0.16]",0
3616,RT @rianru: –ö–∏–µ–≤ —Ö–æ—á–µ—Ç –æ–±—Ä–∞—Ç–∏—Ç—å—Å—è –∫ –ú–æ—Å–∫–≤–µ —Å —Ç...,–∫–∏–µ–≤ —Ö–æ—á–µ—Ç –æ–±—Ä–∞—Ç–∏—Ç—å—Å—è –∫ –º–æ—Å–∫–≤–µ —Å —Ç—Ä–µ–±–æ–≤–∞–Ω–∏–µ–º –≤...,"[0.8, 0.2]",0
14869,RT @KattieBanned: @prostotak182 @hUDhfnQFzM65l...,"—Ç–æ, —á—Ç–æ —Å–ø–µ—Ü–æ–ø–µ—Ä–∞—Ü–∏—è –ø–æ –∑–∞—Ö–≤–∞—Ç—É –≤–ª–∞—Å—Ç–∏ –≤ —É–∫—Ä–∞–∏...","[0.48, 0.52]",1
5844,"@christogrozev –ó–Ω–∞–µ—Ç–µ, —É –∫–æ–≥–æ –¢–û–ß–ù–û –û–§–ò–¶–ò–ê–õ–¨–ù–û...","–∑–Ω–∞–µ—Ç–µ, —É –∫–æ–≥–æ —Ç–æ—á–Ω–æ –æ—Ñ–∏—Ü–∏–∞–ª—å–Ω–æ % –µ—Å—Ç—å —Ç–æ—á–∫–∞-—É...","[0.88, 0.12]",0
20694,RT @ttt_mir_no: @stranabolna –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç –∑–∞–±–∞—Å—Ç...,–ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç –∑–∞–±–∞—Å—Ç–æ–≤–∫—É –≤ –≤–∏–¥–µ —Å–∏–Ω—Ö—Ä–æ–Ω–Ω—ã—Ö –±–æ–ª—å–Ω–∏...,"[0.96, 0.04]",0
8535,RT @globusnewsorg: –ü—Ä–µ–º—å–µ—Ä-–º–∏–Ω–∏—Å—Ç—Ä –§–∏–Ω–ª—è–Ω–¥–∏–∏ –∑...,–ø—Ä–µ–º—å–µ—Ä-–º–∏–Ω–∏—Å—Ç—Ä —Ñ–∏–Ω–ª—è–Ω–¥–∏–∏ –∑–∞—è–≤–∏–ª–∞ –æ –≤—Å—Ç—É–ø–ª–µ–Ω–∏–∏...,"[0.94, 0.06]",0
12803,RT @nastyabakulina_: –≠—Ç–æ–π –∏–º–ø–µ—Ä–∏–∏ –Ω—É–∂–µ–Ω –∏ –ö–∏–µ–≤...,"—ç—Ç–æ–π –∏–º–ø–µ—Ä–∏–∏ –Ω—É–∂–µ–Ω –∏ –∫–∏–µ–≤, –∏ –∫—Ä—ã–º.–º–æ–∂–µ—Ç –±—ã—Ç—å —è...","[0.7, 0.3]",0
19178,RT @KermlinRussia: –í –∫–æ–Ω—Ü–µ –∞–ø—Ä–µ–ª—è –∞–≥—Ä–µ—Å—Å–∏–≤–Ω—ã–π ...,–≤ –∫–æ–Ω—Ü–µ –∞–ø—Ä–µ–ª—è –∞–≥—Ä–µ—Å—Å–∏–≤–Ω—ã–π –±–ª–æ–∫ –Ω–∞—Ç–æ –ø–ª–∞–Ω–∏—Ä—É–µ—Ç...,"[0.6825, 0.3175]",0
23972,RT @anders_aslund: –≤ –æ–∂–∏–¥–∞–Ω–∏–∏ –±–∏—Ç–≤—ã –∑–∞ –î–æ–Ω–±–∞—Å—Å...,–≤ –æ–∂–∏–¥–∞–Ω–∏–∏ –±–∏—Ç–≤—ã –∑–∞ –¥–æ–Ω–±–∞—Å—Å –±–æ—Ä—å–±–∞ –≤ –∫—Ä–µ–º–ª–µ –ø—Ä...,"[0.4, 0.6]",1
