In [52]:
import pandas as pd
import numpy as np

In [53]:
# !wget https://storage.yandexcloud.net/natasha-navec/packs/navec_hudlit_v1_12B_500K_300d_100q.tar

In [54]:
# %pip install navec 

In [55]:
from navec import Navec

path = 'navec_hudlit_v1_12B_500K_300d_100q.tar'
navec = Navec.load(path)

In [56]:
# %pip install pymystem3

In [57]:
df = pd.read_csv('/home/jupyter/mnt/s3/bank-data/features/test_for_participants.csv')

In [58]:
df.head()

Unnamed: 0.1,Unnamed: 0,sentence
0,0,"Очень неприятная ситуация, надеюсь, банк либо ..."
1,1,За что выражаю благодарность и банку и данному...
2,2,"Вывод: информация полученная в смс от банка, и..."
3,3,Хочу по благодарить ее за чуткое отношение к н...
4,4,"Показал, что я и вклад могу свой пополнять пря..."


In [59]:
df.shape

(2152, 2)

In [60]:
import re

import nltk
from nltk.corpus import stopwords as nltk_stopwords
from pymystem3 import Mystem

# Fetch the NLTK stop-word corpus (reports "already up-to-date" when present).
nltk.download('stopwords')

# Shared Mystem instance used by lemmatize_sentence().
mystem = Mystem()

# The corpus reader is imported under an alias so that binding the name
# `stopwords` to the word collection no longer shadows the NLTK object.
russian_stopwords = nltk_stopwords.words("russian")
# english_stopwords = nltk_stopwords.words("english")

# A set keeps the per-token membership test in lemmatize_sentence() O(1).
stopwords = set(russian_stopwords)  # | set(english_stopwords)

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/jupyter/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [61]:
from collections import Counter

In [62]:
def lemmatize_sentence(x):
    """Normalize a sentence and return its lemmas without stop words.

    The text is stripped, lower-cased, and every character that is not a
    Cyrillic letter or whitespace is removed before it is lemmatized with the
    module-level ``mystem`` instance.

    Parameters
    ----------
    x : str
        Raw sentence. Non-string values (for example the NaN that pandas
        yields for an empty CSV field, or ``None``) are treated as an empty
        sentence.

    Returns
    -------
    list of str
        Lemmas that are neither in the module-level ``stopwords`` collection
        nor made up solely of whitespace.
    """
    # Missing values have no .strip(); return no tokens instead of raising
    # AttributeError in the middle of a whole-column pass.
    if not isinstance(x, str):
        return []
    x = x.strip().lower()
    # Characters are deleted rather than replaced by a space, so tokens that
    # are separated only by punctuation or digits end up fused.
    # NOTE(review): confirm that fusing is intended.
    pattern = r'[^А-Яа-яЁё\s]'
    x = re.sub(pattern, '', x)
    tokens = mystem.lemmatize(x)
    return [
        token for token in tokens
        if token not in stopwords and not token.isspace()
    ]

In [63]:
df['lemmatized'] = [lemmatize_sentence(sentence) for sentence in df['sentence']]

In [64]:
df.head()

Unnamed: 0.1,Unnamed: 0,sentence,lemmatized
0,0,"Очень неприятная ситуация, надеюсь, банк либо ...","[очень, неприятный, ситуация, надеяться, банк,..."
1,1,За что выражаю благодарность и банку и данному...,"[выражать, благодарность, банка, данный, сотру..."
2,2,"Вывод: информация полученная в смс от банка, и...","[вывод, информация, получать, смс, банк, инфор..."
3,3,Хочу по благодарить ее за чуткое отношение к н...,"[хотеть, благодарить, чуткий, отношение, пожил..."
4,4,"Показал, что я и вклад могу свой пополнять пря...","[показывать, вклад, мочь, свой, пополнять, пря..."


In [17]:
def get_most_common_words(sentiment, top_n=100):
    """Return the most frequent lemmas for one sentiment class.

    Reads the module-level ``df``, which must have a ``lemmatized`` column of
    token lists and, unless ``sentiment == 'all'``, a ``sentiment`` column.

    Parameters
    ----------
    sentiment : str
        Label to filter ``df['sentiment']`` by, or ``'all'`` to count over
        every row.
    top_n : int, default 100
        How many of the most frequent lemmas to return.

    Returns
    -------
    list of str
        Up to ``top_n`` lemmas ordered from most to least frequent.
    """
    if sentiment == 'all':
        sentences = df['lemmatized']
    else:
        sentences = df.loc[df['sentiment'] == sentiment, 'lemmatized']
    # Feed the counter sentence by sentence instead of first materializing
    # one big list of all tokens.
    counter = Counter()
    for sentence in sentences:
        counter.update(sentence)
    return [word for word, _ in counter.most_common(top_n)]

In [18]:
df['sentiment'].unique()

array(['+', '−', '?'], dtype=object)

In [65]:
def encode(word):
    """Look up the embedding of ``word`` in the module-level ``navec``.

    Words that are missing from ``navec`` resolve to its ``"<unk>"`` entry.
    """
    key = word if word in navec else "<unk>"
    return navec[key]

In [20]:
words_pos = get_most_common_words('+')
words_neg = get_most_common_words('−')
words_net = get_most_common_words('?')
words_all = get_most_common_words('all')

In [23]:
# Keep only the class-specific words: drop everything that is also among the
# overall most common words. Filtering with a comprehension (instead of
# list(set(a) - set(b))) preserves the frequency ordering and gives the same
# result on every kernel restart; the position of a word in these lists later
# becomes the index of its feature column in get_features().
common_words = set(words_all)
words_pos = [word for word in words_pos if word not in common_words]
words_neg = [word for word in words_neg if word not in common_words]
words_net = [word for word in words_net if word not in common_words]

print(len(words_pos))
print(len(words_neg))
print(len(words_net))

36
18
26


In [66]:
# Frozen list of positive-class marker words; position i in this list is the
# word behind feature column `dist_pos_{i}` built by get_features().
# Presumably pasted from the output of the set-difference cell (its length, 36,
# matches the count printed there) — TODO confirm.
words_pos = ['подробно',
 'впечатление',
 'грамотный',
 'любой',
 'помощь',
 'доброжелательный',
 'приятный',
 'высокий',
 'тинькофф',
 'общение',
 'профессиональный',
 'отличный',
 'понравиться',
 'девушка',
 'внимательный',
 'уровень',
 'вежливый',
 'объяснять',
 'оставаться',
 'подход',
 'удобный',
 'рассказывать',
 'дело',
 'профессионализм',
 'вклад',
 'удобно',
 'хотеться',
 'оперативность',
 'оперативно',
 'плюс',
 'приятно',
 'наш',
 'положительный',
 'выражать',
 'отмечать',
 'продукт']

In [67]:
# Frozen list of negative-class marker words; position i in this list is the
# word behind feature column `dist_neg_{i}` built by get_features().
# Presumably pasted from the output of the set-difference cell (its length, 18,
# matches the count printed there) — TODO confirm.
words_neg = ['написать',
 'вернуть',
 'считать',
 'обманывать',
 'абсолютно',
 'горячий',
 'некомпетентный',
 'ужасный',
 'связываться',
 'крайне',
 'сегодня',
 'отвратительный',
 'понимать',
 'советовать',
 'писать',
 'невозможно',
 'закрывать',
 'отказываться']

In [68]:
# Frozen list of marker words for the '?' sentiment class; position i in this
# list is the word behind feature column `dist_net_{i}` built by get_features().
# Presumably pasted from the output of the set-difference cell (its length, 26,
# matches the count printed there) — TODO confirm.
words_net = ['нужно',
 'сайт',
 'оплачивать',
 'почта',
 'сегодня',
 'принимать',
 'д',
 'история',
 'покупка',
 'написать',
 'одобрять',
 'приезжать',
 'вроде',
 'неделя',
 'несколько',
 'сообщать',
 'смс',
 'вклад',
 'рубль',
 'адрес',
 'руб',
 'думать',
 'наш',
 'открывать',
 'москва',
 'ул']

In [69]:
from scipy.spatial.distance import cosine

In [70]:
def distances_words(plenty_words, sentence):
    """Distance from each special word to its nearest word in a sentence.

    Parameters
    ----------
    plenty_words : sequence of str
        Special (marker) words; one output value is produced per word.
    sentence : iterable of str
        Tokens of a single sentence.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(plenty_words)``. Element ``i`` is the
        smallest cosine distance between ``plenty_words[i]`` and any token of
        ``sentence``, or ``1.0`` when the sentence has no tokens.
    """
    # Encode the sentence once up front rather than once per special word.
    sentence_vecs = [encode(word) for word in sentence]
    if not sentence_vecs:
        # Nothing to compare against: report distance 1 for every word, as
        # floats so the dtype matches the non-empty case.
        return np.ones(len(plenty_words), dtype=float)

    # NOTE(review): encode() maps every out-of-vocabulary word to the same
    # "<unk>" vector, so an OOV special word is at distance 0 from any OOV
    # token in the sentence. Confirm this is intended.
    res = []
    for spec_word in plenty_words:
        spec_vec = encode(spec_word)
        res.append(min(cosine(spec_vec, vec) for vec in sentence_vecs))
    return np.array(res, dtype=float)

In [71]:
from tqdm.notebook import tqdm

In [72]:
def get_features(name, special_words):
    """Build a per-sentence distance-feature table for a list of words.

    Iterates over ``df['lemmatized']`` (module-level ``df``) and computes
    ``distances_words(special_words, sentence)`` for every row.

    Parameters
    ----------
    name : str
        Prefix of the generated column names.
    special_words : sequence of str
        Words to measure distances to; column ``f"{name}_{i}"`` corresponds to
        ``special_words[i]``.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df`` (with a fresh 0..n-1 index) and one column
        per special word.
    """
    # Collect the rows in a list and build the matrix once: calling np.vstack
    # inside the loop copies the accumulated matrix on every iteration.
    rows = [
        distances_words(special_words, sentence)
        for sentence in tqdm(df['lemmatized'])
    ]
    # The explicit reshape keeps the result 2-D for zero or one rows as well,
    # so the column names below always fit.
    all_dist = np.asarray(rows, dtype=float).reshape(len(rows), len(special_words))
    columns = [f'{name}_{i}' for i in range(len(special_words))]
    return pd.DataFrame(all_dist, columns=columns)

In [73]:
df_dist_pos = get_features('dist_pos', words_pos)
df_dist_neg = get_features('dist_neg', words_neg)
df_dist_net = get_features('dist_net', words_net)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2151.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2151.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2151.0), HTML(value='')))






In [74]:
final_df = pd.concat([df.sentence, df_dist_pos, df_dist_neg, df_dist_net], axis=1)

In [75]:
final_df.head()

Unnamed: 0,sentence,dist_pos_0,dist_pos_1,dist_pos_2,dist_pos_3,dist_pos_4,dist_pos_5,dist_pos_6,dist_pos_7,dist_pos_8,dist_pos_9,dist_pos_10,dist_pos_11,dist_pos_12,dist_pos_13,dist_pos_14,dist_pos_15,dist_pos_16,dist_pos_17,dist_pos_18,dist_pos_19,dist_pos_20,dist_pos_21,dist_pos_22,dist_pos_23,dist_pos_24,dist_pos_25,dist_pos_26,dist_pos_27,dist_pos_28,dist_pos_29,dist_pos_30,dist_pos_31,dist_pos_32,dist_pos_33,dist_pos_34,dist_pos_35,dist_neg_0,dist_neg_1,dist_neg_2,...,dist_neg_4,dist_neg_5,dist_neg_6,dist_neg_7,dist_neg_8,dist_neg_9,dist_neg_10,dist_neg_11,dist_neg_12,dist_neg_13,dist_neg_14,dist_neg_15,dist_neg_16,dist_neg_17,dist_net_0,dist_net_1,dist_net_2,dist_net_3,dist_net_4,dist_net_5,dist_net_6,dist_net_7,dist_net_8,dist_net_9,dist_net_10,dist_net_11,dist_net_12,dist_net_13,dist_net_14,dist_net_15,dist_net_16,dist_net_17,dist_net_18,dist_net_19,dist_net_20,dist_net_21,dist_net_22,dist_net_23,dist_net_24,dist_net_25
0,"Очень неприятная ситуация, надеюсь, банк либо ...",0.744485,0.581664,0.766914,0.570419,0.501763,0.698647,0.381497,0.631299,0.0,0.719439,0.616837,0.704745,0.717996,0.545505,0.774825,0.685341,0.631657,0.472426,0.573269,0.69519,0.704625,0.572899,0.483884,0.822136,0.700463,0.692539,0.820686,0.815718,0.73542,0.671747,0.505627,0.630424,0.704996,0.631811,0.752518,0.682002,0.698506,0.604761,0.473458,...,0.569874,0.627459,0.721192,0.439048,0.598686,0.345218,0.482961,0.325843,0.522335,0.587327,0.566074,0.417713,0.715301,0.636743,0.49157,0.608824,0.551571,0.74293,0.482961,0.621188,0.718793,0.591405,0.720757,0.698506,0.688968,0.65542,0.504831,0.738757,0.532063,0.614425,0.822475,0.700463,0.565213,0.681806,0.646657,0.491145,0.630424,0.626695,0.785402,0.85582
1,За что выражаю благодарность и банку и данному...,0.71515,0.782316,0.714984,0.52013,0.599262,0.839251,0.79229,0.820825,1.122423,0.755878,0.606302,0.734259,0.761491,0.841545,0.78759,0.624159,0.768624,0.675173,0.709964,0.500469,0.693155,0.767874,0.724635,0.803019,0.715636,0.753849,0.908002,0.751864,0.76788,0.792231,0.724514,0.609859,0.651066,0.0,0.737488,0.641599,0.745493,0.738395,0.643096,...,0.649975,0.886779,0.839163,0.85816,0.819767,0.692775,0.632007,0.896177,0.621094,0.689024,0.678186,0.653998,0.745496,0.807445,0.620893,0.689255,0.81654,0.806673,0.632007,0.708642,0.723549,0.800022,0.742462,0.745493,0.696288,0.835308,0.825536,0.864296,0.831907,0.665506,0.920661,0.715636,0.763955,0.76227,0.660285,0.701532,0.609859,0.729947,0.778944,0.846752
2,"Вывод: информация полученная в смс от банка, и...",0.764062,0.659579,0.568254,0.610204,0.765943,0.641289,0.647006,0.708632,0.0,0.617236,0.604967,0.433628,0.634186,0.656162,0.674572,0.675667,0.668193,0.581495,0.572624,0.551839,0.485209,0.55102,0.539345,0.786939,0.655533,0.724344,0.820686,0.793627,0.696165,0.703865,0.680089,0.606788,0.506843,0.619089,0.598181,0.672255,0.68679,0.775839,0.61917,...,0.652914,0.759711,0.721192,0.75011,0.699934,0.718169,0.629593,0.830791,0.60192,0.490316,0.634758,0.586094,0.628441,0.680918,0.447609,0.0,0.570097,0.653849,0.629593,0.547484,0.739005,0.661509,0.77111,0.68679,0.718549,0.582742,0.70338,0.772019,0.76225,0.457938,0.0,0.655533,0.687162,0.580235,0.718057,0.560734,0.606788,0.670198,0.785402,0.745285
3,Хочу по благодарить ее за чуткое отношение к н...,0.804942,0.608952,0.67841,0.545613,0.689622,0.497566,0.639077,0.552814,1.021174,0.663675,0.731894,0.708206,0.699118,0.657581,0.43246,0.76865,0.583088,0.648124,0.695467,0.552334,0.830565,0.679669,0.694665,0.700263,0.834056,0.859643,0.528749,0.753119,0.906669,0.749413,0.756328,0.595099,0.792694,0.695038,0.756178,0.829257,0.807196,0.808775,0.73977,...,0.678177,0.804223,0.861014,0.769555,0.82934,0.737491,0.670491,0.783761,0.564144,0.678978,0.726564,0.637049,0.794961,0.616265,0.630223,0.88447,0.791886,0.918118,0.670491,0.733987,0.745125,0.76546,0.980372,0.807196,0.742338,0.689551,0.723856,0.916642,0.678226,0.779972,0.993579,0.834056,0.882906,0.833635,0.892132,0.559461,0.595099,0.819582,0.906249,0.984211
4,"Показал, что я и вклад могу свой пополнять пря...",0.775963,0.610825,0.899434,0.58852,0.794865,0.938086,0.785011,0.702296,0.0,0.788727,0.641693,0.662788,0.655113,0.524262,0.900262,0.69521,0.835282,0.507652,0.708766,0.684713,0.690866,0.511932,0.586432,0.746632,0.0,0.676342,0.719905,0.815718,0.804659,0.734944,0.605978,0.473157,0.579541,0.57257,0.716094,0.743556,0.66042,0.664267,0.70048,...,0.4897,0.725285,0.721192,0.678083,0.742982,0.699123,0.500276,0.743101,0.564176,0.738063,0.691193,0.505005,0.669209,0.715489,0.540168,0.624313,0.433535,0.675902,0.500276,0.701302,0.845379,0.724429,0.737471,0.66042,0.799922,0.6673,0.548067,0.699794,0.652537,0.569633,0.822475,0.0,0.761742,0.727901,0.673196,0.56224,0.473157,0.574913,0.720194,0.879123


In [76]:
final_df.columns

Index(['sentence', 'dist_pos_0', 'dist_pos_1', 'dist_pos_2', 'dist_pos_3',
       'dist_pos_4', 'dist_pos_5', 'dist_pos_6', 'dist_pos_7', 'dist_pos_8',
       'dist_pos_9', 'dist_pos_10', 'dist_pos_11', 'dist_pos_12',
       'dist_pos_13', 'dist_pos_14', 'dist_pos_15', 'dist_pos_16',
       'dist_pos_17', 'dist_pos_18', 'dist_pos_19', 'dist_pos_20',
       'dist_pos_21', 'dist_pos_22', 'dist_pos_23', 'dist_pos_24',
       'dist_pos_25', 'dist_pos_26', 'dist_pos_27', 'dist_pos_28',
       'dist_pos_29', 'dist_pos_30', 'dist_pos_31', 'dist_pos_32',
       'dist_pos_33', 'dist_pos_34', 'dist_pos_35', 'dist_neg_0', 'dist_neg_1',
       'dist_neg_2', 'dist_neg_3', 'dist_neg_4', 'dist_neg_5', 'dist_neg_6',
       'dist_neg_7', 'dist_neg_8', 'dist_neg_9', 'dist_neg_10', 'dist_neg_11',
       'dist_neg_12', 'dist_neg_13', 'dist_neg_14', 'dist_neg_15',
       'dist_neg_16', 'dist_neg_17', 'dist_net_0', 'dist_net_1', 'dist_net_2',
       'dist_net_3', 'dist_net_4', 'dist_net_5', 'dist_net_6', 'd

In [77]:
final_df.to_csv("TEST_distances_sentiment.csv")

In [154]:
# For every sentence, store the whole vector returned by distances_words() in a
# single DataFrame cell: one column per word list, one progress bar per pass.
df['dist_pos'] = [distances_words(words_pos, sentence) for sentence in tqdm(df['lemmatized'])]
df['dist_neg'] = [distances_words(words_neg, sentence) for sentence in tqdm(df['lemmatized'])]
df['dist_net'] = [distances_words(words_net, sentence) for sentence in tqdm(df['lemmatized'])]

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19361.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19361.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19361.0), HTML(value='')))






In [157]:
df.head()

Unnamed: 0.1,Unnamed: 0,sentence,1category,2category,sentiment,lemmatized,dist_pos,dist_neg,dist_net
0,4754,При этом всегда получал качественные услуги.,Communication,,+,"[это, получать, качественный, услуга]","[0.563639760017395, 0.5634748339653015, 0.6045...","[0.7360561192035675, 0.4558261036872864, 0.788...","[0.6259698569774628, 0.6318686604499817, 0.794..."
1,4417,"Не вижу, за что хотя бы 2 поставить, сервис на 1!",?,,−,"[видеть, хотя, поставлять, сервис]","[0.5922351181507111, 0.6998542547225952, 0.602...","[0.7729345411062241, 0.6385719478130341, 0.801...","[0.7952060848474503, 0.7497720122337341, 0.554..."
2,3629,"Вот так ""Мой любимый"" банк МКБ меня обманул.",?,,−,"[любимый, банк, мкб, обманывать]","[0.8300695419311523, 0.5762559175491333, 0.894...","[0.8049450218677521, 0.6747770309448242, 0.675...","[0.8406572937965393, 0.8336348682641983, 0.711..."
3,11640,Отвратительное отношение к клиентам.,Communication,,−,"[отвратительный, отношение, клиент]","[0.6871592104434967, 0.7127627432346344, 0.688...","[0.7631731331348419, 0.8094930648803711, 0.674...","[0.47836410999298096, 0.747184157371521, 0.639..."
4,5571,"Всегда в любое время дня и ночи помогут, ответ...",Communication,,+,"[любой, время, день, ночь, помогать, отвечать,...","[0.5456125140190125, 0.6357545256614685, 0.684...","[0.7394724190235138, 0.5583707690238953, 0.758...","[0.7702769786119461, 0.758133590221405, 0.7482..."


In [156]:
df.to_csv('train_with_dist_sentiment.csv')

In [74]:
final_df = pd.read_csv('/home/jupyter/mnt/s3/bank-data/features/train_with_bow.csv')

In [75]:
final_df.head()

Unnamed: 0.1,Unnamed: 0


In [159]:
my_df = pd.read_csv('train_with_dist_sentiment.csv')

In [162]:
my_df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,sentence,1category,2category,sentiment,lemmatized,dist_pos,dist_neg,dist_net
0,0,4754,При этом всегда получал качественные услуги.,Communication,,+,"['это', 'получать', 'качественный', 'услуга']","[0.563639760017395, 0.5634748339653015, 0.6045...","[0.7360561192035675, 0.4558261036872864, 0.788...","[0.6259698569774628, 0.6318686604499817, 0.794..."
1,1,4417,"Не вижу, за что хотя бы 2 поставить, сервис на 1!",?,,−,"['видеть', 'хотя', 'поставлять', 'сервис']","[0.5922351181507111, 0.6998542547225952, 0.602...","[0.7729345411062241, 0.6385719478130341, 0.801...","[0.7952060848474503, 0.7497720122337341, 0.554..."
2,2,3629,"Вот так ""Мой любимый"" банк МКБ меня обманул.",?,,−,"['любимый', 'банк', 'мкб', 'обманывать']","[0.8300695419311523, 0.5762559175491333, 0.894...","[0.8049450218677521, 0.6747770309448242, 0.675...","[0.8406572937965393, 0.8336348682641983, 0.711..."
3,3,11640,Отвратительное отношение к клиентам.,Communication,,−,"['отвратительный', 'отношение', 'клиент']","[0.6871592104434967, 0.7127627432346344, 0.688...","[0.7631731331348419, 0.8094930648803711, 0.674...","[0.47836410999298096, 0.747184157371521, 0.639..."
4,4,5571,"Всегда в любое время дня и ночи помогут, ответ...",Communication,,+,"['любой', 'время', 'день', 'ночь', 'помогать',...","[0.5456125140190125, 0.6357545256614685, 0.684...","[0.7394724190235138, 0.5583707690238953, 0.758...","[0.7702769786119461, 0.758133590221405, 0.7482..."


In [164]:
my_df.dist_pos[0]

'[0.563639760017395, 0.5634748339653015, 0.6045732498168945, 0.8268502950668335, 0.6773145198822021, 0.6813562214374542, 0.8713621497154236, 0.6075133681297302, 0.7854927778244019, 0.5269941985607147, 0.511738508939743, 0.325394868850708, 0.6022545099258423, 0.6189061999320984, 0.7043701112270355, 0.5396408438682556, 0.37467634677886963, 0.6985333859920502, 0.7399607002735138, 0.6976710855960846, 0.7561313211917877, 0.7910308390855789, 0.7613090574741364, 0.6113036274909973, 0.7900069206953049, 0.6060005724430084, 0.653067946434021, 0.4662814736366272, 0.6646382808685303, 0.7968946695327759, 0.6795551776885986, 0.8368730843067169, 0.6643068194389343, 0.9219286292791367, 0.5908538997173309, 0.7304320335388184, 0.7111903131008148, 0.4381871819496155, 0.6982463002204895, 0.6788468658924103, 0.5610130727291107, 0.7868746072053909, 0.8790600597858429, 0.5843687355518341, 0.7062045037746429, 0.8864414542913437, 0.6362067461013794, 0.5415167808532715, 0.8055980354547501, 0.8017240613698959]'

In [167]:
def parse(s):
    """Convert a stringified numeric sequence into a float array.

    Accepts both the comma-separated form ``"[0.1, 0.2]"`` and the
    whitespace-separated form ``"[0.1 0.2]"``, including line breaks inside
    the brackets.

    Parameters
    ----------
    s : str
        Text of a bracketed sequence of numbers.

    Returns
    -------
    numpy.ndarray
        1-D float array; empty for ``"[]"``.

    Raises
    ------
    ValueError
        If a token cannot be converted to ``float``.
    """
    body = s.strip().strip('[]')
    # Treat commas as whitespace so both separators split the same way;
    # str.split() with no argument also swallows newlines and repeated spaces.
    tokens = body.replace(',', ' ').split()
    return np.array([float(token) for token in tokens], dtype=float)

In [170]:
# Seed each distance matrix with the parsed vector from the first row of my_df.
all_pos_dist = parse(my_df.iloc[0].dist_pos)
all_neg_dist = parse(my_df.iloc[0].dist_neg)
all_net_dist = parse(my_df.iloc[0].dist_net)

In [171]:
# Parse every row of my_df and stack each matrix once. Growing the arrays with
# np.vstack inside a loop re-copies the accumulated matrix on every iteration,
# which is quadratic in the number of rows. Stacking a list also yields a 2-D
# (1, n_features) result when my_df has a single row.
all_pos_dist = np.vstack([parse(value) for value in my_df.dist_pos])
all_neg_dist = np.vstack([parse(value) for value in my_df.dist_neg])
all_net_dist = np.vstack([parse(value) for value in my_df.dist_net])

In [172]:
print(all_pos_dist.shape)
print(all_neg_dist.shape)
print(all_net_dist.shape)

(19361, 50)
(19361, 50)
(19361, 50)


In [175]:
final_df = pd.concat([my_df.sentence, pd.DataFrame(all_pos_dist), pd.DataFrame(all_neg_dist), pd.DataFrame(all_net_dist)], axis=1)

In [176]:
final_df.shape

(19361, 151)

['sentence',
 'dist_pos_1',
 'dist_pos_2',
 'dist_pos_3',
 'dist_pos_4',
 'dist_pos_5',
 'dist_pos_6',
 'dist_pos_7',
 'dist_pos_8',
 'dist_pos_9',
 'dist_pos_10',
 'dist_pos_11',
 'dist_pos_12',
 'dist_pos_13',
 'dist_pos_14',
 'dist_pos_15',
 'dist_pos_16',
 'dist_pos_17',
 'dist_pos_18',
 'dist_pos_19',
 'dist_pos_20',
 'dist_pos_21',
 'dist_pos_22',
 'dist_pos_23',
 'dist_pos_24',
 'dist_pos_25',
 'dist_pos_26',
 'dist_pos_27',
 'dist_pos_28',
 'dist_pos_29',
 'dist_pos_30',
 'dist_pos_31',
 'dist_pos_32',
 'dist_pos_33',
 'dist_pos_34',
 'dist_pos_35',
 'dist_pos_36',
 'dist_pos_37',
 'dist_pos_38',
 'dist_pos_39',
 'dist_pos_40',
 'dist_pos_41',
 'dist_pos_42',
 'dist_pos_43',
 'dist_pos_44',
 'dist_pos_45',
 'dist_pos_46',
 'dist_pos_47',
 'dist_pos_48',
 'dist_pos_49',
 'dist_pos_50',
 'dist_neg_1',
 'dist_neg_2',
 'dist_neg_3',
 'dist_neg_4',
 'dist_neg_5',
 'dist_neg_6',
 'dist_neg_7',
 'dist_neg_8',
 'dist_neg_9',
 'dist_neg_10',
 'dist_neg_11',
 'dist_neg_12',
 'dist_neg_13

In [178]:
# Name the feature columns after their source matrix, numbering from 1.
# Widths are read from the stacked arrays rather than hard-coded as 50, so the
# assignment stays valid if the word lists change size.
# NOTE(review): get_features() numbers its columns from 0 (dist_pos_0, ...);
# confirm which convention downstream consumers expect.
final_df.columns = ['sentence'] + [
    f"dist_{kind}_{i}"
    for kind, width in (
        ('pos', all_pos_dist.shape[1]),
        ('neg', all_neg_dist.shape[1]),
        ('net', all_net_dist.shape[1]),
    )
    for i in range(1, width + 1)
]

In [179]:
final_df.head()

Unnamed: 0,sentence,dist_pos_1,dist_pos_2,dist_pos_3,dist_pos_4,dist_pos_5,dist_pos_6,dist_pos_7,dist_pos_8,dist_pos_9,dist_pos_10,dist_pos_11,dist_pos_12,dist_pos_13,dist_pos_14,dist_pos_15,dist_pos_16,dist_pos_17,dist_pos_18,dist_pos_19,dist_pos_20,dist_pos_21,dist_pos_22,dist_pos_23,dist_pos_24,dist_pos_25,dist_pos_26,dist_pos_27,dist_pos_28,dist_pos_29,dist_pos_30,dist_pos_31,dist_pos_32,dist_pos_33,dist_pos_34,dist_pos_35,dist_pos_36,dist_pos_37,dist_pos_38,dist_pos_39,...,dist_net_11,dist_net_12,dist_net_13,dist_net_14,dist_net_15,dist_net_16,dist_net_17,dist_net_18,dist_net_19,dist_net_20,dist_net_21,dist_net_22,dist_net_23,dist_net_24,dist_net_25,dist_net_26,dist_net_27,dist_net_28,dist_net_29,dist_net_30,dist_net_31,dist_net_32,dist_net_33,dist_net_34,dist_net_35,dist_net_36,dist_net_37,dist_net_38,dist_net_39,dist_net_40,dist_net_41,dist_net_42,dist_net_43,dist_net_44,dist_net_45,dist_net_46,dist_net_47,dist_net_48,dist_net_49,dist_net_50
0,При этом всегда получал качественные услуги.,0.56364,0.563475,0.604573,0.82685,0.677315,0.681356,0.871362,0.607513,0.785493,0.526994,0.511739,0.325395,0.602255,0.618906,0.70437,0.539641,0.374676,0.698533,0.739961,0.697671,0.756131,0.791031,0.761309,0.611304,0.790007,0.606001,0.653068,0.466281,0.664638,0.796895,0.679555,0.836873,0.664307,0.921929,0.590854,0.730432,0.71119,0.438187,0.698246,...,0.663768,0.726199,0.493996,0.672942,0.706205,0.924446,0.681356,0.827803,0.748079,0.840688,0.602255,0.555884,0.73966,0.842304,0.657227,0.59485,0.817767,0.843532,0.749994,0.570097,0.456691,0.722134,0.715461,0.698424,0.681406,0.794674,0.604929,0.440418,0.573373,0.853535,0.769645,0.739961,0.679161,0.744761,0.660604,0.584369,0.551746,0.382294,0.625063,0.541154
1,"Не вижу, за что хотя бы 2 поставить, сервис на 1!",0.592235,0.699854,0.602334,0.691415,0.78329,0.748348,0.890328,0.71187,0.767373,0.677952,0.715081,0.318097,0.555896,0.0,0.719439,0.532464,0.39725,0.846544,0.825369,0.723575,0.71352,0.859307,0.756856,0.746809,0.758542,0.660923,0.69518,0.474854,0.712624,0.836341,0.808552,0.802712,0.694732,0.902253,0.576117,0.784415,0.727418,0.483884,0.580997,...,0.634541,0.723414,0.581982,0.856826,0.846838,0.826977,0.748348,0.832751,0.790839,0.613715,0.555896,0.626793,0.829058,0.85583,0.640282,0.621167,0.872314,0.782202,0.793538,0.582721,0.482961,0.777495,0.698763,0.781379,0.698506,0.77055,0.840832,0.49157,0.696643,0.820352,0.814345,0.825369,0.655723,0.821223,0.712268,0.630424,0.550116,0.288571,0.532063,0.525663
2,"Вот так ""Мой любимый"" банк МКБ меня обманул.",0.83007,0.576256,0.894284,0.842451,0.925448,0.759418,0.897289,0.653186,0.824179,0.550443,0.86906,0.743772,0.856694,0.772065,0.882951,0.639414,0.73542,0.91425,0.785118,0.699959,0.816599,0.877733,0.813853,0.707285,0.57372,0.800277,0.841217,0.804236,0.890761,0.719289,0.806085,0.957085,0.821176,0.659241,0.759423,0.67693,0.891091,0.824627,0.742756,...,0.792093,0.714284,0.754613,0.851876,0.62541,0.824958,0.759418,0.400574,0.710632,0.659231,0.856694,0.486155,0.739217,0.785402,0.853376,0.889806,0.687162,0.817522,0.803852,0.779243,0.659354,0.80044,0.843081,0.740775,0.820585,0.89527,0.810957,0.705759,0.720244,0.857992,0.712101,0.785118,0.90505,0.718057,0.719754,0.539725,0.862632,0.782303,0.895211,0.657573
3,Отвратительное отношение к клиентам.,0.687159,0.712763,0.688374,0.881012,0.822136,0.803592,0.954958,0.725874,0.811635,0.547289,0.682002,0.664388,0.635821,0.684695,0.663675,0.75612,0.736641,0.777411,0.834056,0.837066,0.807777,0.919692,0.699118,0.713983,0.757789,0.749627,0.821164,0.773698,0.76865,0.726647,0.79461,0.829918,0.791998,0.906614,0.628393,0.660462,0.817368,0.718993,0.793414,...,0.791631,0.830752,0.775713,0.877115,0.692904,0.973786,0.803592,0.660292,0.664062,0.453752,0.635821,0.575983,0.708634,0.90756,0.759947,0.7017,0.818867,0.797177,0.884224,0.763197,0.720639,0.745125,0.89269,0.891967,0.807196,0.908751,0.83129,0.705851,0.794713,0.670858,0.737795,0.834056,0.856597,0.871679,0.798442,0.659338,0.792868,0.588442,0.833514,0.828155
4,"Всегда в любое время дня и ночи помогут, ответ...",0.545613,0.635755,0.68419,0.72287,0.882303,0.0,0.856606,0.472426,0.80693,0.585464,0.673702,0.514122,0.595972,0.841492,0.770691,0.488068,0.58852,0.666326,0.797814,0.572899,0.747815,0.734636,0.717996,0.676953,0.946811,0.633074,0.501763,0.582714,0.685944,0.742056,0.759185,0.802862,0.766138,1.095854,0.0,0.677701,0.611207,0.576098,0.589947,...,0.365453,0.626695,0.452574,0.977676,0.882332,0.823529,0.0,0.860503,0.729326,0.745721,0.595972,0.436192,0.702079,0.801969,0.680465,0.561491,0.830979,0.882781,0.75813,0.688514,0.355247,0.784346,0.717858,0.65542,0.703201,0.512498,0.933354,0.520143,0.549932,0.757265,0.570241,0.797814,0.672911,0.82833,0.614425,0.588129,0.656092,0.566019,0.515005,0.587285


In [180]:
final_df.to_csv("distances_sentiments.csv")