In [2]:
from catboost import CatBoostClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
%matplotlib inline
import joblib
from gensim.models.fasttext import FastText
import pandas as pd
import numpy as np
import re
import difflib
import xlsxwriter
from tqdm.notebook import tqdm
from termcolor import colored, cprint
tqdm.pandas()

In [3]:
joblib.__version__

'1.1.1'

In [4]:
def tfidf_featuring(tfidf, df):   
    '''–ü—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ —Ç–µ–∫—Å—Ç–∞ –≤ –º–µ—à–æ–∫ —Å–ª–æ–≤'''
    X_tfidf = tfidf.transform(df['text'])
    feature_names = tfidf.get_feature_names_out()
    X_tfidf = pd.DataFrame(X_tfidf.toarray(), columns = feature_names, index = df.index)
    
    return X_tfidf

def sentences_split(text):
    # —Ä–∞–∑–±–∏–≤–∞–µ—Ç —Ç–µ–∫—Å—Ç –Ω–∞ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏—è
    try:
        # TODO –æ—Å—Ç–∞–≤–∏—Ç—å —Ç–æ—á–∫–∏ –ø–æ—Å–ª–µ —á–∏—Å–ª–∞ (?<![0-9])\.  (?<![0-9])\.(?![0-9])
        # TODO —É–¥–∞–ª–∏—Ç—å —Ç–µ–∫—Å—Ç –¥–æ —Ç—Ä–µ–±—É—é—É—Ç—Å—è, —Ç—Ä—É–±—É–µ—Ç—Å—è r'—Ç—Ä–µ–±—É—é—Ç—Å—è:([^<>]+)'
        #—Ä–∞–∑–¥–µ–ª–µ–Ω–∏–µ –Ω–∞ —Ñ—Ä–∞–∑—ã. –∏–≥–Ω–æ—Ä–∏—Ä–æ–≤–∞—Ç—å —Ç–æ—á–∫—É –ø–æ—Å–ª–µ —á–∏—Å–ª–∞. —Å–º–∞–π–ª–∏–∫–∏ - —Ä–∞–∑–¥–µ–ª–∏—Ç–µ–ª–∏ |[^\x00-\x7F–∞-—è–ê-–Ø]
        pattern = r'\;| ,|(?<![0-9])\.|\n|\‚Ä¢|‚Äî|-|!|–æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏|—Ç—Ä–µ–±–æ–≤–∞–Ω–∏—è|—É—Å–ª–æ–≤–∏—è|[^\x00-\x7F–∞-—è–ê-–Ø]'
        sentences = re.compile(pattern).split(text.lower()) 
        #—É–¥–∞–ª—è–µ–º –Ω–µ –±—É–∫–≤—ã –≤ –Ω–∞—á–∞–ª–µ –∏ –∫–æ–Ω—Ü–µ —Ñ—Ä–∞–∑—ã
        pattern = r'^[^–∞-—è–ê-–Ø—ë–Å]+|[^–∞-—è–ê-–Ø—ë–Å]+$'
        sentences = [re.sub(pattern, '', sen) for sen in sentences]
        return[sen for sen in sentences if len(sen)>0]
    except:
        return []

In [5]:
def sentences_df(df, part=None):
    # –¥–∞—Ç–∞—Ñ—Ä–µ–π–º –¥–ª—è –∏–∑–≤–ª–µ—á–µ–Ω–∏—è, part —Å–∫–æ–ª—å–∫–æ —Å—Ç—Ä–æ–∫ –∏–∑–≤–¥–ª–µ–∫–∞–µ–º
    test_=[]
    idxs = df.index.tolist()
    for idx in idxs[0:part]:
        text = df['responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏)'][idx]
        #print(sentences_split(text))
        test_.append(sentences_split(text))
    return test_

In [6]:
def sort_respons(sentences):
    # —Å–æ—Ä—Ç–∏—Ä—É–µ—Ç –∫–æ–ª–æ–Ω–∫—É –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏ –Ω–∞ —Ç—Ä–∏ —Ä–∞–∑–Ω—ã—Ö
    responsibilities = []
    requirements =[]
    terms=[]
    for idx in tqdm(range(0, len(sentences))):
        test_tfidf = tfidf_featuring(tfidf, pd.DataFrame({"text": sentences[idx]}))
        catc_proba = model.predict_proba(test_tfidf)

        temp_ = pd.DataFrame({"text": sentences[idx]})
        temp_['target']= np.argmax(catc_proba, axis=1)
        temp_['proba']= np.amax(catc_proba, axis=1)
        temp_['target']= temp_['target'].replace({0: "–û–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏", 1: '–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è', 2: '–£—Å–ª–æ–≤–∏—è —Ä–∞–±–æ—Ç—ã'}, regex=True)
        #–ø–æ–∫–∞ –±–µ–∑ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–µ–π
        resp= temp_[(temp_['target']=='–û–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏')&(temp_['proba']>=0.75)]['text'].tolist()
        responsibilities.append(". ".join([i.capitalize() for i in resp]))
        req = temp_[(temp_['target']=='–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è')&(temp_['proba']>=0.75)]['text'].tolist()
        requirements.append(". ".join([i.capitalize() for i in req]))
        ter = temp_[(temp_['target']=='–£—Å–ª–æ–≤–∏—è —Ä–∞–±–æ—Ç—ã')&(temp_['proba']>=0.75)]['text'].tolist()
        terms.append(". ".join([i.capitalize() for i in ter]))
        
    return responsibilities, requirements, terms

In [7]:
model = CatBoostClassifier(loss_function='MultiClass', random_state=42)
tfidf = TfidfVectorizer()

In [8]:
# –∑–∞–≥—Ä—É–∑–∫–∞ –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ç–æ—Ä–∞
model.load_model('model/model.cbm')
# –∑–∞–≥—Ä—É–∑–∫–∞ –≤–µ–∫—Ç–æ—Ä–∏–∑–∞—Ç–æ—Ä–∞
tfidf = joblib.load('model/tfidf.pkl')
#model = FastText.load("model/fasttext.model")

In [9]:
data = pd.read_excel('data/–î–∞—Ç–∞—Å–µ—Ç.xlsx', index_col=0)

In [10]:
data

Unnamed: 0_level_0,name(–Ω–∞–∑–≤–∞–Ω–∏–µ),specialization(—Å–ø–µ—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è),responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏),requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é),terms(–£—Å–ª–æ–≤–∏—è),skills(–ö–ª—é—á–µ–≤—ã–µ –Ω–∞–≤—ã–∫–∏),salary_from,salary_to,object,city,...,updated_by,position,phone,website,email,image,unique_code,city_code,source_id,link_resource
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9495846,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–†–∞–±–æ—Ç–∞ –Ω–∞ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å–Ω—ã—Ö –ø–ª–æ—â–∞–¥–∫–∞—Ö. –û—á–∏—Å—Ç–Ω—ã—Ö —Å–æ–æ...,,,,150000,170000,,–î–∏–º–∏—Ç—Ä–æ–≤–≥—Ä–∞–¥,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,hh,hh@hh.ru,\N,HH-81058502,\N,1,https://hh.ru/vacancy/81058502
9495850,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ì–Ω—É—Ç—å–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π —Å—Ç–∞–ª–∏ –Ω–∞ –º–µ—Ö–∞–Ω–∏—á–µ—Å–∫–∏—Ö —Å—Ç–∞–Ω–∫–∞...,,,,130000,150000,,–ö–æ–ø–µ–π—Å–∫,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,hh,hh@hh.ru,\N,HH-80163818,\N,1,https://hh.ru/vacancy/80163818
9495851,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–í—è–∑–∫–∞ –∞—Ä–º–∞—Ç—É—Ä—ã.,,,,85000,90000,,–Ø—Ä–æ—Å–ª–∞–≤–ª—å,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,hh,hh@hh.ru,\N,HH-77729491,\N,1,https://hh.ru/vacancy/77729491
2,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–í—ã–ø–æ–ª–Ω–µ–Ω–∏–µ —Ä–∞–±–æ—Ç –ø–æ –≥–Ω—É—Ç—å—é –∏ —Ä–µ–∑–∫–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π ...,–í—ã–ø–æ–ª–Ω–µ–Ω–∏–µ —Ä–∞–±–æ—Ç –ø–æ –≥–Ω—É—Ç—å—é –∏ —Ä–µ–∑–∫–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π ...,,\N,50000,100000,"–û–û–û ""–ê–õ–¨–ú–ò–°_–ò–ù–¢–ï–ì–†–ê–õ""",\N,...,1,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,74959214241,https://almisintegral.ru/,info@almisintegral.ru,\N,\N,\N,1,\N
9388857,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫ –í–∞—Ö—Ç–∞ –≤ –ºc–∫ (–ø–∏—Ç–∞–Ω–∏–µ+–ø—Ä–æ–∂–∏–≤–∞–Ω–∏–µ) 60/30,–í–∞—Ö—Ç–∞ –≤ –≥–æ—Ä–æ–¥ –ú–æ—Å–∫–≤–∞. –û–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏: - –∞—Ä–º–∏—Ä–æ–≤–∞...,–æ–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è; —Ä–∞–±–æ—Ç–∞ –≤ ...,–ø—Ä–æ–¥–æ–ª–∂–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –≤–∞—Ö—Ç—ã 60/30 (–ø—Ä–æ–¥–ª–µ–Ω–∏–µ –≤–∞—Ö—Ç—ã...,,67000,134000,,–ö–∏—Ä–æ–≤—Å–∫,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,avito,avito@avito.ru,\N,A-3037298578,\N,1,https://avito.ru/3037298578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9306889,–ò–Ω–∂–µ–Ω–µ—Ä,–ò–Ω–∂–µ–Ω–µ—Ä –∫–æ–Ω—Å—Ç—Ä—É–∫—Ç–æ—Ä,–û–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏: –ü—Ä–æ–µ–∫—Ç–∏—Ä–æ–≤–∞–Ω–∏–µ —É–ª—å—Ç—Ä–∞–∑–≤—É–∫–æ–≤—ã—Ö –ø—Ä–µ...,,–æ—Ñ–∏—Å–Ω—ã–µ —É—Å–ª–æ–≤–∏—è,,35000,35000,,–¢–∞–≥–∞–Ω—Ä–æ–≥,...,\N,–ò–Ω–∂–µ–Ω–µ—Ä,,avito,avito@avito.ru,\N,A-1580492578,\N,1,https://avito.ru/1580492578
9306890,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫ –≤–µ–Ω—Ç–∏–ª—è—Ü–∏–∏/—Ä–∞–∑–Ω–æ—Ä–∞–±–æ—á–∏–π,"–í–Ω–∏–º–∞–Ω–∏–µ –ø–æ–∫–∞ –æ–±—å—è–≤–ª–µ–Ω–∏–µ –≤–∏—Å–∏—Ç, –≤–µ–¥–µ—Ç—Å—è –Ω–∞–±–æ—Ä!...",–ü—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è –æ–ø—ã—Ç –ø–æ —É—Å—Ç–∞–Ω–æ–≤–∫–µ —Å–∏—Å—Ç–µ–º –∫–æ–Ω–¥–∏—Ü...,–û–ø–ª–∞—Ç–∞ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç—Å—è —Ä–∞–∑ –≤ –Ω–µ–¥–µ–ª—é –ø–æ —Å—É–±–±–æ—Ç–∞–º!...,,1800,46800,,–ö—Ä–∞—Å–Ω–æ–¥–∞—Ä,...,\N,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫,,avito,avito@avito.ru,\N,A-1578246023,\N,1,https://avito.ru/1578246023
9306891,–°–ª–µ—Å–∞—Ä—å-–†–µ–º–æ–Ω—Ç–Ω–∏–∫,–°–ª–µ—Å–∞—Ä—å - –∞–≤—Ç–æ–º–µ—Ö–∞–Ω–∏–∫ –≤ —Ü–µ—Ö –ø–æ —Ä–µ–º–æ–Ω—Ç—É –∫–∞—Ä–¥–∞–Ω–æ–≤,–¢—Ä–µ–±—É–µ—Ç—Å—è —Å–ª–µ—Å–∞—Ä—å –≤ —Ü–µ—Ö –ø–æ —Ä–µ–º–æ–Ω—Ç—É –∏ –±–∞–ª–∞–Ω—Å–∏—Ä–æ...,–û–±—É—á–∞–µ–º. –¢–æ–∫–∞—Ä–Ω—ã–µ –Ω–∞–≤—ã–∫–∏ –∏ —Å–≤–∞—Ä–æ—á–Ω—ã–µ –Ω–∞–≤—ã–∫–∏ –ø—Ä...,"–ó–∞—Ä–ø–ª–∞—Ç–∞, –≥—Ä–∞—Ñ–∏–∫ —Ä–∞–±–æ—Ç—ã –æ–±—Å—É–∂–¥–∞—é—Ç—Å—è.",,50000,50000,,–ú–æ—Å–∫–≤–∞,...,\N,–°–ª–µ—Å–∞—Ä—å-–†–µ–º–æ–Ω—Ç–Ω–∏–∫,,avito,avito@avito.ru,\N,A-928159469,\N,1,https://avito.ru/928159469
9306892,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,"–û–∫–ª–µ–π—â–∏–∫ –∞–≤—Ç–æ–º–æ–±–∏–ª–µ–π, –ø–æ–ª–∏—É—Ä–µ—Ç–∞–Ω –≤–∏–Ω–∏–ª, –∞—Ä–º–∞—Ç—É...",üî• –ö –Ω–∞–º –≤ –∫–æ–º–∞–Ω–¥—É —Ç—Ä–µ–±—É—é—Ç—Å—è: üî∏ –ú–∞—Å—Ç–µ—Ä –ø–æ –æ–∫–ª–µ...,"–ø—ã—Ç —Ä–∞–±–æ—Ç—ã, –∑–Ω–∞–Ω–∏–µ –∏ –ø–æ–Ω–∏–º–∞–Ω–∏–µ –ø—Ä–∏–Ω—Ü–∏–ø–æ–≤ —Ä–∞–±–æ—Ç...",,,120000,200000,,–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,avito,avito@avito.ru,\N,A-1047100555,\N,1,https://avito.ru/1047100555


In [11]:
sentences = sentences_df(data)

In [12]:
responsibilities, requirements, terms = sort_respons(sentences)

  0%|          | 0/999 [00:00<?, ?it/s]

In [13]:
#data['responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏)'] = responsibilities
data['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'] = requirements
data['terms(–£—Å–ª–æ–≤–∏—è)'] = terms

In [14]:
data[['name(–Ω–∞–∑–≤–∞–Ω–∏–µ)', 'responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏)', 'requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)', 'terms(–£—Å–ª–æ–≤–∏—è)']]

Unnamed: 0_level_0,name(–Ω–∞–∑–≤–∞–Ω–∏–µ),responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏),requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é),terms(–£—Å–ª–æ–≤–∏—è)
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9495846,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–†–∞–±–æ—Ç–∞ –Ω–∞ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å–Ω—ã—Ö –ø–ª–æ—â–∞–¥–∫–∞—Ö. –û—á–∏—Å—Ç–Ω—ã—Ö —Å–æ–æ...,,
9495850,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ì–Ω—É—Ç—å–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π —Å—Ç–∞–ª–∏ –Ω–∞ –º–µ—Ö–∞–Ω–∏—á–µ—Å–∫–∏—Ö —Å—Ç–∞–Ω–∫–∞...,,
9495851,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–í—è–∑–∫–∞ –∞—Ä–º–∞—Ç—É—Ä—ã.,,
2,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–í—ã–ø–æ–ª–Ω–µ–Ω–∏–µ —Ä–∞–±–æ—Ç –ø–æ –≥–Ω—É—Ç—å—é –∏ —Ä–µ–∑–∫–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π ...,,
9388857,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–í–∞—Ö—Ç–∞ –≤ –≥–æ—Ä–æ–¥ –ú–æ—Å–∫–≤–∞. –û–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏: - –∞—Ä–º–∏—Ä–æ–≤–∞...,–û–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è,–í–∞—Ö—Ç–∞ –≤ –≥–æ—Ä–æ–¥ –º–æ—Å–∫–≤–∞. –ü—Ä–æ–¥–æ–ª–∂–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –≤–∞—Ö—Ç—ã ...
...,...,...,...,...
9306889,–ò–Ω–∂–µ–Ω–µ—Ä,–û–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏: –ü—Ä–æ–µ–∫—Ç–∏—Ä–æ–≤–∞–Ω–∏–µ —É–ª—å—Ç—Ä–∞–∑–≤—É–∫–æ–≤—ã—Ö –ø—Ä–µ...,–ó–Ω–∞–Ω–∏–µ –∫–æ–Ω—Å—Ç—Ä—É–∫—Ç–æ—Ä—Å–∫–∏—Ö –ø—Ä–æ–≥—Ä–∞–º–º –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è,
9306890,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫,"–í–Ω–∏–º–∞–Ω–∏–µ –ø–æ–∫–∞ –æ–±—å—è–≤–ª–µ–Ω–∏–µ –≤–∏—Å–∏—Ç, –≤–µ–¥–µ—Ç—Å—è –Ω–∞–±–æ—Ä!...",–ü—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è –æ–ø—ã—Ç –ø–æ —É—Å—Ç–∞–Ω–æ–≤–∫–µ —Å–∏—Å—Ç–µ–º –∫–æ–Ω–¥–∏—Ü...,–û–ø–ª–∞—Ç–∞ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç—Å—è —Ä–∞–∑ –≤ –Ω–µ–¥–µ–ª—é –ø–æ —Å—É–±–±–æ—Ç–∞–º. ...
9306891,–°–ª–µ—Å–∞—Ä—å-–†–µ–º–æ–Ω—Ç–Ω–∏–∫,–¢—Ä–µ–±—É–µ—Ç—Å—è —Å–ª–µ—Å–∞—Ä—å –≤ —Ü–µ—Ö –ø–æ —Ä–µ–º–æ–Ω—Ç—É –∏ –±–∞–ª–∞–Ω—Å–∏—Ä–æ...,"–†–∞–±–æ—Ç–µ –Ω–∞ –±–∞–ª–∞–Ω—Å–∏—Ä–æ–≤–æ—á–Ω–æ–º, —Å–≤–∞—Ä–æ—á–Ω–æ–º —Å—Ç–µ–Ω–¥–µ. –¢...","–ó–∞—Ä–ø–ª–∞—Ç–∞, –≥—Ä–∞—Ñ–∏–∫ —Ä–∞–±–æ—Ç—ã –æ–±—Å—É–∂–¥–∞—é—Ç—Å—è"
9306892,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,üî• –ö –Ω–∞–º –≤ –∫–æ–º–∞–Ω–¥—É —Ç—Ä–µ–±—É—é—Ç—Å—è: üî∏ –ú–∞—Å—Ç–µ—Ä –ø–æ –æ–∫–ª–µ...,"–û–ø—ã—Ç —Ä–∞–±–æ—Ç—ã, –∑–Ω–∞–Ω–∏–µ –∏ –ø–æ–Ω–∏–º–∞–Ω–∏–µ –ø—Ä–∏–Ω—Ü–∏–ø–æ–≤ —Ä–∞–±–æ...",


In [15]:
data['responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏)'] = responsibilities

In [16]:
data

Unnamed: 0_level_0,name(–Ω–∞–∑–≤–∞–Ω–∏–µ),specialization(—Å–ø–µ—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è),responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏),requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é),terms(–£—Å–ª–æ–≤–∏—è),skills(–ö–ª—é—á–µ–≤—ã–µ –Ω–∞–≤—ã–∫–∏),salary_from,salary_to,object,city,...,updated_by,position,phone,website,email,image,unique_code,city_code,source_id,link_resource
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9495846,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,,,,150000,170000,,–î–∏–º–∏—Ç—Ä–æ–≤–≥—Ä–∞–¥,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,hh,hh@hh.ru,\N,HH-81058502,\N,1,https://hh.ru/vacancy/81058502
9495850,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ì–Ω—É—Ç—å–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π —Å—Ç–∞–ª–∏ –Ω–∞ –º–µ—Ö–∞–Ω–∏—á–µ—Å–∫–∏—Ö —Å—Ç–∞–Ω–∫–∞...,,,,130000,150000,,–ö–æ–ø–µ–π—Å–∫,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,hh,hh@hh.ru,\N,HH-80163818,\N,1,https://hh.ru/vacancy/80163818
9495851,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,,,,85000,90000,,–Ø—Ä–æ—Å–ª–∞–≤–ª—å,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,hh,hh@hh.ru,\N,HH-77729491,\N,1,https://hh.ru/vacancy/77729491
2,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–í—ã–ø–æ–ª–Ω–µ–Ω–∏–µ —Ä–∞–±–æ—Ç –ø–æ –≥–Ω—É—Ç—å—é –∏ —Ä–µ–∑–∫–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π ...,,,\N,50000,100000,"–û–û–û ""–ê–õ–¨–ú–ò–°_–ò–ù–¢–ï–ì–†–ê–õ""",\N,...,1,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,74959214241,https://almisintegral.ru/,info@almisintegral.ru,\N,\N,\N,1,\N
9388857,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫ –í–∞—Ö—Ç–∞ –≤ –ºc–∫ (–ø–∏—Ç–∞–Ω–∏–µ+–ø—Ä–æ–∂–∏–≤–∞–Ω–∏–µ) 60/30,–ü–æ–º–æ—â—å –≤ –ø—Ä–æ—Ö–æ–∂–¥–µ–Ω–∏–µ –º–µ–¥–∏—Ü–∏–Ω—Å–∫–æ–≥–æ –æ—Å–º–æ—Ç—Ä–∞,–û–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è,–í–∞—Ö—Ç–∞ –≤ –≥–æ—Ä–æ–¥ –º–æ—Å–∫–≤–∞. –ü—Ä–æ–¥–æ–ª–∂–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –≤–∞—Ö—Ç—ã ...,,67000,134000,,–ö–∏—Ä–æ–≤—Å–∫,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,avito,avito@avito.ru,\N,A-3037298578,\N,1,https://avito.ru/3037298578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9306889,–ò–Ω–∂–µ–Ω–µ—Ä,–ò–Ω–∂–µ–Ω–µ—Ä –∫–æ–Ω—Å—Ç—Ä—É–∫—Ç–æ—Ä,–ü—Ä–æ–µ–∫—Ç–∏—Ä–æ–≤–∞–Ω–∏–µ —É–ª—å—Ç—Ä–∞–∑–≤—É–∫–æ–≤—ã—Ö –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞—Ç–µ–ª–µ–π...,–ó–Ω–∞–Ω–∏–µ –∫–æ–Ω—Å—Ç—Ä—É–∫—Ç–æ—Ä—Å–∫–∏—Ö –ø—Ä–æ–≥—Ä–∞–º–º –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è,,,35000,35000,,–¢–∞–≥–∞–Ω—Ä–æ–≥,...,\N,–ò–Ω–∂–µ–Ω–µ—Ä,,avito,avito@avito.ru,\N,A-1580492578,\N,1,https://avito.ru/1580492578
9306890,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫ –≤–µ–Ω—Ç–∏–ª—è—Ü–∏–∏/—Ä–∞–∑–Ω–æ—Ä–∞–±–æ—á–∏–π,,–ü—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è –æ–ø—ã—Ç –ø–æ —É—Å—Ç–∞–Ω–æ–≤–∫–µ —Å–∏—Å—Ç–µ–º –∫–æ–Ω–¥–∏—Ü...,–û–ø–ª–∞—Ç–∞ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç—Å—è —Ä–∞–∑ –≤ –Ω–µ–¥–µ–ª—é –ø–æ —Å—É–±–±–æ—Ç–∞–º. ...,,1800,46800,,–ö—Ä–∞—Å–Ω–æ–¥–∞—Ä,...,\N,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫,,avito,avito@avito.ru,\N,A-1578246023,\N,1,https://avito.ru/1578246023
9306891,–°–ª–µ—Å–∞—Ä—å-–†–µ–º–æ–Ω—Ç–Ω–∏–∫,–°–ª–µ—Å–∞—Ä—å - –∞–≤—Ç–æ–º–µ—Ö–∞–Ω–∏–∫ –≤ —Ü–µ—Ö –ø–æ —Ä–µ–º–æ–Ω—Ç—É –∫–∞—Ä–¥–∞–Ω–æ–≤,"–°–Ω—è—Ç–∏–µ —É—Å—Ç–∞–Ω–æ–≤–∫–∞ –≤–∞–ª–æ–≤, —Ä–∞–∑–±–æ—Ä–∫–∞ —Å–±–æ—Ä–∫–∞ –∏ –±–∞–ª–∞...","–†–∞–±–æ—Ç–µ –Ω–∞ –±–∞–ª–∞–Ω—Å–∏—Ä–æ–≤–æ—á–Ω–æ–º, —Å–≤–∞—Ä–æ—á–Ω–æ–º —Å—Ç–µ–Ω–¥–µ. –¢...","–ó–∞—Ä–ø–ª–∞—Ç–∞, –≥—Ä–∞—Ñ–∏–∫ —Ä–∞–±–æ—Ç—ã –æ–±—Å—É–∂–¥–∞—é—Ç—Å—è",,50000,50000,,–ú–æ—Å–∫–≤–∞,...,\N,–°–ª–µ—Å–∞—Ä—å-–†–µ–º–æ–Ω—Ç–Ω–∏–∫,,avito,avito@avito.ru,\N,A-928159469,\N,1,https://avito.ru/928159469
9306892,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,"–û–∫–ª–µ–π—â–∏–∫ –∞–≤—Ç–æ–º–æ–±–∏–ª–µ–π, –ø–æ–ª–∏—É—Ä–µ—Ç–∞–Ω –≤–∏–Ω–∏–ª, –∞—Ä–º–∞—Ç—É...",–ú–∞—Å—Ç–µ—Ä –ø–æ –æ–∫–ª–µ–π–∫–µ –∞–≤—Ç–æ–º–æ–±–∏–ª–µ–π –≤ –∑–∞—â–∏—Ç–Ω—ã–µ –ø–ª–µ–Ω–∫...,"–û–ø—ã—Ç —Ä–∞–±–æ—Ç—ã, –∑–Ω–∞–Ω–∏–µ –∏ –ø–æ–Ω–∏–º–∞–Ω–∏–µ –ø—Ä–∏–Ω—Ü–∏–ø–æ–≤ —Ä–∞–±–æ...",,,120000,200000,,–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥,...,\N,–ê—Ä–º–∞—Ç—É—Ä—â–∏–∫,,avito,avito@avito.ru,\N,A-1047100555,\N,1,https://avito.ru/1047100555


–°–æ—Ö—Ä–∞–Ω–∏—Ç—å –Ω–æ–≤—ã–π exel —Ñ–∞–π–ª

In [17]:
with pd.ExcelWriter("–†–µ—à–µ–Ω–∏–µ.xlsx", engine='xlsxwriter') as writer:
    data[['name(–Ω–∞–∑–≤–∞–Ω–∏–µ)', 'responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏)', 
          'requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)', 'terms(–£—Å–ª–æ–≤–∏—è)', 'skills(–ö–ª—é—á–µ–≤—ã–µ –Ω–∞–≤—ã–∫–∏)'
         ]].to_excel(writer, sheet_name="–†–µ–∑—É–ª—å—Ç–∞—Ç")
    writer.sheets['–†–µ–∑—É–ª—å—Ç–∞—Ç'].set_column(1, 1, 20)
    writer.sheets['–†–µ–∑—É–ª—å—Ç–∞—Ç'].set_column(2, 4, 60)

### –ü—Ä–æ–≤–µ—Ä–∏—Ç—å —Ç–æ—á–Ω–æ—Å—Ç—å –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è –¢—Ä–µ–±–æ–≤–∞–Ω–∏–π + —É—Å–ª–æ–≤–∏–π (–±–µ–∑ —Ä–∞–∑–¥–µ–ª–µ–Ω–∏—è)

In [18]:
# –∫–æ–ª–æ–Ω–∫–∞, –ø–æ–ª—É—á–µ–Ω–Ω–∞—è —Å –ø–æ–º–æ—â—å—é —Ñ–æ—Ä–º—É–ª—ã –≤ —è—á–µ–π–∫–µ –≤ —Ä–µ–∂–∏–º–µ —Ä–∞–∑—Ä–∞–±–æ—Ç—á–∏–∫–∞ exel
# —Ç–æ–ª—å–∫–æ –∑–µ–ª–µ–Ω—ã–π —Ç–µ–∫—Å—Ç responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏)
data_val = pd.read_excel('data/–î–∞—Ç–∞—Å–µ—Ç_—Ä–∞–∑–¥–µ–ª–µ–Ω–Ω—ã–π.xlsx', index_col=0)[["name(–Ω–∞–∑–≤–∞–Ω–∏–µ)", "no_responsibilities"]]

In [19]:
data_val['no_responsibilities'] = data_val['no_responsibilities'].fillna("")

#### –°—Ä–∞–≤–Ω–∏—Ç—å —Å —Ç–µ–∫—Å—Ç–æ–º –≤–∞–∫–∞–Ω—Å–∏–π, –≤—ã–¥–µ–ª–µ–Ω–Ω—ã–º –∑–µ–ª–µ–Ω—ã–º  
difflib.SequenceMatcher(isjunk=None, a='', b='', autojunk=True)  
isjunk=None - —Ñ—É–Ω–∫—Ü–∏—è, –∫–æ—Ç–æ—Ä–∞—è —Ñ–∏–ª—å—Ç—Ä—É–µ—Ç –º—É—Å–æ—Ä–Ω—ã–µ —ç–ª–µ–º–µ–Ω—Ç—ã,  
a, b - —Å—Ä–∞–≤–Ω–∏–≤–∞–µ–º—ã–µ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏,  
autojunk=True - –æ—Ç–∫–ª—é—á–µ–Ω–∏–µ –∞–≤—Ç–æ–º–∞—Ç–∏—á–µ—Å–∫–æ–π —ç–≤—Ä–∏—Å—Ç–∏–∫–∏ –º—É—Å–æ—Ä–∞.  

In [20]:
def get_similarity(data, data_val):
    # –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ —Å—Ö–æ–∂–µ—Å—Ç–∏ –≤—ã–¥–µ–ª–µ–Ω–Ω—ã—Ö —Ç—Ä–µ–±–æ–≤–∞–Ω–∏–π –∏ —É—Å–ª–æ–≤–∏–π —Å —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã–º–∏
    idxs = data.index.tolist()
    diffs=[]
    for idx in idxs:
        if (str(data['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'][idx])!="") & (str(data['terms(–£—Å–ª–æ–≤–∏—è)'][idx])!=""):
            str_pred = str(data['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'][idx]).lower() +" "+  str(data['terms(–£—Å–ª–æ–≤–∏—è)'][idx]).lower()
        elif str(data['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'][idx])!="":
            str_pred = str(data['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'][idx]).lower()
        else:
            str_pred = str(data['terms(–£—Å–ª–æ–≤–∏—è)'][idx]).lower()
            
        str_test = str(data_val['no_responsibilities'][idx]).lower()
        #–Ω–µ —É—á–∏—Ç—ã–≤–∞–µ–º –∑–Ω–∞–∫–∏ –ø—Ä–∏–ø–∏–Ω–∞–Ω–∏—è –∏ —Å–º–∞–π–ª–∏–∫–∏
        diff = difflib.SequenceMatcher(lambda x: x == " |;|:|.|!|,|\n|[^\x00-\x7F–∞-—è–ê-–Ø]", str_pred, str_test, autojunk=False).ratio() 
        diffs.append(round(diff, 3))
    return np.average(diffs), diffs

In [21]:
mean_diff, list_diff = get_similarity(data, data_val)

In [23]:
mean_diff

0.7182672672672673

In [24]:
temp = data_val[['no_responsibilities']].copy()
temp['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'] = data['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)']
temp['terms(–£—Å–ª–æ–≤–∏—è)'] = data['terms(–£—Å–ª–æ–≤–∏—è)']
temp['similarity']=list_diff

In [70]:
temp[3:12]

Unnamed: 0_level_0,no_responsibilities,requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é),terms(–£—Å–ª–æ–≤–∏—è),similarity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,–í—ã–ø–æ–ª–Ω–µ–Ω–∏–µ —Ä–∞–±–æ—Ç –ø–æ –≥–Ω—É—Ç—å—é –∏ —Ä–µ–∑–∫–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π ...,,,0.0
9388857,–æ–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è–ø—Ä–æ–¥–æ–ª–∂–∏—Ç–µ...,–û–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è,–í–∞—Ö—Ç–∞ –≤ –≥–æ—Ä–æ–¥ –º–æ—Å–∫–≤–∞. –ü—Ä–æ–¥–æ–ª–∂–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –≤–∞—Ö—Ç—ã ...,0.665
9388861,–°—Ç–∞–≤–∫–∞ –∑–∞ —Å–º–µ–Ω—É 3080 —Ä—É–± –ø—Ä–∏ –≥—Ä–∞—Ñ–∏–∫–µ 6/1 –ø–æ 11...,–†–∞–∑—Ä—è–¥–∞. –û–ø—ã—Ç —Ä–∞–±–æ—Ç—ã –æ—Ç 1 –≥–æ–¥–∞. –ù–∞–ª–∏—á–∏–µ —É–¥–æ—Å—Ç–æ...,–ü—Ä–µ–¥–æ—Å—Ç–∞–≤–ª—è–µ–º —á–∞—Å –æ–±–µ–¥–∞ + –ø–µ—Ä–µ—Ä—ã–≤—ã. –ú–∏–Ω. –¢—Ä—É–¥–æ...,0.683
9388874,–æ–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è -—Ä–∞–±–æ—Ç–∞ –≤ ...,–û–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è,–ü—Ä–æ–¥–æ–ª–∂–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –≤–∞—Ö—Ç—ã 60/30 (–ø—Ä–æ–¥–ª–µ–Ω–∏–µ –≤–∞—Ö—Ç—ã...,0.7
9388888,–æ–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è; - —Ä–∞–±–æ—Ç–∞ ...,–û–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è,–í–∞—Ö—Ç–∞ –≤ –≥–æ—Ä–æ–¥ –º–æ—Å–∫–≤–∞. –ü—Ä–æ–¥–æ–ª–∂–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –≤–∞—Ö—Ç—ã ...,0.689
9496164,,,,1.0
9496155,,,,1.0
9496169,,,,1.0
9336413,–ü–æ–Ω–∏–º–∞–Ω–∏–µ —Ä–∞–±–æ—Ç—ã –≤ –±—Ä–∏–≥–∞–¥–µ ¬∑ –î–∏—Å—Ü–∏–ø–ª–∏–Ω–∏—Ä–æ–≤–∞–Ω–Ω...,–ü–æ–Ω–∏–º–∞–Ω–∏–µ —Ä–∞–±–æ—Ç—ã –≤ –±—Ä–∏–≥–∞–¥–µ. –î–∏—Å—Ü–∏–ø–ª–∏–Ω–∏—Ä–æ–≤–∞–Ω–Ω–æ—Å...,"–ü—Ä–æ–¥–æ–ª–∂–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –≤–∞—Ö—Ç—ã 60/30, 90/30 (–º–æ–∂–Ω–æ –±–æ...",0.808


In [26]:
temp[temp['similarity']<0.1]

Unnamed: 0_level_0,no_responsibilities,requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é),terms(–£—Å–ª–æ–≤–∏—è),similarity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,–í—ã–ø–æ–ª–Ω–µ–Ω–∏–µ —Ä–∞–±–æ—Ç –ø–æ –≥–Ω—É—Ç—å—é –∏ —Ä–µ–∑–∫–µ –∞—Ä–º–∞—Ç—É—Ä–Ω–æ–π ...,,,0.0
9336456,,,–í–∞—Ö—Ç–æ–≤—ã–π –º–µ—Ç–æ–¥ —Ä–∞–±–æ—Ç—ã 60/30 –∑/–ø –∑–∞ –º–µ—Å—è—Ü. –ù–∞ ...,0.0
9336457,,,–í–∞—Ö—Ç–æ–≤—ã–π –º–µ—Ç–æ–¥ —Ä–∞–±–æ—Ç—ã 60/30 –∑/–ø –∑–∞ –º–µ—Å—è—Ü. –ù–∞ ...,0.0
9396310,,–û–ø—ã—Ç –æ—Ç –≥–æ–¥–∞. –û–ø—ã—Ç —Ä–∞–±–æ—Ç—ã –æ—Ç –≥–æ–¥–∞,–û–±—ä–µ–∫—Ç –≤ –º–æ—Å–∫–≤–µ. –ü—Ä–µ–º–∏—è –∑–∞ –≤—ã–ø–æ–ª–Ω–µ–Ω–∏–µ –æ–±—ä–µ–∫—Ç–æ–≤...,0.0
9396406,,"–ú—ã –∏—â–µ–º: –∞–∫—Ç–∏–≤–Ω—ã—Ö, –æ—Ç–≤–µ—Ç—Å—Ç–≤–µ–Ω–Ω—ã—Ö –ª—é–¥–µ–π, —Å –∂–µ–ª...",–ú—ã –ø—Ä–µ–¥–ª–∞–≥–∞–µ–º. –°—Ç–∞–±–∏–ª—å–Ω—É—é –∑–∞—Ä–∞–±–æ—Ç–Ω—É—é –ø–ª–∞—Ç—É (2 ...,0.0
9500588,–û—Ç–ª–∏—á–Ω–æ–µ —Ç–µ—Ö–Ω–∏—á–µ—Å–∫–æ–µ –æ—Å–Ω–∞—â–µ–Ω–∏–µ –º–∞–ª—è—Ä–Ω–æ–≥–æ —Ü–µ—Ö–∞....,,,0.0
14826,,–ó–Ω–∞–Ω–∏–µ —Ç–µ—Ö–Ω–æ–ª–æ–≥–∏—á–µ—Å–∫–∏—Ö –ø—Ä–æ—Ü–µ—Å—Å–æ–≤ —Å–≤–∞—Ä–∫–∏ –ø–Ω–¥ —Ç—Ä...,,0.0
9395175,–í–∞–∫–∞–Ω—Å–∏—è –®—Ç—É–∫–∞—Ç—É—Ä –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ –Ω–∞ —á–∏—Å—Ç–æ–≤—É—é ...,–û–ø—ã—Ç —à—Ç—É–∫–∞—Ç—É—Ä / —à—Ç—É–∫–∞—Ç—É—Ä / —à–ø–∞–∫–ª–µ–≤—â–∏–∫ –µ—Å–ª–∏ –≤—ã ...,–†–∞–±–æ—Ç–∞ —à—Ç—É–∫–∞—Ç—É—Ä –≤ –º–æ—Å–∫–≤–µ. –ò–ª–∏ –∑–≤–æ–Ω–∏—Ç–µ —Å–∞–º–∏,0.084
9395211,–†–∞–±–æ—Ç–∞ –®—Ç—É–∫–∞—Ç—É—Ä –≤ –ú–æ—Å–∫–≤–µ. –í–∞–∫–∞–Ω—Å–∏—è –®—Ç—É–∫–∞—Ç—É—Ä –≤ ...,–û–ø—ã—Ç —à—Ç—É–∫–∞—Ç—É—Ä / —à—Ç—É–∫–∞—Ç—É—Ä / —à–ø–∞–∫–ª–µ–≤—â–∏–∫ –µ—Å–ª–∏ –≤—ã ...,–†–∞–±–æ—Ç–∞ —à—Ç—É–∫–∞—Ç—É—Ä –≤ –º–æ—Å–∫–≤–µ. –ò–ª–∏ –∑–≤–æ–Ω–∏—Ç–µ —Å–∞–º–∏,0.09
9492424,,–û–ø—ã—Ç —Ä–∞–±–æ—Ç—ã —Å–≤–∞—Ä—â–∏–∫–æ–º. –£–º–µ–Ω–∏–µ —Ä–∞–±–æ—Ç—ã —Ä—É—á–Ω–æ–π –¥—É...,"–ó–ø 80 —Ç. –ü—Ä–æ–µ–∑–¥, –ø—Ä–æ–∂–∏–≤–∞–Ω–∏–µ, –ø–∏—Ç–∞–Ω–∏–µ. –†–∞–±–æ—Ç–∞ –≤...",0.0


In [28]:
def check_str(str1, str2, color):
    # —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç –æ–¥–Ω—É —Å—Ç—Ä–æ–∫—É —Å –¥—Ä—É–≥–æ–π –∏ —Ä–∞—Å–∫—Ä–∞—à–∏–≤–∞–µ—Ç –≤ —Å–ª—É—á–∞–µ –æ—Ç–ª–∏—á–∏—è
    #pattern = r'[^–∞-—è–ê-–Ø—ë–Åa-zA-Z]'
    #str1 = re.sub(pattern, ' ', str1)
    #str1 = re.sub(" +", " ", str1)
    #str2 = re.sub(pattern, '', str2)
    #str2 = re.sub(" +", " ", str2)
    list1 = str1.lower().split()
    list2 = str2.lower().split()
    new_str1 = ""
    for word in list1:
        if word in list2:
            new_str1 += (word + " ")
        else:
            new_str1 += (colored(word, 'black', color) + " ")
    return new_str1

def check_col(df):
    no_resp, req, term = df.columns
    idxs = df.index.tolist()
    col1=[]
    col2=[]
    col3=[]
    for idx in idxs:
        str1 = df[no_resp][idx]
        str2 = df[req][idx]
        str3 = df[term][idx]
        str4 = df[req][idx] + df[term][idx]
        col1.append(check_str(str1, str4, "on_green"))
        col2.append(check_str(str2, str1, "on_yellow"))
        col3.append(check_str(str3, str1, "on_yellow"))
    df = pd.DataFrame({"no_responsibilities": col1, "requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)": 
                       col2, "terms(–£—Å–ª–æ–≤–∏—è)": col3}, index = idxs)
    return df

In [29]:
new_df = check_col(temp[['no_responsibilities', 'requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)', 'terms(–£—Å–ª–æ–≤–∏—è)']])
new_df

Unnamed: 0,no_responsibilities,requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é),terms(–£—Å–ª–æ–≤–∏—è)
9495846,,,
9495850,,,
9495851,,,
2,[42m[30m–≤—ã–ø–æ–ª–Ω–µ–Ω–∏–µ[0m [42m[30m—Ä–∞–±–æ—Ç[0m ...,,
9388857,–æ–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ [42m[30m–ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è–ø...,–æ–ø—ã—Ç –≤ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–µ [43m[30m–ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è...,[43m[30m–≤–∞—Ö—Ç–∞[0m –≤ [43m[30m–≥–æ—Ä–æ–¥[0m [43...
...,...,...,...
9306889,[42m[30m–æ—Ñ–∏—Å–Ω—ã–µ[0m [42m[30m—É—Å–ª–æ–≤–∏—è[0m,[43m[30m–∑–Ω–∞–Ω–∏–µ[0m [43m[30m–∫–æ–Ω—Å—Ç—Ä—É–∫—Ç–æ—Ä—Å–∫–∏—Ö...,
9306890,–ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è –æ–ø—ã—Ç –ø–æ —É—Å—Ç–∞–Ω–æ–≤–∫–µ —Å–∏—Å—Ç–µ–º [42m...,–ø—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ—Ç—Å—è –æ–ø—ã—Ç –ø–æ —É—Å—Ç–∞–Ω–æ–≤–∫–µ —Å–∏—Å—Ç–µ–º –∫–æ–Ω–¥–∏—Ü...,–æ–ø–ª–∞—Ç–∞ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç—Å—è —Ä–∞–∑ –≤ –Ω–µ–¥–µ–ª—é –ø–æ [43m[30m...
9306891,[42m[30m–æ–±—É—á–∞–µ–º[0m —Ç–æ–∫–∞—Ä–Ω—ã–µ –Ω–∞–≤—ã–∫–∏ –∏ —Å–≤–∞—Ä–æ—á...,[43m[30m—Ä–∞–±–æ—Ç–µ[0m [43m[30m–Ω–∞[0m [43m[3...,"[43m[30m–∑–∞—Ä–ø–ª–∞—Ç–∞,[0m –≥—Ä–∞—Ñ–∏–∫ —Ä–∞–±–æ—Ç—ã [43m[3..."
9306892,"–æ–ø—ã—Ç —Ä–∞–±–æ—Ç—ã, –∑–Ω–∞–Ω–∏–µ –∏ –ø–æ–Ω–∏–º–∞–Ω–∏–µ –ø—Ä–∏–Ω—Ü–∏–ø–æ–≤ —Ä–∞–±–æ...","–æ–ø—ã—Ç —Ä–∞–±–æ—Ç—ã, –∑–Ω–∞–Ω–∏–µ –∏ –ø–æ–Ω–∏–º–∞–Ω–∏–µ –ø—Ä–∏–Ω—Ü–∏–ø–æ–≤ —Ä–∞–±–æ...",


In [64]:
idxs = new_df.index.tolist()
for idx in idxs[993:]:
    print("\033[1m{}\033[0m".format("–í–∞–∫–∞–Ω—Å–∏—è"), idx)
    print("\033[1m{}\033[0m".format("–¢–µ–∫—Å—Ç –¥–ª—è —Ä–∞—Å–ø–æ–∑–Ω–∞–Ω–∏—è: "),  new_df['no_responsibilities'][idx])
    print()
    print("\033[1m{}\033[0m".format("–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è: "),  new_df['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'][idx])
    print("\033[1m{}\033[0m".format("–£—Å–ª–æ–≤–∏—è: "),  new_df['terms(–£—Å–ª–æ–≤–∏—è)'][idx])
    print()

[1m–í–∞–∫–∞–Ω—Å–∏—è[0m 9306888
[1m–¢–µ–∫—Å—Ç –¥–ª—è —Ä–∞—Å–ø–æ–∑–Ω–∞–Ω–∏—è: [0m –æ—Ç—Å—É—Ç—Å—Ç–≤–∏—è –æ–ø—ã—Ç–∞, [42m[30m–æ–±—É—á–∞–µ–º–º–æ–Ω—Ç–∞–∂[0m –≤–æ–∑–¥—É—Ö–æ–≤–æ–¥–æ–≤, –æ–±–æ—Ä—É–¥–æ–≤–∞–Ω–∏—è –∏ —Å–∏—Å—Ç–µ–º –∫–æ–Ω–¥–∏—Ü–∏–æ–Ω–∏—Ä–æ–≤–∞–Ω–∏—è, 5/2, –≤–æ–∑–º–æ–∂–Ω—ã –∫–æ–º–∞–Ω–¥–∏—Ä–æ–≤–∫–∏ –ø–æ –±–∞—à–∫–∏—Ä–∏–∏, [42m[30m–æ–∫–ª–∞–¥–Ω–æ-–ø—Ä–µ–º–∏–∞–ª—å–Ω–∞—è.[0m 

[1m–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è: [0m [43m[30m—Å—Ç–∞–∂[0m [43m[30m—Ä–∞–±–æ—Ç—ã[0m [43m[30m–∂–µ–ª–∞—Ç–µ–ª—å–Ω–æ,[0m [43m[30m–≤[0m [43m[30m—Å–ª—É—á–∞–µ[0m –æ—Ç—Å—É—Ç—Å—Ç–≤–∏—è –æ–ø—ã—Ç–∞, [43m[30m–æ–±—É—á–∞–µ–º,[0m [43m[30m–º–æ–Ω—Ç–∞–∂[0m –≤–æ–∑–¥—É—Ö–æ–≤–æ–¥–æ–≤, –æ–±–æ—Ä—É–¥–æ–≤–∞–Ω–∏—è –∏ —Å–∏—Å—Ç–µ–º –∫–æ–Ω–¥–∏—Ü–∏–æ–Ω–∏—Ä–æ–≤–∞–Ω–∏—è, 5/2, –≤–æ–∑–º–æ–∂–Ω—ã –∫–æ–º–∞–Ω–¥–∏—Ä–æ–≤–∫–∏ –ø–æ –±–∞—à–∫–∏—Ä–∏–∏, [43m[30m–æ–∫–ª–∞–¥–Ω–æ[0m 
[1m–£—Å–ª–æ–≤–∏—è: [0m 

[1m–í–∞–∫–∞–Ω—Å–∏—è[0m 9306889
[1m–¢–µ–∫—Å—Ç –¥–ª—è —Ä–∞—Å–ø–æ–∑–Ω–∞–Ω–∏—è: [0m [42m[30m–æ—Ñ–∏—Å–Ω—ã–µ[0m [42m[30m—É—Å–ª–æ–≤

–ü–æ–∏—Å–∫–æ–≤—ã–π –±–æ—Ç

In [253]:
def get_similarity(find, df, n=5):
    # find - –∑–∞–ø—Ä–æ—Å —Å–æ–∏—Å–∫–∞—Ç–µ–ª—è
    # data - —Ç–∞–±–ª–∏—Ü–∞ —Å –∏—Å–ø—Ä–∞–≤–ª–µ–Ω–Ω—ã–º–∏ –º–æ–¥–µ–ª—å—é –≤–∞–∫–∞–Ω—Å–∏—è–º–∏. –ë–æ—Ç –Ω–µ –±—É–¥–µ—Ç –∏—Ö —Å—á–∏—Ç–∞—Ç—å
    # n - –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –≤—ã–≤–æ–¥–∏–º—ã—Ö –≤–∞–∫–∞–Ω—Å–∏–π. –∑–∞–∫–æ–¥–∏—Ä—É–π, —á—Ç–æ–±—ã –∫—Ä–∞—Å–∏–≤–æ –±—ã–ª–æ 3-5 —à—Ç—É–∫
    
    #–≤–æ–∑–≤—Ä–∞—â–∞–µ—Ç –æ—Ç—Ñ–∏–ª—å—Ç–≤–æ–≤–∞–Ω–Ω—ã–π –¥–∞—Ç–∞—Ñ—Ä–µ–π–º –ø–æ–¥—Ö–æ–¥—è—â–∏—Ö –≤–∞–∫–∞–Ω—Å–∏–π
    data = df.copy()
    idxs = data.index.tolist()
    diffs=[]
    str_find = str(find.lower()) 
    for idx in idxs:
        resp = str(data['responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏)'][idx])
        req = str(data['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'][idx])
        term = str(data['terms(–£—Å–ª–æ–≤–∏—è)'][idx])
        str_data = resp + " " +  req + " " +  term
        diff = difflib.SequenceMatcher(lambda x: x == " ", str_find, str_data).ratio() 
        diffs.append(round(diff, 3))
        
    data['similarity']=diffs
    #print(data)
    data.sort_values(by='similarity', ascending=False, inplace=True)
   
    # data.head(n)
    return data[0:n]

In [254]:
#–∑–∞–≥—Ä—É–∂–∞–µ–º –¥–∞—Ç–∞—Å–µ—Ç - —Ä–µ–∑—É–ª—å—Ç–∞—Ç –º–æ–¥–µ–ª–∏ (–±—É–¥–µ—Ç —Ñ–∞–π–ª)
data_itog = pd.read_excel('–†–µ—à–µ–Ω–∏–µ.xlsx', index_col=0)
# —Å—Ç—Ä–æ–∫–∞, –∫–æ—Ç–æ—Ä—É—é –≤–≤–æ–¥–∏—Ç —Å–æ–∏—Å–∫–∞—Ç–µ–ª—å
find = "—Å–ª–µ—Å–∞—Ä—å —Å–∞–Ω—Ç–µ—Ö–Ω–∏–∫, —Ä–∞–±–æ—Ç–∞ –Ω–∞ —Å–≤–∞—Ä–æ—á–Ω–æ–º —Å—Ç–µ–Ω–¥–µ"
# –æ—Ç—Ñ–∏–ª—å—Å—Ç—Ä–æ–≤–∞–Ω–Ω—ã–π –¥–∞—Ç–∞—Å–µ—Ç –¥–ª—è –≤–∏–∑—É–∞–ª–∏–∑–∞—Ü–∏–∏
filter_data = get_similarity(find, data_itog)

In [255]:
filter_data

Unnamed: 0_level_0,name(–Ω–∞–∑–≤–∞–Ω–∏–µ),responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏),requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é),terms(–£—Å–ª–æ–≤–∏—è),skills(–ö–ª—é—á–µ–≤—ã–µ –Ω–∞–≤—ã–∫–∏),similarity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9493679,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫,–û–ø–∏—Å–∞–Ω–∏–µ —Ä–∞–±–æ—Ç–æ–¥–∞—Ç–µ–ª—è: –º–æ–Ω—Ç–∞–∂ —Å–∏—Å—Ç–µ–º —Å–∫—É–¥ —Å–∫—Å,–ó–Ω–∞–Ω–∏–µ —Å–∏—Å—Ç–µ–º,,,0.37
9306645,–®—Ç—É–∫–∞—Ç—É—Ä,,"–ê–∫–∫—É—Ä–∞—Ç–Ω–æ—Å—Ç—å, –¥–∏—Å—Ü–∏–ø–ª–∏–Ω–∏—Ä–æ–≤–∞–Ω–Ω–æ—Å—Ç—å",–û–ø–∏—Å–∞–Ω–∏–µ —Ä–∞–±–æ—Ç–æ–¥–∞—Ç–µ–ª—è:—Å—Ç—Ä–æ–∏—Ç–µ–ª—å–Ω–∞—è –∫–æ–º–ø–∞–Ω–∏—è,,0.331
9306839,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫,,,–ó–∞—Ä–∞–±–æ—Ç–Ω–∞—è –ø–ª–∞—Ç–∞ –æ–±–≥–æ–≤–∞—Ä–∏–≤–∞–µ—Ç—Å—è –Ω–µ–ø–æ—Å—Ä–µ–¥—Å—Ç–≤–µ–Ω–Ω...,,0.328
9306599,–ú–∞—à–∏–Ω–∏—Å—Ç –ö—Ä–∞–Ω–∞ (–ö—Ä–∞–Ω–æ–≤—â–∏–∫),–°–ª–µ–¥–∏—Ç—å –∑–∞ —Ç–µ—Ö–Ω–∏–∫–æ–π –∏ –º–µ–ª–∫–∏–π —Ä–µ–º–æ–Ω—Ç —Å–≤–æ–∏–º–∏ —Å–∏–ª–∞–º–∏,,–û—Ñ–∏—Ü–∏–∞–ª—å–Ω–æ–µ —Ç—Ä—É–¥–æ—É—Å—Ç—Ä–æ–π—Å—Ç–≤–æ. –í–æ–∑–º–æ–∂–Ω—ã –∫–æ–º–∞–Ω–¥–∏—Ä...,,0.322
9306804,–ú–æ–Ω—Ç–∞–∂–Ω–∏–∫,,,"–û–ø–ª–∞—Ç–∞ —Ç—Ä—É–¥–∞ –¥–æ—Å—Ç–æ–π–Ω–∞—è, —Å–≤–æ–µ–≤—Ä–µ–º–µ–Ω–Ω–∞—è. –°–ø–µ—Ü–æ–¥–µ–∂–¥–∞",,0.314


In [256]:
idxs = filter_data.index.tolist()
idx = idxs[4]
print("–í–∞–∫–∞–Ω—Å–∏—è", filter_data['name(–Ω–∞–∑–≤–∞–Ω–∏–µ)'][idx])
print("–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏", filter_data['responsibilities(–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏)'][idx])
print("–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é", filter_data['requirements(–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é)'][idx])
print("–£—Å–ª–æ–≤–∏—è —Ä–∞–±–æ—Ç—ã", filter_data['terms(–£—Å–ª–æ–≤–∏—è)'][idx])

–í–∞–∫–∞–Ω—Å–∏—è –ú–æ–Ω—Ç–∞–∂–Ω–∏–∫
–î–æ–ª–∂–Ω–æ—Å—Ç–Ω—ã–µ –æ–±—è–∑–∞–Ω–Ω–æ—Å—Ç–∏ nan
–¢—Ä–µ–±–æ–≤–∞–Ω–∏—è –∫ —Å–æ–∏—Å–∫–∞—Ç–µ–ª—é nan
–£—Å–ª–æ–≤–∏—è —Ä–∞–±–æ—Ç—ã –û–ø–ª–∞—Ç–∞ —Ç—Ä—É–¥–∞ –¥–æ—Å—Ç–æ–π–Ω–∞—è, —Å–≤–æ–µ–≤—Ä–µ–º–µ–Ω–Ω–∞—è. –°–ø–µ—Ü–æ–¥–µ–∂–¥–∞
