# MODELING

In [1]:
from tqdm import tqdm
from collections import defaultdict
import gensim
from gensim.corpora import Dictionary
from gensim.models import Phrases
from gensim.models import LdaModel
import pyLDAvis.gensim
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from pymystem3 import Mystem
import pandas as pd
import geopandas as gpd
import folium
from folium.plugins import HeatMap
import seaborn as sns
%matplotlib inline

  from geopandas.geoseries import GeoSeries


# Select the neighborhood and load its data

In [4]:
# NOTE(review): on a fresh kernel this cell runs before `neigh_posts` is
# assigned (that happens in the following cell), which is why the recorded
# output is a NameError. Move this preview after the loading cell.
neigh_posts

NameError: name 'neigh_posts' is not defined

In [5]:
# Neighborhood to analyse; both input paths below are derived from it.
name = 'Izmajlovo'
district_path = '../Data dive/dd2/{}/{}_district.geojson'.format(name, name)
posts_path = 'social_media/{}/vk.csv'.format(name)

# District geometry first, then the VK posts for the same neighborhood.
district = gpd.read_file(district_path)
neigh_posts = pd.read_csv(posts_path)

In [3]:
import nltk
# Fetch only the stopword lists that process_docs reads. A bare
# nltk.download() opens the interactive downloader, which blocks a
# non-interactive "Restart & Run All".
nltk.download('stopwords')

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

# Group posts by user

In [6]:
# One row per user: all of a user's post texts joined with single spaces,
# stored in the 'post' column next to 'userId'.
vk_users = (
    neigh_posts
    .groupby('userId')['text']
    .apply(' '.join)
    .rename('post')
    .reset_index()
)

# How many users are there?

In [7]:
# Additional stopwords that process_docs removes together with the NLTK
# Russian and English lists.
extra_words = [
    # URL fragments and markup leftovers
    'http', 'br', 'id', 'com', 'www',
    # apps, services and repost noise
    'instagram', 'vsco', 'https', 'instasize', 'repost',
    'whatsapp', 'вотсап', 'repostapp',
    # beauty-related words
    'маникюр', 'бровь', 'ресница', 'губа',
    'instacollage', 'опубликовывать',
    # generic photo / city words
    'фото', 'москва', 'moscow',
]
def process_docs(docs):
    """
    Turn raw post texts into cleaned token lists.

    Steps applied to every document:

    1. Lowercase the text and split it into runs of word characters.
    2. Drop tokens that contain a digit.
    3. Drop Russian/English NLTK stopwords and the module-level extra_words.
    4. Drop tokens of three characters or fewer.

    No lemmatization is performed, so inflected forms stay separate tokens.
    The input container is not modified.

    Args:
    ----------
    docs: iterable of raw (untokenized) strings, one per document.

    Returns:
    -------
    docs: list of lists of str, the cleaned tokens of each document in
        input order.
    """
    # Split the documents into tokens.
    tokenizer = RegexpTokenizer(r'\w+')
    # A set gives constant-time membership tests; a list would be scanned
    # once for every token of every document.
    stops = set(stopwords.words('russian') + stopwords.words('english') + extra_words)

    # Build a new list rather than assigning back into `docs`, so the caller's
    # array is not overwritten with token lists.
    docs = [tokenizer.tokenize(doc.lower()) for doc in tqdm(docs)]

    # Remove tokens that contain digits.
    # NOTE(review): the second clause only rejects a token that contains 'id',
    # 'club' AND 'ru' all at once, so in practice it filters almost nothing.
    # Behavior is kept as is; confirm the intended rule before tightening it.
    docs = [[token for token in doc
             if not any(c.isdigit() for c in token)
             and not ('id' in token and 'club' in token and 'ru' in token)]
            for doc in tqdm(docs)]
    # Remove stopwords.
    docs = [[token for token in doc if token not in stops] for doc in tqdm(docs)]
    # Remove short tokens (three characters or fewer).
    docs = [[token for token in doc if len(token) > 3] for doc in tqdm(docs)]
    return docs
    
def get_corpus(docs, min_token_count=3, bigram_min_count=20):

    """Add bigrams to docs and create corpus and dictionary for training

    Args:
        docs: list of tokenized and cleaned texts (one list of tokens per
            document);
        min_token_count: tokens occurring this many times or fewer across
            all documents are dropped before bigram detection;
        bigram_min_count: value passed as min_count to gensim Phrases.
    Returns:
        corpus: list of lists of tuples, where the first element of a tuple
        is a word id and the second is the count of that word in that
        document
        dictionary: gensim.corpora.dictionary.Dictionary built from the
        filtered texts with the detected bigrams appended

    """

    # Count every token over the whole collection.
    frequency = defaultdict(int)
    for text in tqdm(docs):
        for token in text:
            frequency[token] += 1

    texts = [[token for token in text if frequency[token] > min_token_count] for text in tqdm(docs)]

    # Detect bigrams and append them to the document they occur in.
    bigram = Phrases(texts, min_count=bigram_min_count)
    for idx in tqdm(range(len(texts))):
        # Tokens already present in the document are unigrams. Some of them
        # contain '_' themselves (the tokenizer treats '_' as a word
        # character); testing only for '_' would append those a second time
        # and double their counts.
        unigrams = set(texts[idx])
        merged = [token for token in bigram[texts[idx]]
                  if '_' in token and token not in unigrams]
        texts[idx].extend(merged)

    # Create a dictionary representation of the documents.
    dictionary = Dictionary(texts)
    # Frequency-based pruning of the dictionary is currently disabled:
    #dictionary.filter_extremes(no_below=20, no_above=0.5)
    corpus = [dictionary.doc2bow(doc) for doc in tqdm(texts)]

    print('Number of unique tokens: {}'.format(len(dictionary)))
    print('Number of documents: {}'.format(len(corpus)))

    return corpus, dictionary

# Process texts to be ready for modeling

In [8]:
import nltk

In [9]:
# Make sure the NLTK stopword lists are available locally.
nltk.download(info_or_id='stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\K\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [10]:
# Independent copy of the per-user texts, so vk_users keeps the raw strings.
texts = vk_users.loc[:, 'post'].copy()

In [11]:
# Tokenize and clean every user's concatenated posts.
docs = process_docs(docs=texts.values)

Installing mystem to C:\Users\K/.local/bin\mystem.exe from http://download.cdn.yandex.net/mystem/mystem-3.0-win7-64bit.zip
100%|██████████| 17369/17369 [00:00<00:00, 59730.57it/s]
100%|██████████| 17369/17369 [00:00<00:00, 25400.06it/s]
100%|██████████| 17369/17369 [00:01<00:00, 8716.57it/s]
100%|██████████| 17369/17369 [00:00<00:00, 71877.22it/s]


In [12]:
# Bag-of-words corpus and its gensim dictionary, built from the cleaned tokens.
corpus, dictionary = get_corpus(docs=docs)

100%|██████████| 17369/17369 [00:00<00:00, 154668.81it/s]
100%|██████████| 17369/17369 [00:00<00:00, 231007.85it/s]
100%|██████████| 17369/17369 [00:00<00:00, 18182.76it/s]
100%|██████████| 17369/17369 [00:00<00:00, 56978.04it/s]


Number of unique tokens: 11207
Number of documents: 17369


In [13]:
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


#The training model - we use online LDA model which allows to update the model 
#and the following parameters should be defined
num_topics = 10 # number of topics
chunksize = 1000 
passes = 10
iterations = 400
eval_every = 10  #evaluate model perplexity.

# Make a index to word dictionary.
temp = dictionary[0]  # This is only to "load" the dictionary.
id2word = dictionary.id2token

%time model = LdaModel(corpus=corpus, id2word=id2word, chunksize=chunksize, \
                       alpha=0.001, update_every = 1, \
                       num_topics=num_topics,\
                       eval_every=eval_every, passes = passes)

2017-07-20 16:02:16,079 : INFO : using symmetric eta at 8.92299455697332e-05
2017-07-20 16:02:16,087 : INFO : using serial LDA version on this node
2017-07-20 16:02:16,812 : INFO : running online (multi-pass) LDA training, 10 topics, 10 passes over the supplied corpus of 17369 documents, updating model once every 1000 documents, evaluating perplexity every 10000 documents, iterating 50x with a convergence threshold of 0.001000
2017-07-20 16:02:16,815 : INFO : PROGRESS: pass 0, at document #1000/17369
2017-07-20 16:02:17,902 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:17,995 : INFO : topic #7 (0.001): 0.049*"измайлово" + 0.025*"парк" + 0.025*"измайловский" + 0.019*"кремль" + 0.017*"дельта" + 0.015*"измайлово_гамма" + 0.015*"гамма" + 0.014*"измайловский_парк" + 0.013*"кремль_измайлово" + 0.010*"rock"
2017-07-20 16:02:18,000 : INFO : topic #6 (0.001): 0.026*"измайлово" + 0.020*"hotel" + 0.016*"house" + 0.015*"vega" + 0.013*"ананас" + 0.013

2017-07-20 16:02:21,065 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:21,141 : INFO : topic #4 (0.001): 0.058*"измайловский" + 0.050*"измайлово" + 0.048*"усадьба" + 0.047*"остров" + 0.045*"измайловский_остров" + 0.041*"усадьба_измайлово" + 0.011*"проспект" + 0.011*"измайловский_проспект" + 0.008*"картинг" + 0.006*"музей_усадьба"
2017-07-20 16:02:21,144 : INFO : topic #8 (0.001): 0.029*"метро" + 0.017*"партизанская" + 0.013*"очень" + 0.009*"сегодня" + 0.008*"станция" + 0.007*"роддом" + 0.007*"метро_партизанская" + 0.007*"день" + 0.007*"алексино_истра" + 0.007*"станция_метро"
2017-07-20 16:02:21,147 : INFO : topic #1 (0.001): 0.036*"hotel" + 0.031*"vega" + 0.031*"best" + 0.029*"western" + 0.029*"best_western" + 0.029*"center" + 0.029*"convention" + 0.029*"plus" + 0.028*"hotel_convention" + 0.028*"plus_vega"
2017-07-20 16:02:21,150 : INFO : topic #5 (0.001): 0.094*"измайловский" + 0.075*"парк" + 0.068*"измайловский_парк" + 0.014*"бульвар" + 

2017-07-20 16:02:24,635 : INFO : topic #7 (0.001): 0.070*"измайлово" + 0.047*"дельта" + 0.046*"гамма" + 0.046*"измайлово_гамма" + 0.028*"park" + 0.024*"izmaylovsky" + 0.024*"izmaylovsky_park" + 0.020*"альфа" + 0.020*"гостиница" + 0.020*"сокольники"
2017-07-20 16:02:24,638 : INFO : topic #3 (0.001): 0.053*"ргуфксит" + 0.018*"ргуфксмит" + 0.017*"izmaylovo" + 0.015*"district" + 0.015*"izmaylovo_district" + 0.012*"pervomayskaya" + 0.011*"ргуфк" + 0.010*"beta" + 0.009*"hotel" + 0.009*"измайлово"
2017-07-20 16:02:24,641 : INFO : topic #0 (0.001): 0.160*"измайлово" + 0.145*"кремль" + 0.107*"кремль_измайлово" + 0.025*"измайловский" + 0.018*"площадь" + 0.018*"красная" + 0.017*"красная_площадь" + 0.016*"измайловскийкремль" + 0.009*"виноградный" + 0.008*"серебряно"
2017-07-20 16:02:24,643 : INFO : topic #6 (0.001): 0.034*"hotel" + 0.024*"make" + 0.024*"atelier" + 0.021*"make_atelier" + 0.020*"school" + 0.019*"delta" + 0.014*"gamma" + 0.013*"gamma_delta" + 0.013*"спасибо" + 0.011*"дельта_izmailovo

2017-07-20 16:02:26,949 : INFO : topic #5 (0.001): 0.106*"измайловский" + 0.084*"парк" + 0.071*"измайловский_парк" + 0.018*"бульвар" + 0.017*"измайловский_бульвар" + 0.014*"первомайская" + 0.012*"измайловскийпарк" + 0.009*"гимназия" + 0.007*"отдыха" + 0.006*"культуры"
2017-07-20 16:02:26,954 : INFO : topic diff=0.301345, rho=0.250000
2017-07-20 16:02:26,956 : INFO : PROGRESS: pass 0, at document #17000/17369
2017-07-20 16:02:27,330 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:27,378 : INFO : topic #5 (0.001): 0.100*"измайловский" + 0.079*"парк" + 0.067*"измайловский_парк" + 0.017*"бульвар" + 0.017*"первомайская" + 0.016*"измайловский_бульвар" + 0.016*"пишите" + 0.013*"вопросам" + 0.011*"всем" + 0.010*"измайловскийпарк"
2017-07-20 16:02:27,380 : INFO : topic #3 (0.001): 0.053*"ргуфксит" + 0.021*"izmaylovo" + 0.020*"izmaylovo_district" + 0.020*"district" + 0.018*"pervomayskaya" + 0.017*"beta" + 0.014*"ргуфксмит" + 0.011*"ргуфк" + 0.011*"ho

2017-07-20 16:02:29,535 : INFO : topic #3 (0.001): 0.038*"ргуфксит" + 0.022*"izmaylovo" + 0.022*"district" + 0.021*"izmaylovo_district" + 0.016*"pervomayskaya" + 0.011*"ргуфксмит" + 0.009*"beta" + 0.009*"ргуфк" + 0.008*"всем" + 0.007*"парикмахерская"
2017-07-20 16:02:29,538 : INFO : topic #4 (0.001): 0.066*"измайловский" + 0.054*"измайлово" + 0.051*"остров" + 0.051*"усадьба" + 0.049*"измайловский_остров" + 0.044*"усадьба_измайлово" + 0.019*"проспект" + 0.019*"измайловский_проспект" + 0.008*"платье" + 0.008*"картинг"
2017-07-20 16:02:29,542 : INFO : topic #0 (0.001): 0.167*"измайлово" + 0.145*"кремль" + 0.110*"кремль_измайлово" + 0.024*"измайловский" + 0.015*"измайловскийкремль" + 0.013*"площадь" + 0.012*"красная" + 0.011*"красная_площадь" + 0.011*"свадьба" + 0.008*"вернисаж"
2017-07-20 16:02:29,544 : INFO : topic #9 (0.001): 0.015*"магазин" + 0.014*"цена" + 0.012*"наличии" + 0.009*"очень" + 0.008*"концертный" + 0.008*"концертный_измайлово" + 0.007*"измайлово" + 0.007*"заказа" + 0.007*"

2017-07-20 16:02:31,544 : INFO : topic #6 (0.001): 0.034*"make" + 0.034*"atelier" + 0.030*"school" + 0.030*"make_atelier" + 0.018*"hotel" + 0.013*"delta" + 0.012*"спасибо" + 0.009*"школа" + 0.009*"gamma" + 0.009*"gamma_delta"
2017-07-20 16:02:31,546 : INFO : topic #9 (0.001): 0.011*"концертный" + 0.010*"концертный_измайлово" + 0.008*"square" + 0.008*"очень" + 0.008*"спасибо" + 0.008*"измайлово" + 0.008*"качка" + 0.007*"russianshowmenweek" + 0.007*"мюзикл" + 0.007*"россии"
2017-07-20 16:02:31,552 : INFO : topic #3 (0.001): 0.055*"ргуфксит" + 0.018*"izmaylovo" + 0.017*"district" + 0.016*"pervomayskaya" + 0.015*"izmaylovo_district" + 0.012*"ргуфксмит" + 0.011*"beta" + 0.009*"ргуфк" + 0.009*"hotel" + 0.008*"проезд"
2017-07-20 16:02:31,556 : INFO : topic diff=0.131867, rho=0.227220
2017-07-20 16:02:32,377 : INFO : -8.355 per-word bound, 327.5 perplexity estimate based on a held-out corpus of 1000 documents with 14772 words
2017-07-20 16:02:32,380 : INFO : PROGRESS: pass 1, at document #1000

2017-07-20 16:02:34,294 : INFO : topic #2 (0.001): 0.022*"izmailovo" + 0.020*"kremlin" + 0.017*"izmailovo_kremlin" + 0.014*"пруд" + 0.012*"круглый" + 0.011*"круглый_пруд" + 0.010*"парковая" + 0.008*"счастье" + 0.007*"семья" + 0.006*"спасибо"
2017-07-20 16:02:34,297 : INFO : topic diff=0.108125, rho=0.227220
2017-07-20 16:02:34,300 : INFO : PROGRESS: pass 1, at document #15000/17369
2017-07-20 16:02:34,610 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:34,645 : INFO : topic #0 (0.001): 0.182*"измайлово" + 0.157*"кремль" + 0.120*"кремль_измайлово" + 0.026*"площадь" + 0.024*"измайловский" + 0.023*"красная" + 0.021*"красная_площадь" + 0.014*"измайловскийкремль" + 0.009*"вернисаж" + 0.007*"свадьба"
2017-07-20 16:02:34,647 : INFO : topic #2 (0.001): 0.024*"izmailovo" + 0.018*"kremlin" + 0.015*"izmailovo_kremlin" + 0.013*"парковая" + 0.013*"пруд" + 0.011*"второйдом" + 0.011*"круглый" + 0.010*"круглый_пруд" + 0.008*"счастье" + 0.007*"комплекс"
201

2017-07-20 16:02:36,654 : INFO : topic #5 (0.001): 0.108*"измайловский" + 0.082*"парк" + 0.070*"измайловский_парк" + 0.016*"бульвар" + 0.015*"измайловский_бульвар" + 0.010*"измайловскийпарк" + 0.009*"пишите" + 0.009*"вопросам" + 0.009*"всем" + 0.008*"первомайская"
2017-07-20 16:02:36,656 : INFO : topic #1 (0.001): 0.057*"hotel" + 0.050*"vega" + 0.041*"best" + 0.040*"western" + 0.040*"best_western" + 0.040*"center" + 0.039*"convention" + 0.037*"plus" + 0.037*"plus_vega" + 0.037*"hotel_convention"
2017-07-20 16:02:36,659 : INFO : topic #7 (0.001): 0.069*"измайлово" + 0.041*"гамма" + 0.040*"дельта" + 0.040*"измайлово_гамма" + 0.032*"park" + 0.027*"izmaylovsky" + 0.027*"izmaylovsky_park" + 0.022*"house" + 0.021*"сокольники" + 0.020*"альфа"
2017-07-20 16:02:36,662 : INFO : topic #6 (0.001): 0.031*"make" + 0.030*"atelier" + 0.028*"make_atelier" + 0.026*"school" + 0.014*"hotel" + 0.012*"спасибо" + 0.011*"ateliermoscowlife" + 0.010*"день" + 0.010*"бигуди" + 0.010*"работа"
2017-07-20 16:02:36,6

2017-07-20 16:02:38,599 : INFO : topic #2 (0.001): 0.018*"izmailovo" + 0.016*"kremlin" + 0.013*"izmailovo_kremlin" + 0.012*"пруд" + 0.010*"круглый" + 0.009*"круглый_пруд" + 0.009*"счастье" + 0.008*"anton" + 0.008*"anton_nizhnick" + 0.008*"nizhnick"
2017-07-20 16:02:38,602 : INFO : topic #3 (0.001): 0.046*"ргуфксит" + 0.020*"izmaylovo" + 0.018*"district" + 0.018*"pervomayskaya" + 0.017*"izmaylovo_district" + 0.012*"beta" + 0.011*"ргуфксмит" + 0.009*"hotel" + 0.008*"beta_hotel" + 0.007*"ргуфк"
2017-07-20 16:02:38,606 : INFO : topic diff=0.092359, rho=0.221572
2017-07-20 16:02:38,613 : INFO : PROGRESS: pass 2, at document #8000/17369
2017-07-20 16:02:38,944 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:38,971 : INFO : topic #7 (0.001): 0.074*"измайлово" + 0.044*"гамма" + 0.043*"дельта" + 0.043*"измайлово_гамма" + 0.032*"park" + 0.026*"izmaylovsky" + 0.026*"izmaylovsky_park" + 0.021*"сокольники" + 0.020*"альфа" + 0.018*"гостиница"
2017-07-20 

2017-07-20 16:02:41,593 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:41,620 : INFO : topic #7 (0.001): 0.079*"измайлово" + 0.050*"дельта" + 0.050*"гамма" + 0.049*"измайлово_гамма" + 0.030*"park" + 0.026*"izmaylovsky" + 0.026*"izmaylovsky_park" + 0.022*"сокольники" + 0.020*"альфа" + 0.019*"гостиница"
2017-07-20 16:02:41,623 : INFO : topic #6 (0.001): 0.028*"make" + 0.027*"atelier" + 0.024*"school" + 0.024*"make_atelier" + 0.016*"delta" + 0.015*"hotel" + 0.013*"beauty" + 0.013*"casa" + 0.012*"gamma" + 0.012*"gamma_delta"
2017-07-20 16:02:41,626 : INFO : topic #3 (0.001): 0.059*"ргуфксит" + 0.019*"izmaylovo" + 0.017*"district" + 0.017*"pervomayskaya" + 0.017*"izmaylovo_district" + 0.016*"ргуфксмит" + 0.011*"ргуфк" + 0.011*"beta" + 0.008*"hotel" + 0.008*"всем"
2017-07-20 16:02:41,628 : INFO : topic #8 (0.001): 0.024*"метро" + 0.014*"партизанская" + 0.011*"очень" + 0.011*"сегодня" + 0.010*"partizanskaya" + 0.010*"измайловского" + 0.009*"измай

2017-07-20 16:02:43,575 : INFO : topic #3 (0.001): 0.041*"ргуфксит" + 0.026*"izmaylovo" + 0.024*"district" + 0.024*"izmaylovo_district" + 0.022*"pervomayskaya" + 0.017*"парикмахерская" + 0.013*"beta" + 0.010*"ргуфк" + 0.009*"ргуфксмит" + 0.009*"beta_hotel"
2017-07-20 16:02:43,578 : INFO : topic #0 (0.001): 0.187*"измайлово" + 0.151*"кремль" + 0.116*"кремль_измайлово" + 0.022*"измайловский" + 0.020*"площадь" + 0.017*"красная" + 0.016*"красная_площадь" + 0.012*"измайловскийкремль" + 0.009*"вернисаж" + 0.006*"свадьба"
2017-07-20 16:02:43,580 : INFO : topic #5 (0.001): 0.106*"измайловский" + 0.082*"парк" + 0.069*"измайловский_парк" + 0.018*"бульвар" + 0.017*"измайловский_бульвар" + 0.015*"вопросам" + 0.015*"пишите" + 0.010*"измайловскийпарк" + 0.008*"всем_вопросам" + 0.008*"первомайская"
2017-07-20 16:02:43,584 : INFO : topic #4 (0.001): 0.058*"измайлово" + 0.048*"измайловский" + 0.046*"усадьба" + 0.045*"остров" + 0.043*"измайловский_остров" + 0.038*"усадьба_измайлово" + 0.018*"картинг" + 

2017-07-20 16:02:45,459 : INFO : topic #8 (0.001): 0.032*"метро" + 0.022*"партизанская" + 0.013*"partizanskaya" + 0.012*"сегодня" + 0.012*"очень" + 0.008*"метро_партизанская" + 0.008*"станция" + 0.008*"день" + 0.008*"спасибо" + 0.007*"рождения"
2017-07-20 16:02:45,461 : INFO : topic #1 (0.001): 0.058*"hotel" + 0.050*"vega" + 0.042*"best" + 0.040*"center" + 0.040*"western" + 0.040*"best_western" + 0.040*"convention" + 0.038*"plus" + 0.038*"plus_vega" + 0.038*"hotel_convention"
2017-07-20 16:02:45,464 : INFO : topic diff=0.081378, rho=0.216326
2017-07-20 16:02:45,468 : INFO : PROGRESS: pass 3, at document #6000/17369
2017-07-20 16:02:45,800 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:45,826 : INFO : topic #1 (0.001): 0.060*"hotel" + 0.052*"vega" + 0.044*"best" + 0.041*"western" + 0.041*"best_western" + 0.041*"center" + 0.041*"convention" + 0.039*"plus" + 0.039*"plus_vega" + 0.039*"hotel_convention"
2017-07-20 16:02:45,828 : INFO : topic #

2017-07-20 16:02:48,114 : INFO : topic diff=0.071011, rho=0.216326
2017-07-20 16:02:48,118 : INFO : PROGRESS: pass 3, at document #11000/17369
2017-07-20 16:02:48,431 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:48,457 : INFO : topic #4 (0.001): 0.066*"измайлово" + 0.062*"измайловский" + 0.056*"усадьба" + 0.055*"остров" + 0.054*"измайловский_остров" + 0.047*"усадьба_измайлово" + 0.010*"музей" + 0.008*"vscocam" + 0.008*"музей_усадьба" + 0.007*"картинг"
2017-07-20 16:02:48,460 : INFO : topic #6 (0.001): 0.032*"make" + 0.031*"atelier" + 0.028*"make_atelier" + 0.027*"school" + 0.018*"delta" + 0.013*"gamma" + 0.012*"gamma_delta" + 0.012*"спасибо" + 0.010*"школа" + 0.010*"дельта_izmailovo"
2017-07-20 16:02:48,463 : INFO : topic #0 (0.001): 0.182*"измайлово" + 0.153*"кремль" + 0.114*"кремль_измайлово" + 0.024*"измайловский" + 0.019*"площадь" + 0.018*"красная" + 0.017*"красная_площадь" + 0.017*"измайловскийкремль" + 0.009*"вернисаж" + 0.009*"вин

2017-07-20 16:02:50,265 : INFO : topic #2 (0.001): 0.022*"izmailovo" + 0.019*"kremlin" + 0.017*"izmailovo_kremlin" + 0.015*"парковая" + 0.013*"пруд" + 0.011*"круглый" + 0.010*"круглый_пруд" + 0.009*"счастье" + 0.008*"второйдом" + 0.007*"спасибо"
2017-07-20 16:02:50,271 : INFO : topic #6 (0.001): 0.036*"make" + 0.035*"atelier" + 0.033*"school" + 0.032*"make_atelier" + 0.016*"delta" + 0.014*"школа" + 0.012*"ateliermoscowlife" + 0.011*"gamma" + 0.011*"спасибо" + 0.011*"gamma_delta"
2017-07-20 16:02:50,274 : INFO : topic #4 (0.001): 0.068*"измайлово" + 0.058*"измайловский" + 0.056*"усадьба" + 0.055*"остров" + 0.053*"измайловский_остров" + 0.046*"усадьба_измайлово" + 0.011*"музей" + 0.009*"музей_усадьба" + 0.007*"картинг" + 0.007*"photo"
2017-07-20 16:02:50,277 : INFO : topic #1 (0.001): 0.067*"hotel" + 0.056*"vega" + 0.047*"best" + 0.045*"western" + 0.045*"best_western" + 0.045*"center" + 0.045*"convention" + 0.044*"plus" + 0.043*"plus_vega" + 0.043*"hotel_convention"
2017-07-20 16:02:50,2

2017-07-20 16:02:52,224 : INFO : topic #1 (0.001): 0.059*"hotel" + 0.052*"vega" + 0.043*"best" + 0.041*"center" + 0.041*"western" + 0.041*"best_western" + 0.041*"convention" + 0.038*"plus" + 0.038*"plus_vega" + 0.038*"hotel_convention"
2017-07-20 16:02:52,227 : INFO : topic #4 (0.001): 0.062*"измайлово" + 0.057*"измайловский" + 0.055*"усадьба" + 0.054*"остров" + 0.052*"измайловский_остров" + 0.046*"усадьба_измайлово" + 0.011*"картинг" + 0.009*"музей" + 0.008*"музей_усадьба" + 0.006*"весна"
2017-07-20 16:02:52,230 : INFO : topic diff=0.085651, rho=0.211435
2017-07-20 16:02:52,233 : INFO : PROGRESS: pass 4, at document #4000/17369
2017-07-20 16:02:52,552 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:52,578 : INFO : topic #3 (0.001): 0.038*"ргуфксит" + 0.022*"izmaylovo" + 0.022*"district" + 0.021*"izmaylovo_district" + 0.020*"pervomayskaya" + 0.010*"ргуфксмит" + 0.009*"beta" + 0.009*"ргуфк" + 0.007*"парикмахерская" + 0.007*"всем"
2017-07-20 

2017-07-20 16:02:54,375 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:54,397 : INFO : topic #4 (0.001): 0.065*"измайлово" + 0.063*"измайловский" + 0.055*"усадьба" + 0.054*"остров" + 0.052*"измайловский_остров" + 0.047*"усадьба_измайлово" + 0.008*"музей" + 0.008*"музей_усадьба" + 0.007*"картинг" + 0.007*"vscocam"
2017-07-20 16:02:54,403 : INFO : topic #3 (0.001): 0.053*"ргуфксит" + 0.019*"pervomayskaya" + 0.018*"izmaylovo" + 0.017*"district" + 0.015*"izmaylovo_district" + 0.012*"ргуфксмит" + 0.011*"beta" + 0.009*"ргуфк" + 0.008*"beta_hotel" + 0.007*"сегодня"
2017-07-20 16:02:54,407 : INFO : topic #9 (0.001): 0.011*"концертный" + 0.011*"всем" + 0.011*"первомайская" + 0.010*"концертный_измайлово" + 0.010*"square" + 0.009*"очень" + 0.007*"спасибо" + 0.007*"качка" + 0.007*"мюзикл" + 0.007*"russianshowmenweek"
2017-07-20 16:02:54,410 : INFO : topic #0 (0.001): 0.185*"измайлово" + 0.153*"кремль" + 0.117*"кремль_измайлово" + 0.023*"измайловский" 

2017-07-20 16:02:56,974 : INFO : topic #3 (0.001): 0.058*"ргуфксит" + 0.020*"pervomayskaya" + 0.019*"izmaylovo" + 0.017*"district" + 0.017*"ргуфксмит" + 0.016*"izmaylovo_district" + 0.011*"beta" + 0.011*"ргуфк" + 0.008*"всем" + 0.007*"бета"
2017-07-20 16:02:56,977 : INFO : topic #7 (0.001): 0.078*"измайлово" + 0.047*"дельта" + 0.046*"гамма" + 0.045*"измайлово_гамма" + 0.032*"park" + 0.027*"izmaylovsky" + 0.027*"izmaylovsky_park" + 0.024*"izmailovo" + 0.022*"сокольники" + 0.020*"альфа"
2017-07-20 16:02:56,980 : INFO : topic #4 (0.001): 0.069*"измайлово" + 0.059*"измайловский" + 0.057*"усадьба" + 0.056*"остров" + 0.054*"измайловский_остров" + 0.048*"усадьба_измайлово" + 0.011*"музей" + 0.009*"музей_усадьба" + 0.009*"картинг" + 0.009*"vscocam"
2017-07-20 16:02:56,985 : INFO : topic #9 (0.001): 0.013*"square" + 0.010*"очень" + 0.010*"мюзикл" + 0.010*"концертный" + 0.009*"краснаяплощадь" + 0.009*"всем" + 0.009*"концертный_измайлово" + 0.008*"измайловская" + 0.008*"спасибо" + 0.006*"первомай

2017-07-20 16:02:58,890 : INFO : topic #6 (0.001): 0.029*"make" + 0.029*"atelier" + 0.027*"make_atelier" + 0.025*"school" + 0.017*"бигуди" + 0.016*"парикмахерская_бигуди" + 0.015*"_______________________________________" + 0.015*"работа" + 0.012*"мастера" + 0.011*"ateliermoscowlife"
2017-07-20 16:02:58,893 : INFO : topic #8 (0.001): 0.037*"метро" + 0.029*"партизанская" + 0.011*"partizanskaya" + 0.011*"метро_партизанская" + 0.010*"сегодня" + 0.010*"очень" + 0.009*"станция" + 0.009*"спасибо" + 0.008*"выставка" + 0.007*"день"
2017-07-20 16:02:58,896 : INFO : topic diff=0.090917, rho=0.206862
2017-07-20 16:02:58,899 : INFO : PROGRESS: pass 5, at document #2000/17369
2017-07-20 16:02:59,245 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:02:59,271 : INFO : topic #7 (0.001): 0.071*"измайлово" + 0.040*"гамма" + 0.040*"дельта" + 0.040*"измайлово_гамма" + 0.032*"park" + 0.027*"izmaylovsky" + 0.027*"izmaylovsky_park" + 0.023*"izmailovo" + 0.022*"house" 

2017-07-20 16:03:01,112 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:01,132 : INFO : topic #6 (0.001): 0.043*"atelier" + 0.043*"make" + 0.038*"school" + 0.037*"make_atelier" + 0.011*"ateliermoscowlife" + 0.010*"спасибо" + 0.010*"delta" + 0.009*"makeupatelier" + 0.009*"школа" + 0.008*"день"
2017-07-20 16:03:01,136 : INFO : topic #5 (0.001): 0.118*"измайловский" + 0.090*"парк" + 0.075*"измайловский_парк" + 0.017*"бульвар" + 0.014*"измайловский_бульвар" + 0.011*"измайловскийпарк" + 0.009*"отдыха" + 0.009*"культуры" + 0.008*"культуры_отдыха" + 0.006*"парке"
2017-07-20 16:03:01,142 : INFO : topic #7 (0.001): 0.073*"измайлово" + 0.039*"гамма" + 0.039*"дельта" + 0.039*"измайлово_гамма" + 0.031*"park" + 0.025*"izmaylovsky" + 0.025*"izmaylovsky_park" + 0.022*"сокольники" + 0.021*"альфа" + 0.021*"izmailovo"
2017-07-20 16:03:01,145 : INFO : topic #0 (0.001): 0.181*"измайлово" + 0.148*"кремль" + 0.113*"кремль_измайлово" + 0.024*"измайловский" + 0.01

2017-07-20 16:03:03,764 : INFO : topic #3 (0.001): 0.056*"ргуфксит" + 0.019*"izmaylovo" + 0.019*"pervomayskaya" + 0.017*"district" + 0.016*"izmaylovo_district" + 0.016*"ргуфксмит" + 0.010*"ргуфк" + 0.010*"beta" + 0.008*"всем" + 0.007*"beta_hotel"
2017-07-20 16:03:03,768 : INFO : topic #2 (0.001): 0.018*"kremlin" + 0.016*"izmailovo" + 0.014*"izmailovo_kremlin" + 0.014*"пруд" + 0.011*"парковая" + 0.011*"круглый" + 0.011*"круглый_пруд" + 0.009*"счастье" + 0.009*"спасибо" + 0.007*"любовь"
2017-07-20 16:03:03,771 : INFO : topic #9 (0.001): 0.014*"square" + 0.011*"всем" + 0.011*"концертный" + 0.010*"очень" + 0.010*"мюзикл" + 0.010*"первомайская" + 0.010*"концертный_измайлово" + 0.008*"спасибо" + 0.007*"россии" + 0.006*"измайловская"
2017-07-20 16:03:03,780 : INFO : topic #8 (0.001): 0.023*"метро" + 0.013*"партизанская" + 0.012*"измайловского" + 0.011*"очень" + 0.011*"partizanskaya" + 0.011*"измайловского_зверинца" + 0.011*"зверинца" + 0.010*"спасибо" + 0.010*"сегодня" + 0.008*"краски"
2017-0

2017-07-20 16:03:05,621 : INFO : topic #0 (0.001): 0.187*"измайлово" + 0.156*"кремль" + 0.121*"кремль_измайлово" + 0.023*"площадь" + 0.023*"измайловский" + 0.021*"красная" + 0.019*"красная_площадь" + 0.013*"измайловскийкремль" + 0.010*"вернисаж" + 0.007*"опубликовано"
2017-07-20 16:03:05,623 : INFO : topic #7 (0.001): 0.075*"измайлово" + 0.042*"дельта" + 0.042*"гамма" + 0.041*"измайлово_гамма" + 0.031*"park" + 0.027*"izmaylovsky" + 0.027*"izmaylovsky_park" + 0.026*"izmailovo" + 0.022*"альфа" + 0.021*"hotel"
2017-07-20 16:03:05,626 : INFO : topic diff=0.075347, rho=0.206862
2017-07-20 16:03:05,928 : INFO : -8.767 per-word bound, 435.5 perplexity estimate based on a held-out corpus of 369 documents with 8011 words
2017-07-20 16:03:05,931 : INFO : PROGRESS: pass 5, at document #17369/17369
2017-07-20 16:03:06,048 : INFO : merging changes from 369 documents into a model of 17369 documents
2017-07-20 16:03:06,077 : INFO : topic #3 (0.001): 0.041*"ргуфксит" + 0.025*"izmaylovo" + 0.024*"distr

2017-07-20 16:03:07,604 : INFO : topic diff=0.075527, rho=0.202573
2017-07-20 16:03:07,607 : INFO : PROGRESS: pass 6, at document #5000/17369
2017-07-20 16:03:07,926 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:07,950 : INFO : topic #3 (0.001): 0.039*"ргуфксит" + 0.023*"pervomayskaya" + 0.021*"izmaylovo" + 0.020*"district" + 0.020*"izmaylovo_district" + 0.010*"beta" + 0.010*"ргуфксмит" + 0.008*"ргуфк" + 0.008*"beta_hotel" + 0.007*"всем"
2017-07-20 16:03:07,956 : INFO : topic #2 (0.001): 0.012*"пруд" + 0.012*"kremlin" + 0.012*"izmailovo" + 0.011*"anton" + 0.011*"парковая" + 0.011*"anton_nizhnick" + 0.011*"nizhnick" + 0.010*"izmailovo_kremlin" + 0.010*"счастье" + 0.009*"круглый"
2017-07-20 16:03:07,959 : INFO : topic #1 (0.001): 0.060*"hotel" + 0.051*"vega" + 0.043*"best" + 0.041*"center" + 0.041*"western" + 0.041*"best_western" + 0.041*"convention" + 0.039*"plus" + 0.039*"plus_vega" + 0.039*"hotel_convention"
2017-07-20 16:03:07,963 : INF

2017-07-20 16:03:10,518 : INFO : topic #4 (0.001): 0.067*"измайлово" + 0.062*"измайловский" + 0.057*"усадьба" + 0.054*"остров" + 0.052*"измайловский_остров" + 0.048*"усадьба_измайлово" + 0.010*"музей" + 0.009*"музей_усадьба" + 0.009*"vscocam" + 0.007*"картинг"
2017-07-20 16:03:10,521 : INFO : topic #6 (0.001): 0.035*"make" + 0.034*"atelier" + 0.031*"make_atelier" + 0.030*"school" + 0.014*"delta" + 0.012*"спасибо" + 0.010*"gamma" + 0.010*"gamma_delta" + 0.010*"школа" + 0.009*"макияж"
2017-07-20 16:03:10,524 : INFO : topic #2 (0.001): 0.015*"izmailovo" + 0.015*"kremlin" + 0.014*"пруд" + 0.013*"izmailovo_kremlin" + 0.011*"круглый" + 0.011*"круглый_пруд" + 0.011*"парковая" + 0.009*"спасибо" + 0.008*"счастье" + 0.007*"любовь"
2017-07-20 16:03:10,526 : INFO : topic #8 (0.001): 0.026*"метро" + 0.017*"партизанская" + 0.013*"сегодня" + 0.012*"очень" + 0.012*"спасибо" + 0.012*"partizanskaya" + 0.007*"рождения" + 0.007*"день" + 0.007*"станция" + 0.006*"метро_партизанская"
2017-07-20 16:03:10,529 

2017-07-20 16:03:12,414 : INFO : topic #9 (0.001): 0.015*"square" + 0.014*"первомайская" + 0.011*"очень" + 0.009*"всем" + 0.009*"краснаяплощадь" + 0.009*"концертный" + 0.008*"мюзикл" + 0.008*"спасибо" + 0.008*"концертный_измайлово" + 0.008*"измайловская"
2017-07-20 16:03:12,419 : INFO : topic #6 (0.001): 0.030*"make" + 0.028*"atelier" + 0.026*"school" + 0.025*"make_atelier" + 0.019*"delta" + 0.014*"gamma" + 0.014*"школа" + 0.013*"gamma_delta" + 0.010*"casa" + 0.010*"beauty"
2017-07-20 16:03:12,424 : INFO : topic diff=0.060745, rho=0.202573
2017-07-20 16:03:12,428 : INFO : PROGRESS: pass 6, at document #16000/17369
2017-07-20 16:03:12,746 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:12,783 : INFO : topic #1 (0.001): 0.069*"hotel" + 0.057*"vega" + 0.048*"best" + 0.046*"western" + 0.046*"best_western" + 0.045*"center" + 0.045*"convention" + 0.044*"plus" + 0.044*"plus_vega" + 0.044*"hotel_convention"
2017-07-20 16:03:12,791 : INFO : topic #5

2017-07-20 16:03:14,367 : INFO : topic #3 (0.001): 0.041*"ргуфксит" + 0.023*"pervomayskaya" + 0.023*"izmaylovo" + 0.022*"district" + 0.021*"izmaylovo_district" + 0.012*"beta" + 0.011*"парикмахерская" + 0.010*"ргуфксмит" + 0.009*"ргуфк" + 0.008*"beta_hotel"
2017-07-20 16:03:14,372 : INFO : topic diff=0.067648, rho=0.198540
2017-07-20 16:03:14,375 : INFO : PROGRESS: pass 7, at document #3000/17369
2017-07-20 16:03:14,700 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:14,725 : INFO : topic #3 (0.001): 0.040*"ргуфксит" + 0.023*"izmaylovo" + 0.023*"pervomayskaya" + 0.022*"district" + 0.021*"izmaylovo_district" + 0.011*"ргуфксмит" + 0.011*"beta" + 0.009*"парикмахерская" + 0.009*"ргуфк" + 0.008*"всем"
2017-07-20 16:03:14,728 : INFO : topic #2 (0.001): 0.013*"пруд" + 0.012*"kremlin" + 0.012*"парковая" + 0.012*"izmailovo" + 0.011*"izmailovo_kremlin" + 0.010*"круглый" + 0.010*"круглый_пруд" + 0.010*"счастье" + 0.008*"комплекс" + 0.007*"спасибо"
2017

2017-07-20 16:03:16,526 : INFO : topic #0 (0.001): 0.184*"измайлово" + 0.150*"кремль" + 0.115*"кремль_измайлово" + 0.024*"измайловский" + 0.017*"площадь" + 0.015*"красная" + 0.015*"измайловскийкремль" + 0.014*"красная_площадь" + 0.010*"вернисаж" + 0.008*"виноградный"
2017-07-20 16:03:16,532 : INFO : topic #3 (0.001): 0.046*"ргуфксит" + 0.021*"pervomayskaya" + 0.019*"izmaylovo" + 0.017*"district" + 0.016*"izmaylovo_district" + 0.013*"ргуфксмит" + 0.012*"beta" + 0.009*"beta_hotel" + 0.008*"проезд" + 0.008*"сегодня"
2017-07-20 16:03:16,536 : INFO : topic #9 (0.001): 0.013*"всем" + 0.010*"концертный" + 0.010*"концертный_измайлово" + 0.009*"очень" + 0.009*"square" + 0.008*"магазин" + 0.008*"мюзикл" + 0.007*"первомайская" + 0.007*"спасибо" + 0.007*"цена"
2017-07-20 16:03:16,539 : INFO : topic #6 (0.001): 0.040*"atelier" + 0.040*"make" + 0.035*"school" + 0.035*"make_atelier" + 0.012*"delta" + 0.010*"спасибо" + 0.010*"ateliermoscowlife" + 0.009*"школа" + 0.008*"gamma" + 0.008*"день"
2017-07-20

2017-07-20 16:03:19,059 : INFO : topic #3 (0.001): 0.058*"ргуфксит" + 0.020*"pervomayskaya" + 0.019*"izmaylovo" + 0.017*"district" + 0.017*"izmaylovo_district" + 0.016*"ргуфксмит" + 0.011*"ргуфк" + 0.011*"beta" + 0.008*"всем" + 0.007*"бета"
2017-07-20 16:03:19,063 : INFO : topic #1 (0.001): 0.070*"hotel" + 0.058*"vega" + 0.049*"best" + 0.047*"western" + 0.047*"best_western" + 0.046*"center" + 0.046*"convention" + 0.045*"plus" + 0.045*"plus_vega" + 0.045*"hotel_convention"
2017-07-20 16:03:19,067 : INFO : topic #7 (0.001): 0.079*"измайлово" + 0.049*"дельта" + 0.048*"гамма" + 0.047*"измайлово_гамма" + 0.030*"park" + 0.026*"izmaylovsky_park" + 0.026*"izmaylovsky" + 0.025*"izmailovo" + 0.021*"сокольники" + 0.021*"hotel"
2017-07-20 16:03:19,071 : INFO : topic diff=0.058318, rho=0.198540
2017-07-20 16:03:19,075 : INFO : PROGRESS: pass 7, at document #14000/17369
2017-07-20 16:03:19,397 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:19,422 : INFO

2017-07-20 16:03:20,996 : INFO : topic #8 (0.001): 0.036*"метро" + 0.028*"партизанская" + 0.012*"partizanskaya" + 0.010*"метро_партизанская" + 0.010*"очень" + 0.009*"спасибо" + 0.009*"станция" + 0.009*"сегодня" + 0.008*"измайловское" + 0.007*"шоссе"
2017-07-20 16:03:20,999 : INFO : topic diff=0.140405, rho=0.198540
2017-07-20 16:03:21,002 : INFO : PROGRESS: pass 8, at document #1000/17369
2017-07-20 16:03:21,315 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:21,342 : INFO : topic #8 (0.001): 0.036*"метро" + 0.029*"партизанская" + 0.011*"partizanskaya" + 0.011*"метро_партизанская" + 0.010*"очень" + 0.010*"сегодня" + 0.009*"спасибо" + 0.009*"станция" + 0.007*"выставка" + 0.007*"день"
2017-07-20 16:03:21,345 : INFO : topic #2 (0.001): 0.015*"парковая" + 0.014*"izmailovo" + 0.013*"kremlin" + 0.012*"пруд" + 0.011*"izmailovo_kremlin" + 0.009*"круглый" + 0.008*"круглый_пруд" + 0.008*"комплекс" + 0.008*"счастье" + 0.007*"спасибо"
2017-07-20 16:03:

2017-07-20 16:03:23,173 : INFO : topic #4 (0.001): 0.064*"измайловский" + 0.061*"измайлово" + 0.054*"усадьба" + 0.052*"остров" + 0.050*"измайловский_остров" + 0.045*"усадьба_измайлово" + 0.012*"проспект" + 0.011*"измайловский_проспект" + 0.009*"картинг" + 0.009*"платьев"
2017-07-20 16:03:23,179 : INFO : topic #2 (0.001): 0.013*"kremlin" + 0.013*"izmailovo" + 0.012*"пруд" + 0.011*"izmailovo_kremlin" + 0.010*"парковая" + 0.009*"anton" + 0.009*"круглый" + 0.009*"anton_nizhnick" + 0.009*"nizhnick" + 0.009*"счастье"
2017-07-20 16:03:23,183 : INFO : topic #1 (0.001): 0.065*"hotel" + 0.053*"vega" + 0.044*"best" + 0.042*"western" + 0.042*"best_western" + 0.042*"center" + 0.042*"convention" + 0.040*"plus" + 0.039*"plus_vega" + 0.039*"hotel_convention"
2017-07-20 16:03:23,188 : INFO : topic #8 (0.001): 0.030*"метро" + 0.020*"партизанская" + 0.013*"очень" + 0.012*"partizanskaya" + 0.012*"сегодня" + 0.009*"спасибо" + 0.008*"станция" + 0.008*"метро_партизанская" + 0.007*"день" + 0.006*"рождения"
20

2017-07-20 16:03:25,718 : INFO : topic #4 (0.001): 0.066*"измайлово" + 0.062*"измайловский" + 0.056*"усадьба" + 0.055*"остров" + 0.054*"измайловский_остров" + 0.047*"усадьба_измайлово" + 0.010*"музей" + 0.010*"vscocam" + 0.008*"музей_усадьба" + 0.007*"картинг"
2017-07-20 16:03:25,722 : INFO : topic #1 (0.001): 0.070*"hotel" + 0.059*"vega" + 0.050*"best" + 0.047*"western" + 0.047*"best_western" + 0.047*"center" + 0.047*"convention" + 0.046*"plus" + 0.046*"plus_vega" + 0.046*"hotel_convention"
2017-07-20 16:03:25,725 : INFO : topic diff=0.075903, rho=0.194739
2017-07-20 16:03:25,727 : INFO : PROGRESS: pass 8, at document #12000/17369
2017-07-20 16:03:26,033 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:26,057 : INFO : topic #8 (0.001): 0.023*"метро" + 0.013*"партизанская" + 0.012*"очень" + 0.011*"измайловского" + 0.011*"partizanskaya" + 0.011*"спасибо" + 0.010*"сегодня" + 0.010*"зверинца" + 0.010*"измайловского_зверинца" + 0.007*"краски"
20

2017-07-20 16:03:27,475 : INFO : PROGRESS: pass 8, at document #17000/17369
2017-07-20 16:03:27,769 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:27,796 : INFO : topic #0 (0.001): 0.187*"измайлово" + 0.157*"кремль" + 0.121*"кремль_измайлово" + 0.023*"площадь" + 0.023*"измайловский" + 0.021*"красная" + 0.019*"красная_площадь" + 0.013*"измайловскийкремль" + 0.010*"вернисаж" + 0.007*"опубликовано"
2017-07-20 16:03:27,800 : INFO : topic #8 (0.001): 0.039*"метро" + 0.023*"партизанская" + 0.011*"метро_партизанская" + 0.011*"станция" + 0.010*"спасибо" + 0.010*"partizanskaya" + 0.010*"очень" + 0.009*"сегодня" + 0.007*"день" + 0.007*"станция_метро"
2017-07-20 16:03:27,807 : INFO : topic #9 (0.001): 0.025*"всем" + 0.022*"цена" + 0.021*"заказа" + 0.015*"первомайская" + 0.014*"square" + 0.013*"цена_всем" + 0.013*"вопросам_пишите" + 0.009*"концертный" + 0.008*"мюзикл" + 0.008*"очень"
2017-07-20 16:03:27,810 : INFO : topic #2 (0.001): 0.017*"izmailovo"

2017-07-20 16:03:29,731 : INFO : topic #4 (0.001): 0.066*"измайловский" + 0.057*"измайлово" + 0.051*"усадьба" + 0.050*"остров" + 0.048*"измайловский_остров" + 0.043*"усадьба_измайлово" + 0.017*"проспект" + 0.017*"измайловский_проспект" + 0.013*"платьев" + 0.013*"магазин_платьев"
2017-07-20 16:03:29,737 : INFO : topic #8 (0.001): 0.028*"метро" + 0.021*"партизанская" + 0.013*"очень" + 0.012*"сегодня" + 0.011*"partizanskaya" + 0.009*"спасибо" + 0.008*"метро_партизанская" + 0.007*"день" + 0.007*"станция" + 0.006*"рождения"
2017-07-20 16:03:29,741 : INFO : topic #9 (0.001): 0.016*"всем" + 0.014*"магазин" + 0.014*"цена" + 0.011*"наличии" + 0.009*"очень" + 0.009*"первомайская" + 0.008*"концертный" + 0.008*"square" + 0.008*"концертный_измайлово" + 0.007*"заказа"
2017-07-20 16:03:29,743 : INFO : topic #5 (0.001): 0.118*"измайловский" + 0.086*"парк" + 0.073*"измайловский_парк" + 0.019*"бульвар" + 0.014*"измайловский_бульвар" + 0.014*"измайловскийпарк" + 0.008*"отдыха" + 0.008*"культуры" + 0.007*

2017-07-20 16:03:31,529 : INFO : topic #9 (0.001): 0.013*"всем" + 0.011*"первомайская" + 0.011*"концертный" + 0.010*"square" + 0.010*"концертный_измайлово" + 0.008*"очень" + 0.007*"мюзикл" + 0.007*"спасибо" + 0.007*"магазин" + 0.007*"качка"
2017-07-20 16:03:31,532 : INFO : topic #1 (0.001): 0.068*"hotel" + 0.055*"vega" + 0.047*"best" + 0.044*"western" + 0.044*"best_western" + 0.044*"center" + 0.044*"convention" + 0.042*"plus" + 0.042*"plus_vega" + 0.042*"hotel_convention"
2017-07-20 16:03:31,538 : INFO : topic diff=0.049655, rho=0.191148
2017-07-20 16:03:32,300 : INFO : -8.298 per-word bound, 314.8 perplexity estimate based on a held-out corpus of 1000 documents with 14772 words
2017-07-20 16:03:32,303 : INFO : PROGRESS: pass 9, at document #10000/17369
2017-07-20 16:03:32,622 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:32,645 : INFO : topic #8 (0.001): 0.026*"метро" + 0.017*"партизанская" + 0.013*"сегодня" + 0.012*"очень" + 0.012*"спас

2017-07-20 16:03:34,093 : INFO : topic diff=0.051234, rho=0.191148
2017-07-20 16:03:34,096 : INFO : PROGRESS: pass 9, at document #15000/17369
2017-07-20 16:03:34,407 : INFO : merging changes from 1000 documents into a model of 17369 documents
2017-07-20 16:03:34,435 : INFO : topic #4 (0.001): 0.070*"измайлово" + 0.060*"измайловский" + 0.058*"усадьба" + 0.056*"остров" + 0.054*"измайловский_остров" + 0.048*"усадьба_измайлово" + 0.011*"музей" + 0.009*"музей_усадьба" + 0.009*"картинг" + 0.009*"vscocam"
2017-07-20 16:03:34,441 : INFO : topic #8 (0.001): 0.022*"метро" + 0.013*"партизанская" + 0.012*"очень" + 0.011*"спасибо" + 0.010*"сегодня" + 0.010*"partizanskaya" + 0.008*"холи" + 0.007*"измайловского" + 0.007*"день" + 0.007*"метро_партизанская"
2017-07-20 16:03:34,444 : INFO : topic #1 (0.001): 0.074*"hotel" + 0.057*"vega" + 0.049*"best" + 0.047*"western" + 0.047*"best_western" + 0.046*"center" + 0.046*"convention" + 0.045*"plus" + 0.044*"plus_vega" + 0.044*"hotel_convention"
2017-07-20 1

Wall time: 1min 19s


In [None]:
# import nltk
# nltk.download()

In [14]:
# Build the pyLDAvis payload from the fitted LDA model, its bag-of-words
# corpus and the token dictionary, then render the interactive view inline.
data = pyLDAvis.gensim.prepare(model, corpus, dictionary)
pyLDAvis.display(data)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  topic_term_dists = topic_term_dists.ix[topic_order]


# Assign topics to users

In [15]:
# model[corpus] lazily yields, for every document, a list of
# (topic_id, probability) pairs. gensim orders each list by topic id,
# NOT by probability, so the first pair is not the dominant topic.
# NOTE(review): assumes corpus rows are in the same order as vk_users rows — confirm
# against the cell that builds `corpus`.
docTopicProbMat = model[corpus]
lda_users = vk_users.copy()
# Materialise the lazy corpus so every row stores its own list of pairs.
lda_users['topics'] = pd.Series(list(docTopicProbMat), index=lda_users.index)
# Dominant topic = the pair with the highest probability; None when gensim
# returned no topic above its minimum-probability threshold.
vk_users['topic'] = lda_users['topics'].apply(
    lambda doc_topics: max(doc_topics, key=lambda pair: pair[1])[0] if doc_topics else None
)

# Assign each user's topic to their posts

In [16]:
# Propagate every user's topic to that user's posts with a single
# userId -> topic lookup instead of scanning vk_users once per post.
# Posts whose userId has no row in vk_users get NaN rather than raising.
neigh_posts['topic'] = neigh_posts['userId'].map(vk_users.set_index('userId')['topic'])

# Plot the histogram of topic distribution per post and per user

# Mapping

In [17]:
# Centroid of the first district geometry; its coordinates come as (x, y),
# i.e. (lon, lat), so unpack them in that order.
center_lon, center_lat = list(district.centroid[0].coords)[0][:2]

In [18]:
# Base map centred on the district centroid.
map_places = folium.Map([center_lat, center_lon], tiles='Stamen Toner', zoom_start=14,control_scale=True)

# Define style for geojson objects
style_function = lambda feature: dict(fillColor='#AECCAE',
                                      color='#AECCAE',
                                      weight=1,
                                      opacity=0.3)

# Adding Houses
# NOTE(review): the '.geogson' extension looks like a typo for '.geojson' —
# confirm against the file name on disk before changing it.
houses = gpd.read_file('../Data dive/dd2/{}/{}_chruchevki.geogson'.format(name, name))
points = folium.features.GeoJson(houses,name='Khurshevki houses')
map_places.add_child(points,name='Khurshevki houses')

# Adding district
polygon = folium.features.GeoJson(district, style_function=style_function,name='district boundary')
map_places.add_child(polygon,name='district boundary')

# Heat-map colour ramp, shared by every topic layer.
colormap_dict = {.0: 'blue', .2: 'cyan', .4: 'green', .6: 'yellow', .8:'orange', 1.:'red'}

# Adding topics heatmaps: one toggleable layer per topic that actually occurs
# in the posts, so no empty layers are created for unused topic ids.
for topic_id in sorted(neigh_posts['topic'].dropna().unique()):
    # Filter once per topic and drop posts without coordinates, because
    # HeatMap rejects NaN values.
    topic_posts = neigh_posts.loc[neigh_posts['topic'] == topic_id, ['lat', 'lon']].dropna()
    topic_coords = topic_posts.values.tolist()

    HeatMap(topic_coords,
            name='Topic: {}'.format(int(topic_id)),
            radius=10,
            min_opacity=0.8,
            gradient=colormap_dict).add_to(map_places)

# Switch between layers
folium.LayerControl().add_to(map_places)
map_places