# 社畜丼ワードクラウド

http://qiita.com/kenmatsu4/items/9b6ac74f831443d29074

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image
import MeCab as mc

In [None]:
def mecab_analysis(text):
    """Tokenise ``text`` with MeCab and return the surfaces of selected words.

    A token is kept when the first comma-separated field of its feature
    string is one of 形容詞 (adjective), 名詞 (noun) or 副詞 (adverb).
    Surfaces are returned as a list, in the order MeCab yields them.
    """
    tagger_options = ' '.join([
        '-Ochasen',
        '-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd/',
        '-u username.dic',
    ])
    tagger = mc.Tagger(tagger_options)

    # Always keep the string handed to MeCab in a variable.
    # https://shogo82148.github.io/blog/2012/12/15/mecab-python/
    stripped = text.strip()

    # Workaround for UnicodeDecodeError.
    # http://taka-say.hateblo.jp/entry/2015/06/24/183748
    tagger.parse('')

    wanted_types = ["形容詞", "名詞", "副詞"]
    words = []
    node = tagger.parseToNode(stripped)
    while node:
        surface = node.surface
        # An empty surface marks the header/footer nodes, which are skipped.
        if surface != "" and node.feature.split(",")[0] in wanted_types:
            words.append(surface)
        node = node.next
    return words

def create_wordcloud(text, background_image='background'):
    """Draw a word cloud of ``text`` shaped and coloured by an image.

    Args:
        text: Whitespace-separated words to lay out.
        background_image: Path of the image opened with PIL. Its pixel array
            is used both as the layout mask and as the colour source for the
            words.

    The figure is displayed with matplotlib; nothing is returned.
    """
    # Set the font path to suit the local environment, e.g. on macOS:
    #   "/System/Library/Fonts/HelveticaNeue-UltraLight.otf"
    #   "/Library/Fonts/ヒラギノ角ゴ Pro W3.otf"
    fpath = "/usr/share/fonts/opentype/noto/NotoSansCJK-Medium.ttc"

    # Stop words excluded from the cloud.
    stop_words = {
        u'てる', u'いる', u'なる', u'れる', u'する', u'ある', u'こと', u'これ', u'さん', u'して',
        u'くれる', u'やる', u'くださる', u'そう', u'せる', u'した', u'思う',
        u'それ', u'ここ', u'ちゃん', u'くん', u'', u'て', u'に', u'を', u'は', u'の', u'が', u'と', u'た', u'し', u'で',
        u'ない', u'も', u'な', u'い', u'か', u'ので', u'よう',
    }

    # Load the pixels inside a context manager so the image file is closed
    # as soon as the array has been built.
    with Image.open(background_image) as img:
        mer_coloring = np.array(img)

    # Build the cloud exactly once; an extra unmasked cloud used to be
    # generated here and immediately thrown away.
    # NOTE(review): the wordcloud docs say width/height are ignored when a
    # mask is supplied; they are kept only to leave the call unchanged.
    wordcloud = WordCloud(
        background_color="white",
        font_path=fpath,
        width=900,
        height=500,
        mask=mer_coloring,
        stopwords=stop_words,
    ).generate(text)
    image_colors = ImageColorGenerator(mer_coloring)

    # For a custom colour function instead of image colours, see
    # https://amueller.github.io/word_cloud/auto_examples/a_new_hope.html
    plt.figure(figsize=(15, 12))
    plt.imshow(wordcloud.recolor(color_func=image_colors), interpolation="bilinear")
    plt.axis("off")
    plt.show()

In [None]:
def get_current_toots():
    """Load ``toot_data.current.tsv`` from the working directory.

    The file is read as tab-separated data without a header row, and the
    columns are named ``created_at``, ``username``, ``userid`` and ``toot``.

    Returns:
        The parsed rows as a pandas DataFrame.
    """
    column_names = ['created_at', 'username', 'userid', 'toot']
    toots = pd.read_csv(
        "toot_data.current.tsv",
        sep='\t',
        header=None,
        names=column_names,
    )
    return toots

def toot_convert(toots):
    """Strip HTML tags and URLs from every toot in a pandas Series.

    Args:
        toots: Series of toot bodies. Missing values (None/NaN, e.g. empty
            fields loaded from the TSV) are treated as empty strings rather
            than raising ``TypeError`` inside ``re.sub``.

    Returns:
        A Series of the same length with ``<...>`` tags removed first and
        ``http://`` / ``https://`` URLs (up to the next space) removed second.
    """
    import re

    tag_pattern = re.compile('<[^>]*>')
    url_pattern = re.compile(r'https?://[^ ]+')

    def _clean(toot):
        # Tags go first so a URL inside an attribute is dropped with its tag.
        return url_pattern.sub("", tag_pattern.sub('', toot))

    return toots.fillna('').map(_clean)

In [None]:
# Create the date / time-range selection controls and show them in the notebook.
# NOTE(review): time_settings and time_clipping come from the project-local
# `utils` module, which is not shown here. The two returned objects are
# presumably widgets, since `.value` is read from each of them in the next
# cell — confirm against utils.
from utils import time_settings, time_clipping
from IPython.display import display
datepicker, time_range = time_settings()
display(datepicker, time_range)

In [None]:
# Clip the loaded toots using the currently selected date and time range.
# NOTE(review): time_clipping lives in the local `utils` module; its exact
# filtering rules are not visible here.
df_ranged = time_clipping(get_current_toots(), datepicker.value, *time_range.value)
# Join all toots into one string and feed it to the morphological analyser to split it into words
wordlist = mecab_analysis(' '.join(toot_convert(df_ranged['toot']).tolist()))
# Join the returned word list and turn it into a word cloud
create_wordcloud(' '.join(wordlist))