In [1]:
import glob
from pathlib import Path

import pandas as pd

from janome.analyzer import Analyzer
from janome.tokenfilter import CompoundNounFilter
from janome.tokenizer import Tokenizer
from wordcloud import WordCloud


In [2]:
# Word clouds of session names: one PNG per decade, written to output/.

df = pd.read_excel('session_name/sessions.xlsx')

# Decade buckets. Note the first bucket has no lower bound, so it also
# collects any session dated before 2000.
df_2000 = df[df['year'] < 2010]
df_2010 = df[(df['year'] >= 2010) & (df['year'] < 2020)]
df_2020 = df[(df['year'] >= 2020) & (df['year'] < 2030)]

# Make sure the target directory exists before any image is saved.
Path('output').mkdir(parents=True, exist_ok=True)

# A single WordCloud instance is reused for every decade; generate()
# replaces the previous layout each time.
# The font path is a macOS system font (needed to render Japanese glyphs);
# adjust it on other platforms.
# No custom stopwords are applied here (a candidate set was
# {"もの","これ","ため","それ","ところ","よう"}).
wc = WordCloud(width=320, height=480, background_color="white",
               font_path="/System/Library/Fonts/ヒラギノ角ゴシック W6.ttc")

for dataframe, filename in zip([df_2000, df_2010, df_2020], ['2000', '2010', '2020']):
    # Blank spreadsheet cells are read as NaN (a float), which would make
    # str.join raise TypeError; drop them and coerce the rest to str.
    names = dataframe['session_name'].dropna().astype(str)
    if names.empty:
        # generate() raises on empty text, so skip decades without data.
        print('no session names for ' + filename + 's; skipping')
        continue
    wc.generate(" ".join(names))
    wc.to_file('output/'+filename+'_session_name.png')

In [12]:
# Load the per-year paper-title TSVs (paper_title/<year>.tsv, no header row)
# into one DataFrame with columns id, title, year, then split it by decade.

# glob returns files in arbitrary order; sort for a reproducible row order.
tsv_files = sorted(glob.glob('paper_title/*.tsv'))
if not tsv_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError("no TSV files found under 'paper_title/'")

li = []
for filename in tsv_files:
    df = pd.read_csv(filename, header=None, delimiter="\t")
    # The year is the file name up to the first dot. Path.name is used so
    # this works regardless of the OS path separator.
    df['year'] = int(Path(filename).name.split('.')[0])
    li.append(df)

df = pd.concat(li, axis=0, ignore_index=True)
df = df.rename(columns={0: 'id', 1: 'title'})

# Decade buckets. Each comparison must be parenthesised: `&` binds tighter
# than `<`/`>=`, so an unparenthesised `a & b < c` is evaluated as
# `(a & b) < c` and silently produces the wrong mask.
df_1990 = df[df['year'] < 2000]
df_2000 = df[(df['year'] >= 2000) & (df['year'] < 2010)]
df_2010 = df[(df['year'] >= 2010) & (df['year'] < 2020)]
df_2020 = df[(df['year'] >= 2020) & (df['year'] < 2030)]



# Word clouds of nouns extracted from Japanese paper titles: one PNG per
# decade, written to output/title_<decade>.png.

# Morphological analyser: janome tokenizer with a user dictionary, followed
# by a filter that merges consecutive nouns into compound nouns.
tokenizer    = Tokenizer('user_dictionary.csv', udic_type="simpledic", udic_enc="utf8")
token_filter = [CompoundNounFilter()]
analyzer     = Analyzer(tokenizer=tokenizer, token_filters=token_filter)

# Top-level part-of-speech tags to keep ('カスタム名詞' is presumably the tag
# assigned to user-dictionary entries — verify against user_dictionary.csv).
noun_tags = ("名詞", 'カスタム名詞')

# Generic research/venue vocabulary that would otherwise dominate the clouds.
title_stopwords = {
    "ため", 'こと', "検討", '手法', '型', '対象', '提案', '考慮',
    '分析', '実現', '性能評価', '評価', '実装', '利用', '方式', '化', '開発', '応用', '構築', '機能',
    '的', '性', '法', '他', '場', '一',
    '可能', '設計', '研究', '一考察', '考察', '一検討', 'ー検討', '一実装', '検証', '活用', '調査',
    '目的', '着目', '向上', '影響', '改良', '試作', '適用', '改善', '導入',
    '招待講演:', '招待講演', '招聘講演', 'テーブルディスカッション', 'パネルディスカッション', '特別企画', '若手研究者',
    'for', 'of', 'based', 'A', 'System', 'in', 'on', 'and', 'object',
}

# The WordCloud configuration does not depend on the decade, so build it once
# and reuse it; generate() replaces the previous layout on each call.
# The font path is a macOS system font (needed to render Japanese glyphs).
# NOTE(review): contour_color/contour_width appear to take effect only when a
# mask is supplied — confirm against the wordcloud documentation.
wc = WordCloud(width=320, height=480,
               background_color="white",
               contour_color='black',
               contour_width=3,
               stopwords=title_stopwords,
               font_path="/System/Library/Fonts/ヒラギノ角ゴシック W6.ttc")

# Make sure the target directory exists before any image is saved.
Path('output').mkdir(parents=True, exist_ok=True)

for df_sub, y in zip([df_1990, df_2000, df_2010, df_2020], [1990, 2000, 2010, 2020]):

    nouns = []
    for title in df_sub['title']:
        # Missing titles are read as NaN (a float) and empty strings cannot be
        # indexed; skip both instead of crashing.
        if not isinstance(title, str) or not title:
            continue
        # Treat a title as Japanese only when both its first and last
        # characters are non-ASCII; everything else is skipped.
        if title[0].isascii() or title[-1].isascii():
            continue
        for token in analyzer.analyze(title):
            if token.part_of_speech.split(',')[0] in noun_tags:
                nouns.append(token.surface)  # surface form

    if not nouns:
        # generate() raises on empty text, so skip decades without any nouns.
        print('no Japanese title nouns for ' + str(y) + 's; skipping')
        continue

    wc.generate(" ".join(nouns))
    wc.to_file('output/title_'+str(y)+'.png')

