In [1]:
from PIL import Image
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import json
import os

In [2]:
class SimpleGroupedColorFunc(object):
    """Color function that assigns a fixed color to each listed word.

    Instances are callable as ``func(word, **kwargs)`` and return the color
    registered for ``word``, or ``default_color`` when the word was not listed.

    Parameters
    ----------
    color_to_words : dict
        Maps a color to an iterable of words that should receive that color.
        If a word is listed under several colors, the color that comes last
        in the dict's iteration order wins.
    default_color
        Color returned for every word that is not listed in ``color_to_words``.
    """

    def __init__(self, color_to_words, default_color):
        # Invert the {color: words} mapping into a flat {word: color} lookup.
        lookup = {}
        for color, words in color_to_words.items():
            for word in words:
                lookup[word] = color

        self.word_to_color = lookup
        self.default_color = default_color

    def __call__(self, word, **kwargs):
        """Return the color for ``word``; extra keyword arguments are ignored."""
        if word in self.word_to_color:
            return self.word_to_color[word]
        return self.default_color

In [3]:
# Font file handed to the WordCloud instances below.
font_path = 'BlackHanSans-Regular.ttf'

# Load the mask images as arrays. The context managers close each image file
# as soon as its pixels have been copied into the array, instead of leaving
# the open handle to be released by garbage collection.
with Image.open("circle.png") as circle_image:
    circle_mask = np.array(circle_image)
with Image.open('KAIST.JPG') as kaist_image:
    kaist_mask = np.array(kaist_image)

In [4]:
# One WordCloud generator per mask shape; both share the font and a white
# background.
# NOTE(review): the wordcloud documentation says width/height are ignored when
# a mask is supplied (the mask's shape is used instead) - confirm whether these
# arguments are still wanted.
wc_circle = WordCloud(
    font_path=font_path,
    background_color="white",
    mask=circle_mask,
    width=800,
    height=800,
)
wc_kaist = WordCloud(
    font_path=font_path,
    background_color="white",
    mask=kaist_mask,
    width=1000,
    height=500,
)

In [5]:
# Folder holding the per-file keyword/score JSON inputs.
INPUT_DIR = 'json_file'
# Output folder chosen by the text before the first '_' in the file name;
# any other prefix goes to the journal folder.
OUTPUT_DIRS = {'korean': 'wordcloud/korean/', 'english': 'wordcloud/english/'}
DEFAULT_OUTPUT_DIR = 'wordcloud/journal/'
# Highlight tiers by score rank: ranks 0-3 are coral, ranks 4-10 are pink,
# everything else falls back to the default color.
CORAL_END = 4
PINK_END = 11

# Sorted so that every run processes the files in the same order
# (os.listdir returns entries in arbitrary order).
for file in sorted(os.listdir(INPUT_DIR)):
    file_dir = INPUT_DIR + '/' + file
    if not os.path.isfile(file_dir):
        # Skip anything that is not a regular file (e.g. a sub-directory),
        # which open() below could not read.
        continue

    # Each JSON file is expected to be a list of {'keyword': ..., 'score': ...}
    # records; a later duplicate keyword overrides an earlier one.
    with open(file_dir, encoding='utf-8-sig') as json_file:
        json_data = json.load(json_file)
    words = {entry['keyword']: entry['score'] for entry in json_data}
    if not words:
        # An empty frequency table cannot be laid out as a word cloud;
        # report it and carry on with the remaining files.
        print('Skipping ' + file_dir + ': no keywords found')
        continue

    # Rank keywords by descending score and color the top tiers.
    words_sorted = sorted(words, key=words.get, reverse=True)
    color_to_words = {
        'coral': words_sorted[:CORAL_END],
        'pink': words_sorted[CORAL_END:PINK_END],
    }
    simple_color_func = SimpleGroupedColorFunc(color_to_words, 'skyblue')

    circle = wc_circle.generate_from_frequencies(words)
    kaist = wc_kaist.generate_from_frequencies(words)
    circle.recolor(color_func=simple_color_func)
    kaist.recolor(color_func=simple_color_func)

    directory = OUTPUT_DIRS.get(file.split('_')[0], DEFAULT_OUTPUT_DIR)
    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(directory, exist_ok=True)

    # Draw each cloud on its own figure so the images are not stacked on one
    # axes, and always close the figure, even if saving fails.
    for cloud, suffix in ((circle, '_circle.png'), (kaist, '_kaist.png')):
        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            ax.axis("off")
            ax.imshow(cloud, interpolation="bilinear")
            fig.savefig(directory + file + suffix)
        finally:
            plt.close(fig)