In [1]:
# import
import gc
import os
import random
import re
from datetime import timedelta  # date/time arithmetic

import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import wordcloud
from PIL import Image
from tqdm import tqdm_notebook  # deprecated alias, kept so existing cells keep working
from tqdm.notebook import tqdm  # notebook progress bar

# Use a Korean-capable font so Hangul text in figures is not rendered as broken glyphs
plt.rc('font',family='Malgun Gothic')
# Draw minus signs as an ASCII hyphen (the Unicode minus may be missing from this font)
plt.rcParams['axes.unicode_minus'] = False

# `display` and `HTML` are public in IPython.display; importing `display`
# from IPython.core.display is a deprecated path.
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width
display(HTML('<style>.container {width:100% !important; }</style>'))

### load

In [83]:
# Load the stopword list from the '불용어' column of the workbook.
# read_excel has no `encoding` argument (an .xlsx file is not a text encoding),
# and current pandas rejects the unknown keyword with a TypeError.
sw = pd.read_excel("stopword(cp949).xlsx")['불용어'].tolist()
path = './output/token_통합/'
# os.listdir returns entries in arbitrary order; sort so file_list[0] is reproducible.
file_list = sorted(os.listdir(path))

### sample

In [81]:
# Use the first token file as the working sample.
file = file_list[0]
# The part of the file name before the first '_' is the search keyword.
keyword = file.partition('_')[0]

file_df = pd.read_csv(f'{path}{file}')

In [85]:
# Keyword-specific stopwords: each search keyword needs its own extra words
# removed (the place name itself and its suffixed form).
custom_sw = {
    '강릉': ['강릉', '강릉시'],
    '양양': ['양양', '양양군'],
    '속초': ['속초', '속초시'],
}

In [147]:
def _split_tokens(cell):
    """Turn one stringified token list (e.g. "['a', 'b']") into a list of non-empty tokens."""
    # Raw string: "\[" in a normal string literal is an invalid escape sequence.
    return [tok for tok in re.sub(r"[\[\]' ]", "", cell).split(',') if tok]


def _collect_words(df, options):
    """Flatten the token columns selected by `options` into one word list."""
    columns_by_option = {
        'all': ['Noun', 'Adjective'],
        'noun': ['Noun'],
        'adjective': ['Adjective'],
    }
    if options not in columns_by_option:
        raise ValueError(
            f"options must be 'all', 'noun' or 'adjective', got {options!r}")
    columns = columns_by_option[options]
    filled = df[columns].fillna("")  # missing cells become empty token lists
    return [tok for col in columns for cell in filled[col] for tok in _split_tokens(cell)]


def draw_wordcloud(df, stopword, custom_sw, keyword, options="all",
                   mask_path="vector-cloud-png.png",
                   font_path='C:/Windows/Fonts/malgun.ttf'):
    """Render a masked word cloud for `df` and save it as a PNG.

    Parameters
    ----------
    df : DataFrame with a 'year' column and 'Noun' / 'Adjective' columns that
        hold stringified token lists such as "['a', 'b']".
    stopword : iterable of words to exclude. It is NOT modified.
    custom_sw : dict mapping a keyword to extra stopwords for that keyword.
        A keyword without an entry simply gets no extras.
    keyword : search keyword; excluded from the cloud and used in the title
        and output file name.
    options : 'all' (nouns + adjectives), 'noun' or 'adjective'.
    mask_path : image whose shape the cloud is drawn into.
    font_path : font file used to render the words.

    Raises
    ------
    ValueError : if `options` is not one of the three supported values.

    The figure is written to
    ./output/워드클라우드/{keyword}_{options}/{keyword}_{period}_{options}.png
    where period is the single year in df or 'min~max' when there are several.
    """
    # Build the stopword set on a copy: appending to the caller's list would
    # grow the shared list on every call made from a loop.
    stopwords = set(stopword)
    stopwords.add(keyword)
    stopwords.update(custom_sw.get(keyword, ()))

    words = _collect_words(df, options)

    years = df['year'].unique()
    if len(years) != 1:
        period = '{s}~{l}'.format(s=years.min(), l=years.max())
    else:
        period = years[0]

    # Mask: flatten the RGBA image onto a white RGB canvas, using the image
    # itself as the paste mask.
    with Image.open(mask_path) as src:  # release the file handle once converted
        cloud_img = src.convert('RGBA')
    canvas = Image.new("RGB", cloud_img.size, (255, 255, 255))
    canvas.paste(cloud_img, cloud_img)
    mask = np.array(canvas)

    wordc = wordcloud.WordCloud(background_color='White', max_words=200,
                                font_path=font_path,
                                relative_scaling=0.5,
                                stopwords=stopwords,
                                collocations=False,
                                mask=mask)
    wordc.generate(' '.join(words))

    def _hue_color(word, font_size, position, orientation, random_state=None, **kwargs):
        # Draw the hue from the generator that recolor() hands in, so that
        # random_state=3 below really makes the colours reproducible.
        rng = random_state if random_state is not None else random
        return 'hsl(%d,100%%,50%%)' % rng.randint(200, 300)  # hue, saturation, lightness
    wordc.recolor(color_func=_hue_color, random_state=3)

    # Draw and save; always close the figure so looping callers do not
    # accumulate open figures.
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        ax.set_axis_off()
        ax.set_title(f'{keyword}_{period}_{options}', fontsize=15)
        ax.imshow(wordc, interpolation='bilinear')

        out_dir = f'./output/워드클라우드/{keyword}_{options}/'
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(f'{out_dir}{keyword}_{period}_{options}.png')
    finally:
        plt.close(fig)

In [148]:
# Draw one word cloud per year.
# Uses tqdm.notebook.tqdm: the old `tqdm_notebook` alias is deprecated and
# prints a warning on every run.
years = file_df['year'].unique()
for year in tqdm(years, desc="year"):
    sample = file_df[file_df['year'] == year]
    draw_wordcloud(sample, sw, custom_sw, keyword)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, description='year', max=10.0, style=ProgressStyle(description_width='i…




### gif 만들기
* 2010년부터 2019년까지의 추이 변화를 볼 수 있다는 특징이 있다

In [2]:
def make_gif(path):
    '''
    Build an animated GIF from the images in the folder `path`.

    Only .png and .jpg files are used (extension compared case-insensitively).
    File names are expected to look like "<main>_<time>_<tail>.<ext>"; the
    frames are ordered by file name and the result is written to
    ./output/워드클라우드/<main>_<first time>~<last time>_<tail>.gif

    Relies on os, np, Image (PIL) and imageio imported at the top of the notebook.

    Raises ValueError when the folder holds no usable image or when a file
    name does not follow the expected pattern.
    '''
    # Select images by their real extension; sort because os.listdir order is
    # arbitrary and both the frame order and the start/end label depend on it.
    frame_names = sorted(
        name for name in os.listdir(path)
        if os.path.splitext(name)[1].lower() in ('.png', '.jpg')
    )
    if not frame_names:
        raise ValueError(f'no .png/.jpg images found in {path!r}')

    # Naming: parse the stem (without extension) so the tail is e.g. "all",
    # not "all.png".
    first_parts = os.path.splitext(frame_names[0])[0].split('_')
    last_parts = os.path.splitext(frame_names[-1])[0].split('_')
    if len(first_parts) < 3 or len(last_parts) < 2:
        raise ValueError(
            f'image names must look like "<main>_<time>_<tail>.<ext>", '
            f'got {frame_names[0]!r} / {frame_names[-1]!r}')
    main, start, tail = first_parts[0], first_parts[1], first_parts[2]
    end = last_parts[1]

    images = []
    for name in frame_names:
        # Close each file as soon as its pixels are copied into an array.
        with Image.open(os.path.join(path, name)) as frame:
            images.append(np.array(frame))

    # The original wrote the literal text "tail" into the file name; use the parsed value.
    imageio.mimsave(f'./output/워드클라우드/{main}_{start}~{end}_{tail}.gif', images, fps=0.5)

In [4]:
# Build the GIF from the images stored in the 강릉_all folder.
# NOTE(review): this rebinds the global `path`, which the load cell set to the token folder.
path = "./output/워드클라우드/강릉_all/"
make_gif(path)

### 작업용

In [58]:
# Parse the stringified token lists (e.g. "['a', 'b']") into Python lists.
# The pattern is a raw string: "\[" in a normal string literal is an invalid escape sequence.
df = file_df.fillna("")
df['Noun'] = df['Noun'].apply(lambda x: re.sub(r"[\[\]' ]", "", x).split(','))
df['Adjective'] = df['Adjective'].apply(lambda x: re.sub(r"[\[\]' ]", "", x).split(','))

In [59]:
# Flatten the per-row token lists into one list per part of speech.
all_noun_flatten = [tok for tokens in df["Noun"] for tok in tokens]
# The adjective list must come from the 'Adjective' column; reading 'Noun'
# again counted every noun twice and dropped all adjectives.
all_adj_flatten = [tok for tokens in df["Adjective"] for tok in tokens]
all_word_flatten = all_noun_flatten + all_adj_flatten

In [61]:
# Add the keyword and its keyword-specific extras to the stopwords, then de-duplicate.
sw += [keyword]
sw = list({*sw, *custom_sw[keyword]})

In [70]:
# Label for the covered period: the single year, or "first~last" when several years are present.
years = df['year'].unique()
if len(years) == 1:
    time = years[0]
else:
    last_year = years.max()
    start_year = years.min()
    time = f'{start_year}~{last_year}'

In [143]:
# Word cloud (scratch version of the draw_wordcloud body, run on the whole sample)
# Mask: flatten the RGBA image onto a white RGB canvas, using the image itself as paste mask
img_path = "vector-cloud-png.png"
with Image.open(img_path) as src:  # release the file handle once converted
    img = src.convert('RGBA')
mask = Image.new("RGB", img.size, (255, 255, 255))
mask.paste(img, img)
mask = np.array(mask)

text = ' '.join(all_word_flatten)

wordc = wordcloud.WordCloud(background_color='White', max_words=200,
                            font_path='C:/Windows/Fonts/malgun.ttf',
                            relative_scaling=0.5,
                            stopwords=sw,
                            collocations=False,
                            mask=mask)
wordc.generate(text)

def grey_color(word, font_size, position, orientation, random_state=None, **kwargs):
    # Use the generator that recolor() passes in; the global `random` module
    # ignored random_state=3, so colours changed on every run.
    rng = random_state if random_state is not None else random
    return 'hsl(%d,100%%,50%%)' % rng.randint(200, 300)  # hue, saturation, lightness
wordc.recolor(color_func=grey_color, random_state=3)

# Draw
fig = plt.figure(figsize=(10, 10))
plt.axis('off')
plt.title('{keyword}_{time}'.format(keyword = keyword, time = time),fontsize = 15)
plt.imshow(wordc, interpolation='bilinear')
# Make sure the target folder exists; savefig does not create it.
os.makedirs('./output/워드클라우드/', exist_ok=True)
plt.savefig(f'./output/워드클라우드/{keyword}_{time}.png')
plt.close(fig)

### 이하작업본 gif

In [112]:
# Folder scanned for images in the GIF scratch cells below.
# NOTE(review): rebinds the global `path` again; cells above used it for other folders.
path = "./output/워드클라우드/"

In [141]:
# Build the GIF from the images directly inside `path`.
make_gif(path)

In [131]:
# Debug probe: show the second collected file name.
# NOTE(review): `png_ls` is only defined by the filtering cell further down, so this fails on a fresh top-to-bottom run.
png_ls[1]

'강릉_2011_all.png'

In [133]:
# Debug probe: check that the extension test accepts this file name.
# NOTE(review): depends on `png_ls` from the filtering cell further down.
png_ls[1].split('.')[1] in (['png','jpg'])

True

In [113]:
# os.listdir order is arbitrary; sort so the first/last file (used for the start/end label) is stable.
# NOTE(review): this rebinds `file_list`, which the load cell filled with the token files.
file_list = sorted(os.listdir(path))

In [115]:
# Keep only .png files. os.path.splitext reads the real extension (names with
# no dot or several dots are handled), so no bare `except` is needed, and the
# comprehension does not overwrite the global `file` set by the sample cell.
png_ls = [name for name in file_list if os.path.splitext(name)[1] == '.png']


In [125]:
# Split "<main>_<time>_<tail>" out of the first file name; the end label comes from the last one.
# NOTE: `tail` keeps the file extension here (e.g. 'all.png').
main, start, tail = (png_ls[0].split('_')[idx] for idx in (0, 1, 2))
end = png_ls[-1].split('_')[1]

In [122]:
# Load every selected image as an array and write them out as one GIF.
# (Removed a stray bare generator expression that was a SyntaxError and kept the whole cell from running.)
paths = [np.array(Image.open(path + file)) for file in png_ls]
imageio.mimsave(f'./output/워드클라우드/{keyword}.gif', paths, fps=0.5)

In [None]:
# Load each selected image as an array (the original line was missing `open` and the list brackets).
[np.array(Image.open(path + file)) for file in png_ls]