In [None]:
import pandas as pd
from tqdm import tqdm
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
from IPython.display import display


In [None]:
class KakaoTalkAnalyzer:
    """Count messages and first-come gift events per sender in a KakaoTalk export.

    Every recognised message line is attributed to its sender and tallied both
    per month (``'YYYY-MM'``) and per year (``'YYYY'``) in nested dictionaries
    shaped ``{name: {period: count}}``:

    * ``chats`` - every recognised message,
    * ``gifts`` - messages containing one of the ``gift_start`` phrases,
    * ``badas`` - messages containing one of the ``gift_win`` phrases.

    ``months`` and ``years`` list the periods in order of first appearance.
    """

    def __init__(self, chat_file, encoding='utf-8'):
        """Remember the export path and start with empty tallies.

        Parameters
        ----------
        chat_file : str or path-like
            Path of the exported chat text file.
        encoding : str, optional
            Text encoding used to read ``chat_file``. It is passed explicitly so
            that Korean text is decoded the same way on every platform instead
            of depending on the locale default.
        """
        self.chat_file = chat_file
        self.encoding = encoding
        self.chats = {}
        self.gifts = {}
        self.badas = {}
        self.months = []
        self.years = []

        # Phrases that mark the start of a first-come gift game and a win.
        self.key_sentences = {
            "gift_start": [
                "선착순 선물 게임을 시작합니다! 기회는 단",
                "선착순 선물게임을 시작합니다! 기회는 단"
            ],
            "gift_win": [
                "축하합니다. 선착순 선물에 당첨되었어요!"
            ]
        }

    def load_data(self):
        """Read the chat file and feed every stripped line to ``process_line``.

        Raises
        ------
        OSError
            If the file cannot be opened.
        UnicodeDecodeError
            If the file content does not match ``self.encoding``.
        """
        with open(self.chat_file, 'r', encoding=self.encoding) as f:
            # readlines() gives tqdm a length, so the bar can show a total.
            for line in tqdm(f.readlines()):
                self.process_line(line.strip())

    @staticmethod
    def _split_message_line(line):
        """Split a message line into ``(name, year, year_month, message)``.

        Returns ``None`` for anything that is not a message line (date
        separators, join/leave notices, continuation lines of multi-line
        messages, ...). The expected layout, inferred from the slicing logic,
        is ``'2023년 1월 5일 오후 3:45, 이름 : 메시지'``.
        """
        if not line.startswith('20'):
            return None

        header, comma, rest = line.partition(',')
        if not comma:
            # No timestamp/sender separator: cannot be a message line.
            return None

        has_date = all(mark in header for mark in ('년', '월', '일'))
        has_meridiem = '오전' in header or '오후' in header
        # Only the header must hold exactly one ':' (the clock time); the
        # message text itself may contain any number of colons.
        if not (has_date and has_meridiem and header.count(':') == 1):
            return None

        year = header[:4]
        month = header[header.find(' ') + 1:header.find('월')]
        if not (year.isdigit() and month.isdigit()):
            return None

        # The sender and the text are separated by ' : '. After strip() an
        # empty message leaves just ' :' at the end, so search for that.
        separator = rest.find(' :')
        if separator == -1:
            return None
        name = rest[1:separator]
        message = rest[separator + 3:]

        return name, year, year + '-' + month.zfill(2), message

    def process_line(self, line):
        """Update the tallies with one (already stripped) line of the export.

        Lines that are not recognised as messages are ignored.
        """
        parsed = self._split_message_line(line)
        if parsed is None:
            return
        name, year, year_month, message = parsed

        if year_month not in self.months:
            self.months.append(year_month)
        if year not in self.years:
            self.years.append(year)

        self.update_dict(self.chats, name, year_month, year)
        if any(phrase in message for phrase in self.key_sentences["gift_start"]):
            self.update_dict(self.gifts, name, year_month, year)
        if any(phrase in message for phrase in self.key_sentences["gift_win"]):
            self.update_dict(self.badas, name, year_month, year)

    def update_dict(self, dictionary, name, month, year):
        """Increment the ``month`` and ``year`` counters of ``name`` in ``dictionary``."""
        counts = dictionary.setdefault(name, {})
        counts[month] = counts.get(month, 0) + 1
        counts[year] = counts.get(year, 0) + 1

    def create_dataframe(self, dictionary, time_periods):
        """Build a period-by-participant count table from one tally dictionary.

        Parameters
        ----------
        dictionary : dict
            ``{name: {period: count}}`` as filled by ``update_dict``.
        time_periods : iterable of str
            Periods to use as rows (e.g. ``self.months`` or ``self.years``).

        Returns
        -------
        pandas.DataFrame
            Rows are the periods followed by a ``'합계'`` (sum) row. Columns are
            ``'전체'`` (row total) plus one per participant, ordered by the
            ``'합계'`` row in descending order. Missing periods count as 0.
        """
        periods = list(time_periods)
        data_list = {'기간': periods}
        for participant, counts in dictionary.items():
            data_list[participant] = [counts.get(period, 0) for period in periods]

        df = pd.DataFrame(data_list, columns=list(data_list))
        df = df.set_index('기간')

        total_series = df.sum(axis=1)
        total_series.name = '전체'
        df = pd.concat([total_series, df], axis=1)

        total_row = df.sum(axis=0)
        total_row.name = '합계'
        df = pd.concat([df, pd.DataFrame(total_row).T])

        # Stable sort: columns with equal totals keep their insertion order,
        # so '전체' stays ahead of a sole participant with the same total.
        df = df.sort_values(by='합계', axis=1, ascending=False, kind='mergesort')

        return df

    def get_dataframes(self):
        """Return the six summary tables.

        The order is chats, gifts, badas per month, followed by chats, gifts,
        badas per year.
        """
        df_chats_monthly = self.create_dataframe(self.chats, self.months)
        df_gifts_monthly = self.create_dataframe(self.gifts, self.months)
        df_badas_monthly = self.create_dataframe(self.badas, self.months)

        df_chats_yearly = self.create_dataframe(self.chats, self.years)
        df_gifts_yearly = self.create_dataframe(self.gifts, self.years)
        df_badas_yearly = self.create_dataframe(self.badas, self.years)

        return df_chats_monthly, df_gifts_monthly, df_badas_monthly, df_chats_yearly, df_gifts_yearly, df_badas_yearly


In [None]:
# Parse the exported chat log once, then derive the six summary tables
# (chats / gifts / badas, each per month and per year).
chat_analyzer = KakaoTalkAnalyzer('KakaoTalkChats.txt')
chat_analyzer.load_data()
(
    df_chats_monthly,
    df_gifts_monthly,
    df_badas_monthly,
    df_chats_yearly,
    df_gifts_yearly,
    df_badas_yearly,
) = chat_analyzer.get_dataframes()


In [None]:
def configure_plotting(font_family='Apple SD Gothic Neo'):
    """Apply the notebook-wide matplotlib defaults (font, resolution, colours).

    Parameters
    ----------
    font_family : str or list of str, optional
        Value stored in ``rcParams['font.family']``. The default is a font
        bundled with macOS; pass another installed font that has Hangul glyphs
        when running elsewhere (e.g. 'Malgun Gothic' or 'NanumGothic').
    """
    # RGB components scaled from 0-255 to the 0-1 range matplotlib expects.
    white = tuple(channel / 255 for channel in (255, 255, 255))
    gray = tuple(channel / 255 for channel in (52, 52, 52))

    mpl.rcParams.update({
        'font.family': font_family,
        'font.size': 14,
        'figure.dpi': 200,
        # White canvas ...
        'figure.facecolor': white,
        'figure.edgecolor': white,
        'axes.facecolor': white,
        # ... with dark-gray text, spines and ticks.
        'text.color': gray,
        'axes.labelcolor': gray,
        'axes.edgecolor': gray,
        'xtick.color': gray,
        'ytick.color': gray,
    })

configure_plotting()


In [None]:
def plot_data(df_chats, df_gifts, title):
    """Plot total chat volume and total gift launches per period on twin y-axes.

    Both frames must have a '전체' column; the last row of each is left out of
    the plot. Every data point is labelled with its value.
    """
    chat_color = (48/255, 103/255, 155/255)
    gift_color = (150/255, 159/255, 83/255)
    label_color = (255/255, 255/255, 255/255)

    def totals_without_last_row(frame):
        # Split the '전체' column into x (index) and y (values), minus the last row.
        totals = frame['전체']
        return list(totals.index.values)[:-1], list(totals.values)[:-1]

    def draw_series(axis, xs, ys, color):
        # Line, matching y tick labels and one boxed value label per point.
        axis.plot(xs, ys, color=color, linewidth=5.0)
        axis.tick_params(axis='y', labelcolor=color)
        label_box = dict(facecolor=color, edgecolor='none', pad=1.0)
        for x, y in zip(xs, ys):
            axis.annotate(y, xy=(x, y+1), color=label_color, bbox=label_box, zorder=2)

    x_chats, y_chats = totals_without_last_row(df_chats)
    x_gifts, y_gifts = totals_without_last_row(df_gifts)

    fig, ax = plt.subplots(1, 1, figsize=(12, 6))
    ax.set_title(title, fontsize=20)

    # Left axis: chat volume.
    draw_series(ax, x_chats, y_chats, chat_color)
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_xlabel('기간')
    ax.set_ylabel('대화량')

    # Right axis: number of first-come gift launches.
    ax2 = ax.twinx()
    draw_series(ax2, x_gifts, y_gifts, gift_color)
    ax2.set_ylabel('선착순 쏜 횟수')

    ax.margins(x=0)
    plt.show()

plot_data(df_chats_monthly, df_gifts_monthly, '월별 대화량과 선착순 쏜 횟수')
# plot_data(df_chats_yearly, df_gifts_yearly, '연도별 대화량과 선착순 쏜 횟수')


In [None]:
def _print_period_section(df, selected_period, total_label, people_label):
    """Print the total, the active-participant count and the ranking for one table."""
    row = df.loc[selected_period]
    # Look the total up by name rather than by position, so a participant whose
    # count equals the total can never be mistaken for it. Fall back to the
    # first column for frames that do not carry a '전체' column.
    total_key = '전체' if '전체' in row.index else row.index[0]
    participants = row.drop(total_key)
    active = participants[participants > 0].sort_values(ascending=False, kind='mergesort')

    print(f"{total_label}: {row[total_key]}")
    print(f"{people_label}: {len(active)}\n")
    for name, count in active.items():
        print(f"{name}: {count}")


def display_stats(df_chats, df_gifts, df_badas, selected_period):
    """Print chat, gift-launch and gift-win statistics for one period.

    Parameters
    ----------
    df_chats, df_gifts, df_badas : pandas.DataFrame
        Period-by-participant count tables whose row labels include
        ``selected_period`` and whose total column is named '전체'.
    selected_period : str
        Row label to report on (a month, a year, or the sum row).

    For each table the total is printed first, then the number of participants
    with a positive count (0 when nobody was active), then those participants
    in descending order of their count.
    """
    _print_period_section(df_chats, selected_period, "총 대화량", "대화에 참여한 사람 수")
    _print_period_section(df_gifts, selected_period, "선착순 쏜 횟수", "선착순 쏜 사람 수")
    _print_period_section(df_badas, selected_period, "선착순 받은 횟수", "선착순 받은 사람 수")


In [None]:
# Month selector plus a dedicated output area for the statistics.
w_month = widgets.Select(
    options=list(df_chats_monthly.index.values),
    value=df_chats_monthly.index.values[0],
    description='월별 선택:',
    disabled=False
)
out_month = widgets.Output()

display(w_month, out_month)

def on_month_change(change):
    """Show the statistics of the newly selected month in ``out_month``."""
    if change['type'] == 'change' and change['name'] == 'value':
        # Replace the previous report instead of appending to it; printing
        # inside the Output widget keeps the text attached to this cell.
        out_month.clear_output(wait=True)
        with out_month:
            display_stats(df_chats_monthly, df_gifts_monthly, df_badas_monthly, change['new'])

# Only selection changes are of interest, so subscribe to the 'value' trait.
w_month.observe(on_month_change, names='value')

# Render the initially selected month too; no change event fires for it.
on_month_change({'type': 'change', 'name': 'value', 'new': w_month.value})


In [None]:
# Year selector plus a dedicated output area for the statistics.
w_year = widgets.Select(
    options=list(df_chats_yearly.index.values),
    value=df_chats_yearly.index.values[0],
    description='연도별 선택:',
    disabled=False
)
out_year = widgets.Output()

display(w_year, out_year)

def on_year_change(change):
    """Show the statistics of the newly selected year in ``out_year``."""
    if change['type'] == 'change' and change['name'] == 'value':
        # Replace the previous report instead of appending to it; printing
        # inside the Output widget keeps the text attached to this cell.
        out_year.clear_output(wait=True)
        with out_year:
            display_stats(df_chats_yearly, df_gifts_yearly, df_badas_yearly, change['new'])

# Only selection changes are of interest, so subscribe to the 'value' trait.
w_year.observe(on_year_change, names='value')

# Render the initially selected year too; no change event fires for it.
on_year_change({'type': 'change', 'name': 'value', 'new': w_year.value})
