# 코나 화재

In [87]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from utils import *

In [118]:
def _daily_post_counts(df, day_start, day_end, per_post_likes_threshold, per_day_views_threshold):
    """Aggregate posts per day and align them to a gap-free daily calendar."""
    # One row per calendar day; stringified so it can be joined against `created_day`.
    # NOTE(review): assumes `created_day` holds 'YYYY-MM-DD' strings - confirm
    # against add_created_day_col in utils.
    calendar = pd.date_range(start=day_start, end=day_end, freq='D').to_frame(index=False, name='full_range_date')
    calendar['full_range_date'] = calendar['full_range_date'].astype(str)

    # Post-level filter: strictly more likes than the threshold.
    kept_posts = df.loc[df["likes"] > per_post_likes_threshold, ["created_day", "views"]]

    # Per day: total views ('sum') and number of posts ('count').
    per_day = kept_posts.groupby("created_day")["views"].agg(["sum", "count"])

    # Day-level filter: strictly more total views than the threshold.
    per_day = per_day.loc[per_day["sum"] > per_day_views_threshold]

    # Left join keeps every calendar day; days without posts become 0.
    merged = pd.merge(calendar, per_day, left_on='full_range_date', right_on='created_day', how='left')
    return merged.fillna(0)


def plot_per_day_post_count_plotly_with_events(
    df: pd.DataFrame,
    title: str,
    day_start,
    day_end,
    events,
    per_post_likes_threshold=0,
    per_day_views_threshold=0,
    x_axis: str = 'date',
    y_axis: str = 'count',
) -> pd.DataFrame:
    """Show a bar chart of posts per day with dashed vertical event markers.

    Args:
        df: Posts with at least the columns ``likes``, ``views`` and
            ``created_day``.
        title: Figure title.
        day_start: First day of the plotted range (anything accepted by
            ``pd.date_range`` / ``pd.Timestamp``).
        day_end: Last day of the plotted range, inclusive.
        events: Iterable of ``(date, color)`` pairs. A marker is drawn for
            every event whose date lies inside ``[day_start, day_end]``.
        per_post_likes_threshold: Only posts with ``likes`` strictly greater
            than this value are counted.
        per_day_views_threshold: Only days whose summed ``views`` are strictly
            greater than this value are kept.
        x_axis: Label of the x axis.
        y_axis: Label of the y axis.

    Returns:
        The frame that was plotted: one row per day in the range with
        ``full_range_date``, ``sum`` (views) and ``count`` (posts); days
        without qualifying posts are filled with 0.
    """
    full_range_df = _daily_post_counts(
        df, day_start, day_end, per_post_likes_threshold, per_day_views_threshold
    )

    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            x=full_range_df['full_range_date'],
            y=full_range_df['count'],
            name='filtered_count',
        )
    )

    # Event markers reach up to the tallest bar. An empty date range yields
    # NaN from max(), which int() cannot convert, so fall back to 0.
    peak = full_range_df['count'].max()
    y_max = 0 if pd.isna(peak) else int(peak)

    # Compare as timestamps rather than raw strings so that non-zero-padded
    # dates ('2020-1-5') or datetime objects are range-checked correctly.
    range_start = pd.Timestamp(day_start)
    range_end = pd.Timestamp(day_end)
    for date, color in events:
        if range_start <= pd.Timestamp(date) <= range_end:
            fig.add_shape(
                type="line",
                x0=date,
                x1=date,
                y0=0,
                y1=y_max,
                line=dict(color=color, width=2, dash='dash'),
            )

    fig.update_layout(
        title=title,
        xaxis_title=x_axis,
        yaxis_title=y_axis,
    )

    fig.show()
    return full_range_df


In [77]:
# def plot_per_day_post_count_plt(df, title):
#     # Build the column that will be used as the x-axis
#     full_date_daily = pd.date_range(start='2020-01-09', end='2024-06-25', freq='D').to_frame(index=False, name='full_range_date')
#     full_date_daily['full_range_date'] = full_date_daily['full_range_date'].astype(str)
    
#     temp_df = df.sort_values(by=['created_at'])
#     per_day_view = temp_df.loc[:, ["created_day", "views"]]
#     per_day_view = per_day_view.groupby(per_day_view.created_day)['views'].count().reset_index(name='count')

#     full_range_df = pd.merge(full_date_daily, per_day_view, left_on='full_range_date', right_on='created_day', how='left')
#     full_range_df = full_range_df.fillna(0)

#     plt.figure(figsize=(30,10))
#     plt.plot(full_range_df["full_range_date"], full_range_df['count'])
#     plt.title(title)
#     plt.show()
#     return full_range_df

In [78]:
# Load the per-community post dumps (posts only) and tag each row with the
# community it came from.
clien_posts_df = pd.read_csv('data/0808/clien_posts.csv').assign(**{'from': 'clien'})
fm_korea_posts_df = pd.read_csv('data/0808/fmkorea_posts.csv').assign(**{'from': 'fm_korea'})
naver_cafe_posts_df = pd.read_csv('data/0808/naver_cafe_posts.csv').assign(**{'from': 'naver_cafe'})

# The naver cafe frame is also analysed on its own, so its timestamps are
# parsed here as well.
naver_cafe_posts_df['created_at'] = naver_cafe_posts_df['created_at'].apply(parse_dates)

# Stack all communities into one frame and parse its timestamps.
# NOTE(review): the naver rows were already passed through parse_dates above,
# so they go through it a second time here - confirm parse_dates (utils) is
# safe to apply to already-parsed values.
posts_df = pd.concat([clien_posts_df, fm_korea_posts_df, naver_cafe_posts_df])
posts_df['created_at'] = posts_df['created_at'].apply(parse_dates)

In [79]:
# Normalise `views` and `likes` on both the naver-only and the combined frame.
# remove_commna / convert_views_to_int come from utils (not visible here);
# presumably they strip thousands separators and convert to int - confirm.

# naver cafe only
naver_cafe_posts_df['views'] = (
    naver_cafe_posts_df['views'].map(str).apply(remove_commna).apply(convert_views_to_int)
)
naver_cafe_posts_df['likes'] = naver_cafe_posts_df['likes'].fillna(0).map(int)

# all communities
posts_df['views'] = (
    posts_df['views'].map(str).apply(remove_commna).apply(convert_views_to_int)
)
posts_df['likes'] = posts_df['likes'].fillna(0).map(int)  # missing likes count as 0

# naver_cafe_posts_df['likes'].isna()

In [68]:
# Filter posts by keyword, add the `created_day` column and sort chronologically.
keywords = ['코나', '화재']

# All communities.
filtered_by_keyword_posts_df = filter_by_keyword(posts_df, keywords)
filtered_by_keyword_posts_df = add_created_day_col(filtered_by_keyword_posts_df).sort_values(by=['created_at'])

# Naver cafe only. The raw `naver_cafe_posts_df` is deliberately NOT rebound
# to the filtered result (as `posts_df` is not above), so re-running this cell
# or changing `keywords` always starts from the unfiltered data.
filtered_by_keyword_naver_cafe_posts_df = filter_by_keyword(naver_cafe_posts_df, keywords)
filtered_by_keyword_naver_cafe_posts_df = add_created_day_col(filtered_by_keyword_naver_cafe_posts_df).sort_values(by=['created_at'])

# EDA (only naver cafe)

In [119]:
# Human-readable legend for the event markers.
# NOTE(review): INFO is not referenced by any other cell visible here. It calls
# the fire markers "blue" (파란색 라인) while `events` draws them in #79db93,
# and some dates differ from `events` (e.g. 2021-01-24 here vs 2021-01-23
# below) - confirm which source is correct.
INFO = (
    "빨간색 라인 \n"
    "2018년 4월 12일 코나 EV 출시 \n"
    "2020년 10월 8일 2017년 9월 29일부터 2020년 3월 13일까지 생산된 \n"
    "2021년 3월 22일 전량 배터리 리콜 결정\n"
    "2021년 4월 코나 EV 단종\n"
    "2023년 2월 28일 코나 EV 2세대 디자인 필름 공개\n"
    # The newline and blank line below were missing, which fused the last
    # red-line entry with the heading of the second section.
    "2023년 4월 13일 코나 EV 2세대 정식 출시\n"
    "\n"
    "파란색 라인 \n"
    "2020년 10월 17일 코나 화재 남양주  \n"
    "2021년 6월 23일 코나 화재 보령 \n"
    "2020년 9월 26일 코나 화재 제주 \n"
    "2020년 10월 4일 코나 화재 대구 \n"
    "2021년 1월 24일 코나 화재 대구  \n"
    "2021년 7월 1일 코나 화재 세종 \n"
)

# (date, line color) pairs drawn as dashed vertical markers on the charts.
# "Red" marks product / recall milestones, "#79db93" marks fires and the
# defect announcement.
events = [
    ("2020-10-08", "Red"),  # recall of 25,564 Kona Electric units produced 2017-09-29 to 2020-03-13
    ("2020-12-18", "Red"),  # Kona discontinued
    ("2021-03-22", "Red"),  # decision to recall all Kona batteries
    ("2023-02-28", "Red"),  # 2nd-gen Kona EV design film released
    ("2023-04-13", "Red"),  # 2nd-gen Kona EV officially launched
    ("2021-02-24", "#79db93"),  # Ministry of Land, Infrastructure and Transport announces battery defect
    ("2020-04-02", "#79db93"),  # Kona fire (Ansan, Gyeonggi)
    ("2020-05-29", "#79db93"),  # Kona fire (parking lot, Buk-gu, Daegu)
    ("2020-08-07", "#79db93"),  # Kona fire (parking lot, Buk-gu, Daegu); was listed twice
    ("2020-08-15", "#79db93"),  # Kona fire (parking lot, Jeongeup, Jeonbuk)
    ("2020-09-26", "#79db93"),  # Kona fire (Jeju)
    ("2020-10-04", "#79db93"),  # Kona fire (parking lot, Dalseong-gun, Daegu)
    ("2020-10-17", "#79db93"),  # Kona fire (Namyangju)
    ("2021-01-23", "#79db93"),  # Kona fire (charging station, Dalseo-gu, Daegu)
    ("2021-02-16", "#79db93"),  # fire in a Hyundai electric bus using an LG battery
    ("2021-06-23", "#79db93"),  # Kona fire (Boryeong, Chungnam)
    ("2021-07-01", "#79db93"),  # Kona fire (Sejong)
    ("2022-01-11", "#79db93"),  # Kona fire (Taean-eup, Chungnam)
    ("2022-02-12", "#79db93"),  # Kona fire (underground parking lot, Busan)
    ("2023-05-24", "#79db93"),  # Kona fire (Dalseong-gun, Daegu)
]

In [120]:
# Baseline (naver cafe): both thresholds are -1. The plotting function keeps
# values strictly greater than the threshold, so -1 is meant to leave every
# post and every day in.
vis_df = filtered_by_keyword_naver_cafe_posts_df
day_start, day_end = '2020-01-01', '2022-12-31'
per_day_views_threshold = per_post_likes_threshold = -1
title = (
    f'Number of Posts per day (naver cafe) / {day_start}~{day_end}'
    f' / per_day_views_threshold: {per_day_views_threshold}'
    f' / per_post_likes_threshold: {per_post_likes_threshold}'
)
# vis_df is rebound to the per-day frame returned by the plot helper.
vis_df = plot_per_day_post_count_plotly_with_events(
    vis_df,
    title=title,
    day_start=day_start,
    day_end=day_end,
    per_post_likes_threshold=per_post_likes_threshold,
    per_day_views_threshold=per_day_views_threshold,
    events=events,
)

In [122]:
# Minimum filtering, no smoothing (naver cafe): thresholds of 0 keep only
# posts with more than 0 likes and days with more than 0 total views.
vis_df = filtered_by_keyword_naver_cafe_posts_df
day_start, day_end = '2020-01-01', '2022-12-31'
per_day_views_threshold = per_post_likes_threshold = 0
title = (
    f'Number of Posts per day (naver cafe) / {day_start}~{day_end}'
    f' / per_day_views_threshold: {per_day_views_threshold}'
    f' / per_post_likes_threshold: {per_post_likes_threshold}'
)
# vis_df is rebound to the per-day frame returned by the plot helper.
vis_df = plot_per_day_post_count_plotly_with_events(
    vis_df,
    title=title,
    day_start=day_start,
    day_end=day_end,
    per_post_likes_threshold=per_post_likes_threshold,
    per_day_views_threshold=per_day_views_threshold,
    events=events,
)

In [123]:
# Minimum filtering, intended "with ewm" variant (naver cafe).
# NOTE(review): no smoothing argument is passed and the plot helper has its
# smoothing code commented out, so this currently draws the same chart as the
# unsmoothed minimum-filtering call - confirm whether ewm should be restored.
vis_df = filtered_by_keyword_naver_cafe_posts_df
day_start, day_end = '2020-01-01', '2022-12-31'
per_day_views_threshold = per_post_likes_threshold = 0

title = (
    f'Number of Posts per day (naver cafe) / {day_start}~{day_end}'
    f' / per_day_views_threshold: {per_day_views_threshold}'
    f' / per_post_likes_threshold: {per_post_likes_threshold}'
)
# vis_df is rebound to the per-day frame returned by the plot helper.
vis_df = plot_per_day_post_count_plotly_with_events(
    vis_df,
    title=title,
    day_start=day_start,
    day_end=day_end,
    per_post_likes_threshold=per_post_likes_threshold,
    per_day_views_threshold=per_day_views_threshold,
    events=events,
)

In [73]:
# df = filtered_by_keyword_naver_cafe_posts_df
# day_start = '2020-01-01'
# day_end = '2022-12-31'
# per_day_views_threshold = 0
# per_post_likes_threshold = 0
# events=events
# ewm=True
    
# full_date_daily = pd.date_range(start=day_start, end=day_end, freq='D').to_frame(index=False, name='full_range_date')
# full_date_daily['full_range_date'] = full_date_daily['full_range_date'].astype(str)

# df = df.loc[df["likes"] > per_post_likes_threshold]
# df = df.loc[:, ["created_day", "views"]]

# per_day_view = df.groupby(df.created_day).agg({ # per-day sum of views and number of posts (count)
#     'views': ['sum', 'count'],  
# })

# per_day_view.columns = per_day_view.columns.droplevel(0)
# per_day_view = per_day_view.loc[per_day_view['sum'] > per_day_views_threshold]

# full_range_df = pd.merge(full_date_daily, per_day_view, left_on='full_range_date', right_on='created_day', how='left')
# full_range_df = full_range_df.fillna(0)


# full_range_df = full_range_df.assign(count=full_range_df['count'].ewm(span=3, adjust=True).mean())
    
# fig = px.line(full_range_df, x="full_range_date", y="count", title=title)
# y_max = int(full_range_df['count'].max())
    
# for event in events:
#     date, color = event
#     if day_start <= date <= day_end:
#         fig.add_shape(type="line", x0=date, x1=date, y0=0, y1=y_max, line=dict(color=color, width=2, dash='dash'))

# fig.update_layout(
#     title=title,
# )

# fig.show()
# # full_range_df

In [36]:
# Inspect the naver cafe posts of a single day (2022-02-12 is listed in
# `events` as a fire date). filter_by_date_and_save_to_csv lives in utils;
# presumably it filters to [day_start, day_end] and writes `filename` - confirm.
day_start = day_end = '2022-02-12'
filename = 'temp.csv'
temp = filter_by_date_and_save_to_csv(
    filtered_by_keyword_naver_cafe_posts_df,
    day_start,
    day_end,
    filename,
)
temp

Unnamed: 0,id,title,content,likes,url,author,views,created_at,updated_at,from,created_day


# EDA (All)

In [124]:
# Baseline (all communities): thresholds of -1 are meant to disable both
# filters, since the plot helper keeps values strictly greater than the
# threshold.
vis_df = filtered_by_keyword_posts_df
# The plotted frame combines every community, so the title must not say
# "naver cafe" (copy-paste leftover from the naver-only section).
title = 'Number of Posts per day (all)'
day_start = '2020-01-01'
day_end = '2022-12-31'
per_day_views_threshold = -1
per_post_likes_threshold = -1
# vis_df is rebound to the per-day frame returned by the plot helper.
vis_df = plot_per_day_post_count_plotly_with_events(
    vis_df,
    title=title,
    day_start=day_start,
    day_end=day_end,
    per_post_likes_threshold=per_post_likes_threshold,
    per_day_views_threshold=per_day_views_threshold,
    events=events,
)

In [125]:
# Minimum filtering (all communities): thresholds of 0 keep only posts with
# more than 0 likes and days with more than 0 total views.
vis_df = filtered_by_keyword_posts_df
# The plotted frame combines every community, so the title must not say
# "naver cafe" (copy-paste leftover from the naver-only section).
title = 'Number of Posts per day (all)'
day_start = '2020-01-01'
day_end = '2022-12-31'
per_day_views_threshold = 0
per_post_likes_threshold = 0
# vis_df is rebound to the per-day frame returned by the plot helper.
vis_df = plot_per_day_post_count_plotly_with_events(
    vis_df,
    title=title,
    day_start=day_start,
    day_end=day_end,
    per_post_likes_threshold=per_post_likes_threshold,
    per_day_views_threshold=per_day_views_threshold,
    events=events,
)

In [126]:
# Inspect the naver cafe posts of a single day (2020-12-18 is listed in
# `events` as the Kona discontinuation date). filter_by_date_and_save_to_csv
# lives in utils; presumably it filters to [day_start, day_end] and writes
# `filename` - confirm.
day_start = day_end = '2020-12-18'
filename = 'temp.csv'
temp = filter_by_date_and_save_to_csv(
    filtered_by_keyword_naver_cafe_posts_df,
    day_start,
    day_end,
    filename,
)
temp

Unnamed: 0,id,title,content,likes,url,author,views,created_at,updated_at,from,created_day
123,541209,[공유] [단독] 화재·브레이크 결함...애증의 코나 전기차 국내 단종,,0,https://cafe.naver.com/ca-fe/ArticleRead.nhn?c...,백만I볼트I김포,593,2020-12-18 06:39:00,,naver_cafe,2020-12-18
122,541247,코나 전기차 단종,[단독] 화재·브레이크 결함...애증의 코나 전기차 국내 단종 -https://n....,0,https://cafe.naver.com/ca-fe/ArticleRead.nhn?c...,enoughI코나I충청,745,2020-12-18 09:33:00,,naver_cafe,2020-12-18
121,541286,"현대차, '리콜' 코나 EV 전기차 국내 판매 중단 검토","현대차, '리콜' 코나 EV 전기차 국내 판매 중단 검토현대자동차가 화재와 브레이크...",1,https://cafe.naver.com/ca-fe/ArticleRead.nhn?c...,뭉치I코니모3I서울,1258,2020-12-18 10:39:00,,naver_cafe,2020-12-18
120,541289,[공유] [단독] 화재·브레이크 결함...애증의 코나 전기차 국내 단종,,0,https://cafe.naver.com/ca-fe/ArticleRead.nhn?c...,코나주인l 세종,624,2020-12-18 10:43:00,,naver_cafe,2020-12-18
118,541525,코나 단종보고 궁금한게 ..어차피 현기 새플랫폼 나오니까 2세대 전기차는 전부 단종...,니로.쏘부.아이오닉 등..2세대 모델은단종될거 같은데 아닌가요?,0,https://cafe.naver.com/ca-fe/ArticleRead.nhn?c...,검은바퀴I니로evl파주,1113,2020-12-18 20:26:00,,naver_cafe,2020-12-18
117,541534,개념 국회의원이네요...,https://youtu.be/B8TTz1Tviqc왠만하면 린크 안거는데진짜 개념 ...,0,https://cafe.naver.com/ca-fe/ArticleRead.nhn?c...,동백꽃I쏘울EVl서울,482,2020-12-18 21:17:00,,naver_cafe,2020-12-18
