# bokeh 이용한 시각화1

# In [10]:
import pandas as pd
import glob
import os
from datetime import datetime

# Directories holding the post and comment CSV files.
post_path = "/Users/admin/softeer/project/0812/post/"
comment_path = "/Users/admin/softeer/project/0812/comment/"

# Collect the CSV files. glob does not guarantee any ordering, so the paths
# are sorted to make the concatenation order reproducible between runs.
post_files = sorted(glob.glob(os.path.join(post_path, "*.csv")))
comment_files = sorted(glob.glob(os.path.join(comment_path, "*.csv")))

# pd.concat fails with a bare "No objects to concatenate" on an empty list;
# raise the same exception type with a message naming the directory instead.
if not post_files:
    raise ValueError(f"No CSV files found in {post_path}")
if not comment_files:
    raise ValueError(f"No CSV files found in {comment_path}")

# Merge all files of each kind into a single frame with a fresh 0..n-1 index.
post_df = pd.concat([pd.read_csv(f) for f in post_files], ignore_index=True)
comment_df = pd.concat([pd.read_csv(f) for f in comment_files], ignore_index=True)

# Parse DateTime once (format='mixed' infers the format of each value
# individually) and order both frames chronologically.
post_df['DateTime'] = pd.to_datetime(post_df['DateTime'], format='mixed')
comment_df['DateTime'] = pd.to_datetime(comment_df['DateTime'], format='mixed')
post_df = post_df.sort_values(by='DateTime')
comment_df = comment_df.sort_values(by='DateTime')

# Convert ViewCount to a number; values that cannot be parsed become NaN.
post_df['ViewCount'] = pd.to_numeric(post_df['ViewCount'], errors='coerce')

# Drop g90 rows dated 2024-04-26 or later; rows of other cars are kept as-is.
g90_cutoff = datetime(2024, 4, 26)
post_df = post_df[~((post_df['CarName'] == 'g90') & (post_df['DateTime'] >= g90_cutoff))]
comment_df = comment_df[~((comment_df['CarName'] == 'g90') & (comment_df['DateTime'] >= g90_cutoff))]
# Per-day grouping keys. DateTime is truncated to midnight so that all rows of
# one calendar day fall into the same group. Grouping on the raw values would
# only combine rows whose timestamps are identical, and the post/comment merge
# below would only match on identical timestamps as well. For data that is
# already date-only this truncation changes nothing.
# NOTE(review): assumes per-day aggregation is the intent, as the original
# comments state - confirm against the CSV contents.
post_day = post_df['DateTime'].dt.normalize()
comment_day = comment_df['DateTime'].dt.normalize()

# Number of posts per day and car model.
post_count_df = post_df.groupby([post_day, 'CarName']).size().reset_index(name='post_count')

# Number of comments per day and car model.
comment_count_df = comment_df.groupby([comment_day, 'CarName']).size().reset_index(name='comment_count')

# Per day and car model: summed view count and number of posts ('size' counts
# every row in the group, including rows whose ViewCount is NaN).
score_df = (
    post_df.groupby([post_day, 'CarName'])
    .agg(ViewCount=('ViewCount', 'sum'), post_count=('ViewCount', 'size'))
    .reset_index()
)

# Attach the comment counts; day/car pairs without any comment get 0.
score_df = pd.merge(score_df, comment_count_df, on=['DateTime', 'CarName'], how='left')
score_df['comment_count'] = score_df['comment_count'].fillna(0)

# Score = average views per post + 10 per post + 1 per comment.
score_df['score'] = score_df['ViewCount'] / score_df['post_count'] + score_df['post_count'] * 10 + score_df['comment_count']
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import BoxAnnotation
import numpy as np
from fastdtw import fastdtw
from sklearn.preprocessing import MinMaxScaler
from dtaidistance import dtw

# Route bokeh output to the notebook.
output_notebook()


def _closest_window(reference, series):
    """Return the (start, stop) window of *series* nearest to *reference*.

    Every contiguous window of ``len(reference)`` samples is scored with
    ``dtw.distance``. The first window with the strictly smallest distance
    wins. ``None`` is returned when no window fits (``series`` is shorter
    than ``reference``) or no window scores below infinity.
    """
    width = len(reference)
    winner = None
    lowest = float('inf')
    for start in range(len(series) - width + 1):
        stop = start + width
        current = dtw.distance(reference, series[start:stop])
        if current < lowest:
            lowest = current
            winner = (start, stop)
    return winner


def _normalized_series(car_name):
    """Return the NormalizedScore values of one car model as an array."""
    rows = weekly_score_df['CarName'] == car_name
    return weekly_score_df.loc[rows, 'NormalizedScore'].values


# Aggregate the scores to weekly means per car model.
score_df['DateTime'] = pd.to_datetime(score_df['DateTime'])
weekly_grouper = pd.Grouper(key='DateTime', freq='W')
weekly_score_df = score_df.groupby([weekly_grouper, 'CarName']).mean().reset_index()

# Min-max scale the weekly score; the scaler is fitted on all car models
# together, so every model shares one scale.
scaler = MinMaxScaler()
weekly_score_df['NormalizedScore'] = scaler.fit_transform(weekly_score_df[['score']])

# Reference series: g90.
g90_score = _normalized_series('g90')

# Car models compared against g90 and the line color used for each of them.
car_list = ['코나', '싼타페']
color_list = ['blue', 'green']

# DTW distance of each model to g90, and the window of each model's series
# that lies closest to the g90 series.
similarity_scores = {}
highlight_segments = {}

for car, _color in zip(car_list, color_list):
    candidate = _normalized_series(car)
    similarity_scores[car] = dtw.distance(g90_score, candidate)
    highlight_segments[car] = _closest_window(g90_score, candidate)

def _positions(count):
    """Return the x positions 0..count-1 as a list."""
    return list(range(count))


# Figure showing the weekly normalized score of g90 and the compared models.
p = figure(
    title="Weekly DTW Comparison: g90 vs Others",
    x_axis_label='Index',
    y_axis_label='Normalized Score',
    height=400,
    width=800,
)

# Reference curve (g90) is drawn first.
p.line(
    x=_positions(len(g90_score)),
    y=g90_score,
    legend_label='g90',
    line_width=2,
    color='red',
    alpha=0.7,
)

# One semi-transparent curve per compared model, with its best-matching
# window drawn again on top as a thicker, fully opaque line.
for car, color in zip(car_list, color_list):
    series = weekly_score_df.loc[weekly_score_df['CarName'] == car, 'NormalizedScore'].values
    label = f'{car} (유사도: {similarity_scores[car]:.2f})'
    p.line(x=_positions(len(series)), y=series, legend_label=label, line_width=2, color=color, alpha=0.7)

    window = highlight_segments.get(car)
    if window is None:
        # No window was found for this model, so nothing is emphasized.
        continue
    start, stop = window[0], window[1]
    p.line(x=list(range(start, stop)), y=series[start:stop], line_width=3, color=color, alpha=1.0)

p.legend.location = "top_left"
p.grid.visible = True

show(p)
