In [1]:
import pandas as pd
import ast
import json
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import seaborn as sns

# Build a FontProperties object straight from the font file path.
# NOTE(review): NanumGothic is presumably here so Korean labels render in plots;
# no plotting code is visible in this part of the notebook — confirm it is used.
font_path = "/usr/share/fonts/truetype/nanum/NanumGothic.ttf"
fontprop = fm.FontProperties(fname=font_path)

# Remove the row cap so DataFrames are displayed without truncation.
pd.set_option("display.max_rows", None)


def read_parquet_from_gcs(file_names, gcs_prefix, key_path):
    """Read one parquet file per name from under a GCS prefix.

    Args:
        file_names: Iterable of file base names, without the ``.parquet`` suffix.
        gcs_prefix: Location the files live under, e.g. ``gs://bucket/dir``.
            Trailing ``/`` characters are ignored.
        key_path: Value handed to the filesystem as the ``token`` storage
            option (this notebook passes the path of a JSON key file).

    Returns:
        dict mapping each name to the DataFrame read from
        ``{gcs_prefix}/{name}.parquet``. Empty when ``file_names`` is empty.
    """
    # Drop trailing slashes so "gs://bucket/dir/" does not produce
    # "gs://bucket/dir//name.parquet", which names a different object.
    prefix = gcs_prefix.rstrip("/")
    return {
        name: pd.read_parquet(
            f"{prefix}/{name}.parquet",
            storage_options={"token": key_path},
        )
        for name in file_names
    }


# Base names of the parquet files to load (named by date).
file_list = ["2023-04-30"]

# Bucket location of the files and the key file used as the access token.
gcs_prefix = "gs://codeit-project/hackle"
key_path = "/home/leesh/codeit_project/codeit-project-docker/config/key.json"

# Load every listed file into a dict keyed by its base name.
dfs = read_parquet_from_gcs(file_list, gcs_prefix, key_path)

# Direct handle on the single frame loaded above.
df_230430 = dfs["2023-04-30"]

In [2]:
# Preview the first rows of the loaded frame to inspect its columns.
df_230430.head()

Unnamed: 0,Asia/Seoul,event_key,session_id,user_id,value,user_properties,hackle_properties,event_properties,id,device_id
0,2023-05-01T00:00:00.024Z,button,jgGFnoyc6GWuGIEgjp3nUoKRQbF3,,0.0,[],"{'language': 'ko-KR', 'isapp': 'true', 'osvers...","{'description': '다음', 'location': 'signotherin...",96d1336f-744b-4deb-a6a4-207494419dbf,cfc34eab-6930-488b-8109-70d37cec7dae
1,2023-05-01T00:00:00.025Z,button,n3PI8GZnRFOKpJslIRgckMeRiln1,,0.0,[],"{'language': 'ko-CA', 'isapp': 'true', 'osvers...","{'description': 'vote 선택', 'location': 'homevo...",2825d1b1-8325-42e2-88eb-8cae61d3a26d,0dde2ecb-ff03-440f-98f3-94f20978b796
2,2023-05-01T00:00:00.029Z,button,L31Dl8vRtdWPJ8XC0gGs1mPwzJH2,,0.0,[],"{'language': 'ko', 'isapp': 'true', 'osversion...","{'description': '친구선택', 'location': 'homeenque...",335b0a36-9ecf-41a9-b7d2-e27125e521ef,b50774ed-24ce-49e1-9383-945ff3aea9ff
3,2023-05-01T00:00:00.036Z,button,QjToElcYNkVxszJDHBhqtITlzDJ3,,0.0,[],"{'language': 'ko', 'isapp': 'true', 'osversion...","{'description': '이름 셔플!', 'location': 'homeenq...",b730bb6a-589a-4074-89a1-87f11a03f9d6,0999c6b6-c579-4c0f-916c-9a113010f84a
4,2023-05-01T00:00:00.063Z,button,VRpXf303RGU730fIO4EjWstzAJi1,,0.0,[],"{'language': 'ko-KR', 'isapp': 'true', 'osvers...","{'description': '앱바 뒤로가기', 'location': 'homefr...",c2a5fd3f-92d5-4864-8e0e-63e563898499,45985493-ed35-4cc5-935f-1bcf922e213c


In [7]:
# Extract the key fields into a flat working frame (the loaded frame stays untouched).
df_simple = df_230430.copy()


def _get_prop(props, key):
    """Return ``props.get(key)``, or None when the properties value itself is None."""
    return None if props is None else props.get(key)


# Pull only the needed entries out of the property dicts into plain columns.
# Rows whose properties value is None yield None instead of raising AttributeError.
for column, source in (
    ('language', 'hackle_properties'),
    ('devicemodel', 'hackle_properties'),
    ('description', 'event_properties'),
    ('location', 'event_properties'),
):
    df_simple[column] = df_simple[source].apply(_get_prop, key=column)

# Parse the timestamp strings as UTC, then convert them to the Asia/Seoul time zone.
# NOTE(review): the source column is itself named 'Asia/Seoul' while its values end
# in "Z" — confirm they are not already Seoul wall-clock times, in which case this
# conversion would shift them by another 9 hours.
df_simple['event_datetime'] = (
    pd.to_datetime(df_simple['Asia/Seoul'], utc=True).dt.tz_convert('Asia/Seoul')
)

# Keep only the columns used for review. The explicit .copy() makes df_simple an
# independent frame, so later column assignments do not trigger
# SettingWithCopyWarning.
df_simple = df_simple[
    ['event_datetime', 'session_id', 'device_id', 'event_key',
     'description', 'location', 'devicemodel', 'language']
].copy()

In [None]:
# Render event_datetime as 'YYYY-MM-DD HH:MM:SS' text.
# The dtype guard keeps this cell re-runnable: once the column holds strings, the
# .dt accessor would raise AttributeError on a second run.
# assign() builds a new frame rather than writing into a possibly sliced one.
# NOTE: the formatted text carries neither the UTC offset nor sub-second precision.
if pd.api.types.is_datetime64_any_dtype(df_simple['event_datetime']):
    df_simple = df_simple.assign(
        event_datetime=df_simple['event_datetime'].dt.strftime('%Y-%m-%d %H:%M:%S')
    )
df_simple.head()

In [None]:
# Write the flattened frame to a local parquet file (pyarrow engine, snappy compression).
df_simple.to_parquet(
    "/home/leesh/codeit_project/codeit-project-docker/parquet/230430.parquet",
    engine="pyarrow",
    compression="snappy",
)