In [None]:
import pandas as pd
import ast
import json
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import seaborn as sns

# Build a FontProperties object from the NanumGothic font file path.
# NOTE(review): presumably kept so Korean text can be rendered in matplotlib
# figures by passing `fontprop` explicitly — confirm against the plotting cells.
font_path = '/usr/share/fonts/truetype/nanum/NanumGothic.ttf'
fontprop = fm.FontProperties(fname=font_path)

# Lift pandas' row-count display limit so displayed frames are not truncated.
pd.set_option('display.max_rows', None)


def read_parquet_from_gcs(file_names, gcs_prefix, key_path):
    """Read several parquet files that share a common prefix into DataFrames.

    Each entry of ``file_names`` is expanded to ``{gcs_prefix}/{name}.parquet``
    and loaded with ``pandas.read_parquet``. ``key_path`` is forwarded as the
    ``token`` entry of ``storage_options``.

    Args:
        file_names: Iterable of file base names, without the ``.parquet``
            extension.
        gcs_prefix: Common location prefix, e.g. ``"gs://bucket/dir"``.
            Trailing slashes are ignored.
        key_path: Value passed to the storage backend as ``token``.

    Returns:
        dict mapping each name in ``file_names`` to the object returned by
        ``pandas.read_parquet`` for that file, in input order.
    """
    # Strip trailing slashes so a prefix such as "gs://bucket/dir/" does not
    # yield a doubled slash ("gs://bucket/dir//name.parquet") in the path.
    prefix = gcs_prefix.rstrip("/")
    return {
        name: pd.read_parquet(
            f"{prefix}/{name}.parquet",
            storage_options={"token": key_path},
        )
        for name in file_names
    }


# Base names of the parquet files to load (the ".parquet" extension is
# appended by read_parquet_from_gcs).
file_list = [
    "polls_question",
    "polls_questionpiece",
    "polls_questionreport",
    "polls_questionset",
    "polls_usercandidate",
    "accounts_paymenthistory",
    "accounts_user",
    "accounts_failpaymenthistory"
]

# Common location prefix of the files, and the key file path that is passed
# to the storage backend as its token.
gcs_prefix = "gs://codeit-project/votes"
key_path = "./config/key.json"

# Load every listed file into a dict keyed by its base name.
dfs = read_parquet_from_gcs(file_list, gcs_prefix, key_path)


# Question table
polls_question = dfs["polls_question"]

# Question piece table
polls_questionpiece = dfs["polls_questionpiece"]

# Question report table
polls_questionreport = dfs["polls_questionreport"]

# Question set table
polls_questionset = dfs["polls_questionset"]

# User candidate response table
polls_usercandidate = dfs["polls_usercandidate"]

# Payment history table
accounts_paymenthistory = dfs["accounts_paymenthistory"]

# Basic user information table
accounts_user = dfs["accounts_user"]

# Failed product-purchase history table
accounts_failpaymenthistory = dfs["accounts_failpaymenthistory"]

In [5]:
# Preview the question table: first rows, then summary statistics over all
# columns (numeric and non-numeric alike).
display(
    polls_question.head(),
    polls_question.describe(include='all'),
)

Unnamed: 0,id,question_text,created_at
0,99,가장 신비한 매력이 있는 사람은?,2023-03-31 15:22:53
1,100,"""이 사람으로 한 번 살아보고 싶다"" 하는 사람은?",2023-03-31 15:22:53
2,101,미래의 틱톡커는?,2023-03-31 15:22:54
3,102,여기서 제일 특이한 친구는?,2023-03-31 15:22:54
4,103,가장 지켜주고 싶은 사람은?,2023-03-31 15:22:55


Unnamed: 0,id,question_text,created_at
count,5025.0,5025,5025
unique,,3903,
top,,vote,
freq,,56,
mean,2619.897512,,2023-05-26 17:49:11.666268672
min,99.0,,2023-03-31 15:22:53
25%,1365.0,,2023-05-15 14:03:03
50%,2621.0,,2023-06-02 08:06:53
75%,3877.0,,2023-06-06 06:10:14
max,5133.0,,2023-06-06 06:15:52


In [12]:
# Preview the question-piece table, then compare its question_id column with
# the ids of the question table: distinct count, maximum and minimum.
display(
    polls_questionpiece.head(),
    polls_questionpiece.describe(include='all'),
)

piece_question_ids = polls_questionpiece['question_id']
question_ids = polls_question['id']

print('✅질문조각 question_id 유니크갯수:', piece_question_ids.nunique())
print('✅질문데이터 question_id 유니크갯수:', question_ids.nunique())

# Blank lines visually separate the three comparisons in the output.
print('\n\n\n')
print('✅질문조각 question_id max:', piece_question_ids.max())
print('✅질문데이터 question_id max:', question_ids.max())

print('\n\n\n')
print('✅질문조각 question_id min:', piece_question_ids.min())
print('✅질문데이터 question_id min:', question_ids.min())

Unnamed: 0,id,is_voted,created_at,question_id,is_skipped
0,998458,1,2023-04-28 12:27:22,252,0
1,998459,1,2023-04-28 12:27:22,244,0
2,998460,1,2023-04-28 12:27:22,183,0
3,998461,1,2023-04-28 12:27:22,101,0
4,998462,1,2023-04-28 12:27:22,209,0


Unnamed: 0,id,is_voted,created_at,question_id,is_skipped
count,1265476.0,1265476.0,1265476,1265476.0,1265476.0
mean,76580010.0,0.9630266,2023-05-17 13:39:58.427317504,713.3598,0.000890574
min,998458.0,0.0,2023-04-28 12:27:22,99.0,0.0
25%,20292600.0,1.0,2023-05-09 21:50:54,278.0,0.0
50%,68465330.0,1.0,2023-05-15 14:18:31.500000,477.0,0.0
75%,121266200.0,1.0,2023-05-22 09:56:44,982.0,0.0
max,208385200.0,1.0,2024-05-07 11:32:30,5133.0,1.0
std,59129240.0,0.1886967,,670.6815,0.02982921


✅질문조각 question_id 유니크갯수: 4944
✅질문데이터 question_id 유니크갯수: 5025




✅질문조각 question_id max: 5133
✅질문데이터 question_id max: 5133




✅질문조각 question_id min: 99
✅질문데이터 question_id min: 99


In [13]:
# Preview the question-report table: first rows, then summary statistics over
# all columns.
display(
    polls_questionreport.head(),
    polls_questionreport.describe(include='all'),
)

Unnamed: 0,id,reason,created_at,question_id,user_id
0,1,이 질문은 재미없어요,2023-04-19 06:20:35,250,837556
1,2,이 질문은 재미없어요,2023-04-19 06:58:09,113,837672
2,3,불쾌한 내용이 포함되어 있음,2023-04-19 06:58:17,113,837672
3,4,어떻게 이런 생각을? 이 질문 최고!,2023-04-19 08:12:42,119,837922
4,5,어떻게 이런 생각을? 이 질문 최고!,2023-04-19 08:12:50,119,837922


Unnamed: 0,id,reason,created_at,question_id,user_id
count,51424.0,51424,51424,51424.0,51424.0
unique,,11,,,
top,,그냥 싫어,,,
freq,,28446,,,
mean,28027.358587,,2023-05-21 22:12:41.641120768,733.099098,1168247.0
min,1.0,,2023-04-19 06:20:35,99.0,832340.0
25%,14020.75,,2023-05-13 01:36:21.249999872,305.0,978481.0
50%,28047.5,,2023-05-18 14:33:45,494.0,1160251.0
75%,42084.25,,2023-05-26 08:45:25,1021.0,1351295.0
max,55767.0,,2024-05-05 14:56:25,5110.0,1583634.0


In [None]:
# Select the question row(s) whose id is 250.
polls_question.loc[polls_question['id'].eq(250)]

Unnamed: 0,id,question_text,created_at
0,99,가장 신비한 매력이 있는 사람은?,2023-03-31 15:22:53
1,100,"""이 사람으로 한 번 살아보고 싶다"" 하는 사람은?",2023-03-31 15:22:53
2,101,미래의 틱톡커는?,2023-03-31 15:22:54
3,102,여기서 제일 특이한 친구는?,2023-03-31 15:22:54
4,103,가장 지켜주고 싶은 사람은?,2023-03-31 15:22:55


In [18]:
# Count the distinct values in the question-piece id column.
# NOTE(review): compare with the table's row count to confirm that id
# uniquely identifies each row.
polls_questionpiece['id'].nunique()

1265476