In [1]:
!pip install pyarrow



In [2]:
import pyarrow as pa
import pyarrow.parquet as pq
from datetime import datetime, timedelta, timezone

# Column layout of the mock table (names match the schema below):
#   checked_at:           timestamptz (microsecond precision, UTC)
#   total_views:          integer
#   total_posts_comments: integer
#   positive:             integer
#   negative:             integer
#   neutral:              integer

schema = pa.schema([
    ("checked_at", pa.timestamp('us', tz="UTC")),  # timezone-aware timestamp (UTC)
    ("total_views", pa.int32()),                   # integer -> pa.int32()
    ("total_posts_comments", pa.int32()),          # integer -> pa.int32()
    ("positive", pa.int32()),                      # integer -> pa.int32()
    ("negative", pa.int32()),                      # integer -> pa.int32()
    ("neutral", pa.int32())                        # integer -> pa.int32()
])

# One row per 5-minute slot; 288 slots cover exactly one day.
ROW_CNT = 288

# First timestamp and spacing of the sample rows. The start is explicitly UTC so the
# stored values do not depend on how naive datetimes are interpreted for a tz-aware column.
SAMPLE_START = datetime(2025, 2, 17, tzinfo=timezone.utc)
SAMPLE_STEP = timedelta(minutes=5)

# Build the sample data (PyArrow takes plain Python lists per column).
data = {
    # start + i * step rolls over into the next day instead of raising ValueError
    # (hour out of range) when ROW_CNT is raised above 288.
    "checked_at": [SAMPLE_START + i * SAMPLE_STEP for i in range(ROW_CNT)],
    # i*i must fit in int32, which holds for ROW_CNT <= 46341.
    "total_views": [i*i for i in range(ROW_CNT)],
    "total_posts_comments": [i for i in range(ROW_CNT)],
    "positive": [i for i in range(ROW_CNT)],
    "negative": [2*i for i in range(ROW_CNT)],
    "neutral": [3*i for i in range(ROW_CNT)]
}


# Create the PyArrow Table, enforcing the declared schema.
table = pa.Table.from_pydict(data, schema=schema)

# Save it as a Parquet file.
pq.write_table(table, "mock_data.parquet")

print("Parquet 파일 저장 완료!")





Parquet 파일 저장 완료!


In [3]:
import pyarrow.parquet as pq
import pandas as pd

# Load the Parquet file from disk into a PyArrow Table.
table = pq.read_table("mock_data.parquet")

# Convert the Arrow table to a pandas DataFrame so it can be exported as CSV.
df = table.to_pandas()

# Export as CSV: header row included, UTF-8 encoded, DataFrame index omitted.
df.to_csv("mock_data.csv", index=False, header=True, encoding="utf-8")

print("CSV 파일 저장 완료!")

CSV 파일 저장 완료!


In [7]:
# Print one parenthesised, comma-terminated row per 5-minute slot of 2025-02-17
# (12 slots per hour, 288 slots in total). The row values follow the same formulas
# as the sample data: i*i, i, i, 2*i, 3*i.
# NOTE(review): the format looks like rows of a SQL VALUES list — confirm intended use.
for i in range(288):
    hour, slot_in_hour = divmod(i, 12)
    minute = slot_in_hour * 5
    print(f"('2025-02-17 {hour:02d}:{minute:02d}:00', {i * i},{i},{i},{2 * i},{3 * i}),")

('2025-02-17 00:00:00', 0,0,0,0,0),
('2025-02-17 00:05:00', 1,1,1,2,3),
('2025-02-17 00:10:00', 4,2,2,4,6),
('2025-02-17 00:15:00', 9,3,3,6,9),
('2025-02-17 00:20:00', 16,4,4,8,12),
('2025-02-17 00:25:00', 25,5,5,10,15),
('2025-02-17 00:30:00', 36,6,6,12,18),
('2025-02-17 00:35:00', 49,7,7,14,21),
('2025-02-17 00:40:00', 64,8,8,16,24),
('2025-02-17 00:45:00', 81,9,9,18,27),
('2025-02-17 00:50:00', 100,10,10,20,30),
('2025-02-17 00:55:00', 121,11,11,22,33),
('2025-02-17 01:00:00', 144,12,12,24,36),
('2025-02-17 01:05:00', 169,13,13,26,39),
('2025-02-17 01:10:00', 196,14,14,28,42),
('2025-02-17 01:15:00', 225,15,15,30,45),
('2025-02-17 01:20:00', 256,16,16,32,48),
('2025-02-17 01:25:00', 289,17,17,34,51),
('2025-02-17 01:30:00', 324,18,18,36,54),
('2025-02-17 01:35:00', 361,19,19,38,57),
('2025-02-17 01:40:00', 400,20,20,40,60),
('2025-02-17 01:45:00', 441,21,21,42,63),
('2025-02-17 01:50:00', 484,22,22,44,66),
('2025-02-17 01:55:00', 529,23,23,46,69),
('2025-02-17 02:00:00', 576,24,24,4