In [2]:
!pip install faker


Collecting faker
  Downloading faker-37.12.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.12.0-py3-none-any.whl (2.0 MB)
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------------------------------- 2.0/2.0 MB 33.0 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.12.0


In [1]:
import json
import random
from datetime import datetime, timedelta
from pathlib import Path

from faker import Faker

fake = Faker('ko_KR')

# Fixed seed so that re-running the notebook regenerates the same data set;
# change SEED to draw a different sample.
SEED = 42
random.seed(SEED)   # seeds the `random` module used for the counts
Faker.seed(SEED)    # seeds the generator shared by Faker instances such as `fake`

# Settings: first and last day of the generated timeline (both are included,
# since the generation loop runs while current_date <= END_DATE).
START_DATE = datetime(2025, 1, 1)
END_DATE = datetime(2025, 8, 16)

# Category settings: top-level categories, mid-level categories and
# sub-categories used as keys of the generated structure.
categories = ["privacy", "child", "safety", "finance"]
mid_categories = ["개인정보보호법", "정보보안사건", "데이터관리정책"]
sub_categories = ["소분류1", "소분류2"]

def generate_article():
    """Build one fake news article.

    Returns:
        dict: ``"title"`` (fake text requested with ``max_nb_chars=20``,
        newlines removed), ``"url"`` (a fake URL) and ``"content"`` (fake text
        requested with ``max_nb_chars=600``, newlines replaced by spaces).
    """
    headline = fake.text(max_nb_chars=20).replace("\n", "")
    link = fake.url()
    body = fake.text(max_nb_chars=600).replace("\n", " ")
    return {"title": headline, "url": link, "content": body}

# Used only for addsocial entries (channel is always "댓글")
def generate_comment_entry(date=None):
    """Build one fake comment record.

    Args:
        date: Optional ``datetime`` to stamp the comment with. When omitted,
            the current time is used, which is the previous behaviour.

    Returns:
        dict: ``"channel"`` (always ``"댓글"``), ``"date"`` formatted as
        ``%Y%m%d%H%M%S``, and ``"content"`` (fake text requested with
        ``max_nb_chars=400``, newlines replaced by spaces).
    """
    posted_at = datetime.now() if date is None else date
    return {
        "channel": "댓글",
        "date": posted_at.strftime("%Y%m%d%H%M%S"),
        "content": fake.text(max_nb_chars=400).replace("\n", " ")
    }

def _empty_timelines():
    """Return a fresh container holding the three empty timeline dicts."""
    return {
        "daily_timeline": {},
        "weekly_timeline": {},
        "monthly_timeline": {}
    }


def _record(timelines, date_str, week_str, month_str, entry):
    """Store ``entry`` under its day, and under its week/month if still unset.

    ``setdefault`` keeps the first value it sees, so each weekly and monthly
    slot holds the entry of the first generated day of that period (the very
    same object as that day's daily entry), not an aggregate over the period.
    """
    timelines["daily_timeline"][date_str] = entry
    timelines["weekly_timeline"].setdefault(week_str, entry)
    timelines["monthly_timeline"].setdefault(month_str, entry)


def _build_news_entry():
    """Build one day's news entry: mid categories -> sub categories -> articles."""
    mid_map = {}
    for mid in mid_categories:
        # Drawn independently; it is not the sum of the sub-category counts.
        mid_count = random.randint(50, 230)
        sub_map = {}
        for sub in sub_categories:
            sub_count = random.randint(5, 25)
            articles = [generate_article() for _ in range(2)]
            sub_map[f"{mid}_{sub}"] = {
                "count": sub_count,
                "관련법": f"{mid} 관련법 조항",
                "articles": articles,
                # The first generated article doubles as the headline.
                "대표뉴스": articles[0]["title"]
            }
        mid_map[mid] = {
            "count": mid_count,
            "소분류목록": sub_map
        }
    return {"중분류목록": mid_map}


def _build_social_entry():
    """Build one day's social entry, which carries only the two random totals."""
    agree_total = random.randint(100, 400)
    disagree_total = random.randint(50, 300)
    return {"counts": {"찬성": agree_total, "반대": disagree_total}}


def _dated_comments(how_many, day):
    """Generate ``how_many`` comments stamped with a random time on ``day``.

    The comment's "date" is overwritten here so that it falls on the timeline
    day the comment is filed under instead of the moment the data is generated.
    """
    comments = []
    for _ in range(how_many):
        comment = generate_comment_entry()
        posted_at = day + timedelta(seconds=random.randrange(24 * 60 * 60))
        # Same timestamp layout generate_comment_entry uses for "date".
        comment["date"] = posted_at.strftime("%Y%m%d%H%M%S")
        comments.append(comment)
    return comments


def _build_addsocial_entry(day):
    """Build one day's detailed addsocial entry with its comment lists."""
    totals = {"찬성": 0, "반대": 0}
    mid_map = {}
    for mid in mid_categories:
        sub_map = {}
        mid_total = 0
        for sub in sub_categories:
            agree_count = random.randint(5, 10)
            repeal_count = random.randint(5, 10)
            disagree_count = random.randint(5, 10)

            agree_list = _dated_comments(agree_count, day)
            repeal_list = _dated_comments(repeal_count, day)
            disagree_list = _dated_comments(disagree_count, day)

            # Both "찬성" sub-groups count towards the "찬성" total.
            in_favor = agree_count + repeal_count
            sub_total = in_favor + disagree_count
            mid_total += sub_total

            sub_map[f"{mid}_{sub}"] = {
                "관련법": f"{mid} 관련법 조항",
                "count": sub_total,
                "counts": {
                    "찬성": in_favor,
                    "반대": disagree_count
                },
                "찬성": {
                    "개정강화": {
                        "count": agree_count,
                        "소셜목록": agree_list
                    },
                    "폐지약화": {
                        "count": repeal_count,
                        "소셜목록": repeal_list
                    }
                },
                "반대": {
                    "소셜목록": disagree_list
                },
                "대표뉴스": fake.sentence()
            }

            totals["찬성"] += in_favor
            totals["반대"] += disagree_count

        mid_map[mid] = {
            "count": mid_total,
            "소분류목록": sub_map
        }
    return {"counts": totals, "중분류목록": mid_map}


def generate_data():
    """Generate the full fake data set for every category and day.

    For each name in ``categories`` and each day from ``START_DATE`` through
    ``END_DATE`` (inclusive) this builds three entries:

    * ``news``      - per mid/sub category counts plus two fake articles each,
    * ``social``    - only the "찬성"/"반대" totals,
    * ``addsocial`` - detailed comment lists (channel "댓글") with counts.

    Each entry is stored in ``daily_timeline`` under ``YYYY-MM-DD``. The
    ``weekly_timeline`` (key ``%Y-W%U``) and ``monthly_timeline`` (key
    ``YYYY-MM``) keep the entry of the first day seen for that period.

    Returns:
        dict: ``{category: {"news"|"social"|"addsocial": timelines}}``.
    """
    data = {}

    for cat in categories:
        data[cat] = {
            "news": _empty_timelines(),
            # social carries counts only
            "social": _empty_timelines(),
            # addsocial keeps the detailed structure (channel="댓글")
            "addsocial": _empty_timelines()
        }
        sections = data[cat]

        current_date = START_DATE
        while current_date <= END_DATE:
            date_str = current_date.strftime("%Y-%m-%d")
            week_str = current_date.strftime("%Y-W%U")
            month_str = current_date.strftime("%Y-%m")

            _record(sections["news"], date_str, week_str, month_str,
                    _build_news_entry())
            _record(sections["social"], date_str, week_str, month_str,
                    _build_social_entry())
            _record(sections["addsocial"], date_str, week_str, month_str,
                    _build_addsocial_entry(current_date))

            current_date += timedelta(days=1)

    return data

# Generate the data set and save it as JSON
full_data = generate_data()
output_path = "data/data.json"
# open() does not create missing parent directories, so make sure the target
# folder exists before writing.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps the Korean text readable in the file.
    json.dump(full_data, f, ensure_ascii=False, indent=2)

print(f"✅ JSON 파일 저장 완료: {output_path}")


✅ JSON 파일 저장 완료: data/data.json
