In [2]:
!pip install faker


Collecting faker
  Downloading faker-37.12.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.12.0-py3-none-any.whl (2.0 MB)
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------------------------------- 2.0/2.0 MB 33.0 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.12.0


In [4]:
import json
import random
from datetime import datetime, timedelta
from pathlib import Path

from faker import Faker

# Fixed seed so that re-running the notebook yields the same random counts and
# the same Faker-generated text. Change the value to get a different dataset.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Faker instance using the Korean (ko_KR) locale.
fake = Faker('ko_KR')

# Inclusive date range covered by the generated timelines.
START_DATE = datetime(2025, 1, 1)
END_DATE = datetime(2025, 8, 16)

# Major categories; each one gets every mid category, and each mid category
# gets every sub category.
categories = ["privacy", "child", "safety", "finance"]
mid_categories = ["개인정보보호법", "정보보안사건", "데이터관리정책"]
sub_categories = ["소분류1", "소분류2"]

# Channels a social entry is randomly attributed to.
channels = ["blog", "twitter", "insta", "community"]

def generate_article():
    """Build one fake news article record.

    Returns:
        dict: ``title`` (Faker text capped at 20 characters, newlines removed),
        ``url`` (Faker URL) and ``content`` (Faker text capped at 600
        characters, newlines replaced by spaces).
    """
    headline = fake.text(max_nb_chars=20).replace("\n", "")
    link = fake.url()
    body = fake.text(max_nb_chars=600).replace("\n", " ")
    return {"title": headline, "url": link, "content": body}

def generate_social_entry():
    """Build one fake social-media post record.

    Returns:
        dict: ``channel`` (random pick from ``channels``), ``date`` (the
        current wall-clock time formatted as ``YYYYmmddHHMMSS``) and
        ``content`` (Faker text capped at 400 characters, newlines replaced
        by spaces).
    """
    entry = {"channel": random.choice(channels)}
    # The timestamp is the moment of generation, not the timeline date.
    entry["date"] = datetime.now().strftime("%Y%m%d%H%M%S")
    entry["content"] = fake.text(max_nb_chars=400).replace("\n", " ")
    return entry

def _build_news_mid_map():
    """Build the news ``중분류목록`` mapping for a single major category."""
    mid_map = {}
    for mid in mid_categories:
        mid_count = random.randint(50, 230)
        sub_map = {}

        for sub in sub_categories:
            sub_count = random.randint(5, 25)
            articles = [generate_article() for _ in range(2)]

            sub_map[f"{mid}_{sub}"] = {
                "count": sub_count,
                "관련법": f"{mid} 관련법 조항",
                "articles": articles,
                # The first generated article doubles as the headline.
                "대표뉴스": articles[0]["title"],
            }

        mid_map[mid] = {
            "count": mid_count,
            "소분류목록": sub_map,
        }
    return mid_map


def _build_social_mid_map():
    """Build the social ``중분류목록`` mapping for a single major category.

    Returns:
        tuple: ``(mid_map, agree_total, disagree_total)`` where the totals are
        the sums of the per-sub-category ``counts`` values in ``mid_map``.
    """
    mid_map = {}
    agree_total = 0
    disagree_total = 0

    for mid in mid_categories:
        sub_map = {}
        for sub in sub_categories:
            agree_count = random.randint(5, 10)
            repeal_count = random.randint(5, 10)
            disagree_count = random.randint(5, 10)

            agree_list = [generate_social_entry() for _ in range(agree_count)]
            repeal_list = [generate_social_entry() for _ in range(repeal_count)]
            disagree_list = [generate_social_entry() for _ in range(disagree_count)]

            sub_map[f"{mid}_{sub}"] = {
                "관련법": f"{mid} 관련법 조항",
                "count": agree_count + repeal_count + disagree_count,
                "counts": {
                    "찬성": agree_count + repeal_count,
                    "반대": disagree_count,
                },
                "찬성": {
                    "개정강화": {
                        "count": agree_count,
                        "소셜목록": agree_list,
                    },
                    "폐지약화": {
                        "count": repeal_count,
                        "소셜목록": repeal_list,
                    },
                },
                "반대": {
                    "소셜목록": disagree_list,
                },
                "대표뉴스": fake.sentence(),
            }

            agree_total += agree_count + repeal_count
            disagree_total += disagree_count

        mid_map[mid] = {
            # Independent random figure; not derived from the sub-category counts.
            "count": random.randint(50, 230),
            "소분류목록": sub_map,
        }

    return mid_map, agree_total, disagree_total


def generate_data():
    """Generate the full mock news/social dataset for START_DATE..END_DATE.

    For every day in the inclusive range, a news entry and a social entry are
    generated covering every major/mid/sub category combination.

    Returns:
        dict: ``{"all": {"news": {...}, "social": {...}}}`` where each of
        ``news`` and ``social`` holds ``daily_timeline`` (keyed ``YYYY-MM-DD``),
        ``weekly_timeline`` (keyed ``YYYY-Www`` using ``%U`` week numbers) and
        ``monthly_timeline`` (keyed ``YYYY-MM``). The weekly and monthly
        timelines keep the entry generated on the first day seen for that
        week/month. Social entries additionally carry a top-level ``counts``
        dict with the agree/disagree totals summed over all major categories.
    """
    timeline_names = ("daily_timeline", "weekly_timeline", "monthly_timeline")
    data = {
        "all": {
            "news": {name: {} for name in timeline_names},
            "social": {name: {} for name in timeline_names},
        }
    }
    news = data["all"]["news"]
    social = data["all"]["social"]

    current_date = START_DATE
    while current_date <= END_DATE:
        date_str = current_date.strftime("%Y-%m-%d")
        week_str = current_date.strftime("%Y-W%U")
        month_str = current_date.strftime("%Y-%m")

        # One entry per timeline granularity: day, week, month.
        news_entries = [{"대분류목록": {}} for _ in range(3)]
        social_entries = [
            {"대분류목록": {}, "counts": {"찬성": 0, "반대": 0}} for _ in range(3)
        ]

        # Totals accumulate across *all* major categories for this day; they
        # must be initialised outside the category loop, otherwise only the
        # last category's numbers would survive.
        agree_total = 0
        disagree_total = 0

        for cat in categories:
            # NEWS
            news_mid_map = _build_news_mid_map()
            for entry in news_entries:
                # The three granularities share the same mid-category mapping.
                entry["대분류목록"][cat] = {"중분류목록": news_mid_map}

            # SOCIAL
            social_mid_map, cat_agree, cat_disagree = _build_social_mid_map()
            for entry in social_entries:
                entry["대분류목록"][cat] = {"중분류목록": social_mid_map}

            agree_total += cat_agree
            disagree_total += cat_disagree

        # Overall social totals for the day.
        for entry in social_entries:
            entry["counts"]["찬성"] = agree_total
            entry["counts"]["반대"] = disagree_total

        news_day, news_week, news_month = news_entries
        social_day, social_week, social_month = social_entries

        news["daily_timeline"][date_str] = news_day
        # setdefault: only the first day of each week/month is recorded.
        news["weekly_timeline"].setdefault(week_str, news_week)
        news["monthly_timeline"].setdefault(month_str, news_month)

        social["daily_timeline"][date_str] = social_day
        social["weekly_timeline"].setdefault(week_str, social_week)
        social["monthly_timeline"].setdefault(month_str, social_month)

        current_date += timedelta(days=1)

    return data

# Generate the dataset and save it as JSON.
full_data = generate_data()

output_path = Path("data/dadata.json")
# Create the target directory first so a fresh checkout does not fail in open().
output_path.parent.mkdir(parents=True, exist_ok=True)

with output_path.open("w", encoding="utf-8") as f:
    json.dump(full_data, f, ensure_ascii=False, indent=2)

# Report the path that was actually written (the message previously named a
# different file than the one opened above).
print(f"✅ JSON 파일 저장 완료: {output_path.as_posix()}")


✅ JSON 파일 저장 완료: data/ddata.json
