In [148]:
import json
import pandas as pd
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

In [169]:
LOG_DATA_PATH = "data\\added_action_id.json"

# 중첩 구조 평탄화해서 읽기
with open(LOG_DATA_PATH, "r", encoding="utf-8") as f:
    # data = json.load(f)
    data = [json.loads(line.strip()) for line in f if line.strip()]  # .jsonl일 때 읽어오는 방법
    
df = pd.json_normalize(data)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   event                 79 non-null     object
 1   userId                79 non-null     object
 2   username              79 non-null     object
 3   timestamp             79 non-null     object
 4   workspaceId           79 non-null     int64 
 5   details.endDate       51 non-null     object
 6   details.importance    79 non-null     int64 
 7   details.name          79 non-null     object
 8   details.state         79 non-null     object
 9   details.startDate     50 non-null     object
 10  details.participants  79 non-null     object
 11  details.actionId      79 non-null     int64 
dtypes: int64(3), object(9)
memory usage: 7.5+ KB


In [180]:
# stat 1/2 공통 작업 - participants 펼치기
exploded = df.explode('details.participants')
exploded['participants_userId'] = exploded['details.participants'].apply(lambda x: x.get('userId') if isinstance(x, dict) else None)

# 필요한 필드 정리
filtered = exploded[[
    'event',
    'userId',
    'participants_userId',
    'timestamp',
    'workspaceId',
    'details.actionId',
    'details.name',
    'details.state',
    'details.importance',
    'details.startDate',
    'details.endDate',
    'details.participants'
]]

filtered = filtered.dropna(subset=['participants_userId'])

# 최신 이벤트만 남기기
filtered = (
    filtered
    .sort_values("timestamp", ascending=False)
    .drop_duplicates(
        subset=[
            "workspaceId", 
            "details.actionId", 
            "details.name", 
            "participants_userId"
        ],
        keep="first"
    )
)

# DELETE 이벤트 처리 - 가장 마지막이 DELETE면 카운트에서 제외
latest_events = (
    df
    .sort_values("timestamp", ascending=False)
    .drop_duplicates(
        subset=[
            "workspaceId", 
            "details.actionId", 
            "details.name"
        ],
        keep="first"
    )
)
deleted_ids = latest_events[latest_events["event"] == "DELETE_PROJECT_PROGRESS_ACTION"]["details.actionId"].unique()

filtered = filtered[~filtered["details.actionId"].isin(deleted_ids)]

# Dtype 정리
filtered['userId'] = filtered['userId'].astype(int)
filtered['participants_userId'] = filtered['participants_userId'].astype(int)
filtered['timestamp'] = pd.to_datetime(filtered['timestamp'])
filtered['details.startDate'] = pd.to_datetime(filtered['details.startDate'])
filtered['details.endDate'] = pd.to_datetime(filtered['details.endDate'])

# 모든 사용자 ID (이벤트 발생자 + 참여자)
all_user_ids = set(filtered['userId'].unique()) | set(filtered['participants_userId'].unique())

user_dfs = {}
for uid in all_user_ids:
    # 해당 사용자가 발생시킨 이벤트 OR 참여한 이벤트
    user_data = filtered[(filtered['userId'] == uid) | (filtered['participants_userId'] == uid)].copy()
    user_dfs[f'df_{uid}'] = user_data


# ===== Statistics 1 Start =====
group_list = {}

# state, importance 기준 grouping -> count 목적
# 이벤트 발생자와 참여자 모두 집계
all_stat1_results = []

for i, (name, user_df) in enumerate(user_dfs.items(), start=1):
    
    # 이벤트 발생자 집계
    initiator_grouped = user_df.groupby(['userId', 'details.state', 'details.importance']).size().reset_index(name='count')
    initiator_grouped['role'] = 'initiator'
    
    # 참여자 집계
    participants_grouped = user_df.groupby(['participants_userId', 'details.state', 'details.importance']).size().reset_index(name='count')
    participants_grouped['role'] = 'participant'
    participants_grouped = participants_grouped.rename(columns={'participants_userId': 'userId'})

    # 합치기
    combined = pd.concat([initiator_grouped, participants_grouped], ignore_index=True)
    all_stat1_results.append(combined)
    
    # 모든 유저의 결과 합치기
    if all_stat1_results:
        final_stat1 = pd.concat(all_stat1_results, ignore_index=True)
        # 같은 userId, state, importance, role 조합이 있다면 count 합계
        stat1_result = final_stat1.groupby(['userId', 'details.state', 'details.importance'])['count'].sum().reset_index().to_dict(orient='records')
    else:
        stat1_result = []

    # dict type(json)으로 변환
    for name, gr in group_list.items():
        stat1_result = gr.to_dict(orient='records')

    # ===== Statistics 2 Start=====
    filtered_users = {}

    # DONE 상태의 action만 뽑아오기
    done_df = exploded[exploded['details.state'] == 'DONE'][[
        'userId', 'participants_userId', 'details.state', 'details.importance', 
        'details.startDate', 'details.endDate', 'workspaceId', 
        'details.actionId', 'details.name', 'timestamp', 'event'
    ]].copy()

    done_df = done_df.dropna(subset=['participants_userId'])

    # 삭제된 actionId 확인 (먼저 처리)
    latest_action_events = (
        df
        .sort_values('timestamp', ascending=False)
        .drop_duplicates(
            subset=[
                'workspaceId',
                'details.actionId'
            ],
            keep="first"
        )
    )
    deleted_action_ids = latest_action_events[latest_action_events['event'] == 'DELETE_PROJECT_PROGRESS_ACTION']['details.actionId'].unique()

    # 삭제된 actionId 제거
    done_df = done_df[~done_df['details.actionId'].isin(deleted_action_ids)]

    # 날짜형 변환 및 duration 계산
    done_df['details.startDate'] = pd.to_datetime(done_df['details.startDate'], utc=True)
    done_df['details.endDate'] = pd.to_datetime(done_df['details.endDate'], utc=True)
    done_df['duration_hours'] = ((done_df['details.endDate'] - done_df['details.startDate']).dt.total_seconds() / 3600)

    # 결측치, 음수 제거
    done_df = done_df.dropna(subset=['duration_hours'])
    done_df = done_df[done_df['duration_hours'] >= 0]

    # 참여자용 데이터 (participants_userId 기준 중복 제거)
    participants_done_df = (
        done_df
        .sort_values('timestamp', ascending=False)
        .drop_duplicates(
            subset=[
                'workspaceId',
                'details.actionId',
                'participants_userId'
            ],
            keep="first"
        )
    )

    # 이벤트 발생자용 데이터 (userId 기준 중복 제거)
    initiator_done_df = (
        done_df
        .sort_values('timestamp', ascending=False)
        .drop_duplicates(
            subset=[
                'workspaceId',
                'details.actionId',
                'userId'
            ],
            keep="first"
        )
    )

    # 참여자 통계
    participants_result = (
        participants_done_df
        .groupby(['participants_userId', 'details.importance'])
        ['duration_hours']
        .mean()
        .reset_index(name='mean_hours')
        .rename(columns={'participants_userId': 'userId'})
    )
    participants_result['role'] = 'participant'

    # 이벤트 발생자 통계
    initiator_result = (
        initiator_done_df
        .groupby(['userId', 'details.importance'])
        ['duration_hours']
        .mean()
        .reset_index(name='mean_hours')
    )
    initiator_result['role'] = 'initiator'

    final_result = pd.concat([participants_result, initiator_result], ignore_index=True)
    final_result = final_result.drop('role', axis=1)
    filtered_users['stat2_result'] = final_result

    stat2_result = final_result.to_dict(orient='records')

In [181]:
stat1_result

[{'userId': 1, 'details.state': 'DONE', 'details.importance': 3, 'count': 2},
 {'userId': 1,
  'details.state': 'IN_PROGRESS',
  'details.importance': 3,
  'count': 2},
 {'userId': 5, 'details.state': 'DONE', 'details.importance': 1, 'count': 6},
 {'userId': 5, 'details.state': 'DONE', 'details.importance': 2, 'count': 10},
 {'userId': 5, 'details.state': 'DONE', 'details.importance': 4, 'count': 2},
 {'userId': 5, 'details.state': 'DONE', 'details.importance': 5, 'count': 4},
 {'userId': 5,
  'details.state': 'IN_PROGRESS',
  'details.importance': 5,
  'count': 2},
 {'userId': 6, 'details.state': 'DONE', 'details.importance': 1, 'count': 8},
 {'userId': 6, 'details.state': 'DONE', 'details.importance': 2, 'count': 4},
 {'userId': 6, 'details.state': 'DONE', 'details.importance': 4, 'count': 6},
 {'userId': 6,
  'details.state': 'IN_PROGRESS',
  'details.importance': 5,
  'count': 2},
 {'userId': 13, 'details.state': 'DONE', 'details.importance': 1, 'count': 2},
 {'userId': 13, 'detail

In [182]:
stat2_result

[{'userId': 5.0, 'details.importance': 1, 'mean_hours': 26.732472008138334},
 {'userId': 5.0, 'details.importance': 2, 'mean_hours': 86.85746215163417},
 {'userId': 5.0, 'details.importance': 4, 'mean_hours': 557.6763227412081},
 {'userId': 6.0, 'details.importance': 1, 'mean_hours': 26.732472008138334},
 {'userId': 6.0, 'details.importance': 2, 'mean_hours': 98.71572744869638},
 {'userId': 6.0, 'details.importance': 4, 'mean_hours': 557.9989630154175},
 {'userId': 13.0, 'details.importance': 1, 'mean_hours': 26.732472008138334},
 {'userId': 13.0, 'details.importance': 2, 'mean_hours': 86.85746215163417},
 {'userId': 13.0, 'details.importance': 4, 'mean_hours': 557.9989630154175},
 {'userId': '13', 'details.importance': 1, 'mean_hours': 26.732472008138334},
 {'userId': '13', 'details.importance': 2, 'mean_hours': 86.85746215163417},
 {'userId': '13', 'details.importance': 4, 'mean_hours': 557.9989630154175},
 {'userId': '5', 'details.importance': 1, 'mean_hours': 26.732472008138334},
 

In [150]:
df

Unnamed: 0,event,userId,username,timestamp,workspaceId,details.endDate,details.importance,details.name,details.state,details.startDate,details.participants,details.actionId
0,DONE_PROJECT_PROGRESS_ACTION,1,조유민,2025-06-24T14:00:56.260693158Z,1,2025-06-24T14:00:56.249852307,3,Postgresql 쿼리 최적화 및 인덱스 설정,DONE,,[],1
1,CREATE_PROJECT_PROGRESS_ACTION,1,조유민,2025-06-24T14:39:04.700877013Z,1,,3,회원 프론트 화면 피그마 디자인,IN_PROGRESS,,"[{'username': '조유민', 'userId': 1}]",2
2,CREATE_PROJECT_PROGRESS_ACTION,1,조유민,2025-06-24T14:44:48.333462271Z,1,,3,회원 프론트 화면 피그마 디자인,IN_PROGRESS,,"[{'username': '조유민', 'userId': 1}]",2
3,CREATE_PROJECT_PROGRESS_ACTION,1,조유민,2025-06-25T01:49:49.826141997Z,1,,3,회원 프론트 화면 피그마 디자인,IN_PROGRESS,,"[{'userId': 1, 'username': '조유민'}]",2
4,CREATE_PROJECT_PROGRESS_ACTION,1,조유민,2025-06-25T02:07:31.731169716Z,1,,3,회원 프론트 화면 피그마 디자인,IN_PROGRESS,,"[{'userId': 1, 'username': '조유민'}]",2
...,...,...,...,...,...,...,...,...,...,...,...,...
74,UPDATE_PARTICIPANT_TO_ACTION,6,유민,2025-06-26T01:10:39.431163536Z,40,2025-06-26T01:10:39.430666636,2,캘린더 데이터 동기화 기능 구현,DONE,2025-06-03T00:00:00,"[{'userId': 6, 'username': '유민'}]",4
75,UPDATE_PARTICIPANT_TO_ACTION,5,조유민,2025-06-26T01:10:53.064549333Z,40,2025-06-26T01:10:53.06408301,3,캘린더 이벤트 알림 시스템 구축,BEFORE,2025-06-02T00:00:00,"[{'userId': 5, 'username': '조유민'}]",13
76,UPDATE_PARTICIPANT_TO_ACTION,6,유민,2025-06-26T01:11:03.203960208Z,40,2025-06-26T01:11:03.202732873,3,캘린더 이벤트 알림 시스템 구축,BEFORE,2025-06-02T00:00:00,"[{'userId': 6, 'username': '유민'}, {'userId': 5...",13
77,UPDATE_PARTICIPANT_TO_ACTION,5,조유민,2025-06-26T01:11:03.324514406Z,40,2025-06-26T01:11:03.202732873,3,캘린더 이벤트 알림 시스템 구축,BEFORE,2025-06-02T00:00:00,"[{'userId': 6, 'username': '유민'}, {'userId': 5...",13


In [168]:
# participants 펼치기
exploded = df.explode('details.participants')
exploded['participants_userId'] = exploded['details.participants'].apply(lambda x: x.get('userId') if isinstance(x, dict) else None)

# 필요한 필드만 정리
filtered = exploded[[
    'event',
    'userId',
    'participants_userId',
    'timestamp',
    'workspaceId',
    'details.actionId',
    'details.name',
    'details.state',
    'details.importance',
    'details.startDate',
    'details.endDate',
    'details.participants'
]]

filtered = filtered.dropna(subset=['participants_userId'])

# 최신 이벤트만 남기기
filtered = (
    filtered
    .sort_values("timestamp", ascending=False)
    .drop_duplicates(
        subset=[
            "workspaceId", 
            "details.actionId", 
            "details.name", 
            "participants_userId"
        ],
        keep="first"
    )
)

# DELETE 이벤트 처리 - 가장 마지막이 DELETE면 카운트에서 제외
latest_events = (
    df
    .sort_values("timestamp", ascending=False)
    .drop_duplicates(
        subset=[
            "workspaceId", 
            "details.actionId", 
            "details.name"
        ],
        keep="first"
    )
)
deleted_ids = latest_events[latest_events["event"] == "DELETE_PROJECT_PROGRESS_ACTION"]["details.actionId"].unique()

filtered = filtered[~filtered["details.actionId"].isin(deleted_ids)]

# userId별로 분리
# user_dfs = {}
# for uid in filtered['participants_userId'].unique():
#     user_dfs[f'df_{uid}'] = filtered[filtered['participants_userId'] == uid].copy()

# 모든 관련 사용자 ID 수집 (이벤트 발생자 + 참여자)
all_user_ids = set(filtered['userId'].unique()) | set(filtered['participants_userId'].unique())

user_dfs = {}
for uid in all_user_ids:
    # 해당 사용자가 발생시킨 이벤트 OR 참여한 이벤트
    user_data = filtered[(filtered['userId'] == uid) | (filtered['participants_userId'] == uid)].copy()
    user_dfs[f'df_{uid}'] = user_data

In [152]:
filtered

Unnamed: 0,event,userId,participants_userId,timestamp,workspaceId,details.actionId,details.name,details.state,details.importance,details.startDate,details.endDate,details.participants
1,CREATE_PROJECT_PROGRESS_ACTION,1,1.0,2025-06-24T14:39:04.700877013Z,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,,,"{'username': '조유민', 'userId': 1}"
2,CREATE_PROJECT_PROGRESS_ACTION,1,1.0,2025-06-24T14:44:48.333462271Z,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,,,"{'username': '조유민', 'userId': 1}"
3,CREATE_PROJECT_PROGRESS_ACTION,1,1.0,2025-06-25T01:49:49.826141997Z,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,,,"{'userId': 1, 'username': '조유민'}"
4,CREATE_PROJECT_PROGRESS_ACTION,1,1.0,2025-06-25T02:07:31.731169716Z,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,,,"{'userId': 1, 'username': '조유민'}"
5,CREATE_PROJECT_PROGRESS_ACTION,1,1.0,2025-06-25T05:43:36.331555839Z,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,,,"{'username': '조유민', 'userId': 1}"
...,...,...,...,...,...,...,...,...,...,...,...,...
76,UPDATE_PARTICIPANT_TO_ACTION,6,5.0,2025-06-26T01:11:03.203960208Z,40,13,캘린더 이벤트 알림 시스템 구축,BEFORE,3,2025-06-02T00:00:00,2025-06-26T01:11:03.202732873,"{'userId': 5, 'username': '조유민'}"
77,UPDATE_PARTICIPANT_TO_ACTION,5,6.0,2025-06-26T01:11:03.324514406Z,40,13,캘린더 이벤트 알림 시스템 구축,BEFORE,3,2025-06-02T00:00:00,2025-06-26T01:11:03.202732873,"{'userId': 6, 'username': '유민'}"
77,UPDATE_PARTICIPANT_TO_ACTION,5,5.0,2025-06-26T01:11:03.324514406Z,40,13,캘린더 이벤트 알림 시스템 구축,BEFORE,3,2025-06-02T00:00:00,2025-06-26T01:11:03.202732873,"{'userId': 5, 'username': '조유민'}"
78,DONE_PROJECT_PROGRESS_ACTION,5,5.0,2025-06-26T01:11:10.761430217Z,40,13,캘린더 이벤트 알림 시스템 구축,DONE,3,2025-06-02T00:00:00,2025-06-26T01:11:10.758819415,"{'userId': 5, 'username': '조유민'}"


In [153]:
filtered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 121 entries, 1 to 78
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   event                 121 non-null    object 
 1   userId                121 non-null    object 
 2   participants_userId   121 non-null    float64
 3   timestamp             121 non-null    object 
 4   workspaceId           121 non-null    int64  
 5   details.actionId      121 non-null    int64  
 6   details.name          121 non-null    object 
 7   details.state         121 non-null    object 
 8   details.importance    121 non-null    int64  
 9   details.startDate     85 non-null     object 
 10  details.endDate       85 non-null     object 
 11  details.participants  121 non-null    object 
dtypes: float64(1), int64(3), object(8)
memory usage: 12.3+ KB


In [154]:
filtered['userId'] = filtered['userId'].astype(int)
filtered['participants_userId'] = filtered['participants_userId'].astype(int)
filtered['timestamp'] = pd.to_datetime(filtered['timestamp'])
filtered['details.startDate'] = pd.to_datetime(filtered['details.startDate'])
filtered['details.endDate'] = pd.to_datetime(filtered['details.endDate'])

In [155]:
filtered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 121 entries, 1 to 78
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype              
---  ------                --------------  -----              
 0   event                 121 non-null    object             
 1   userId                121 non-null    int64              
 2   participants_userId   121 non-null    int64              
 3   timestamp             121 non-null    datetime64[ns, UTC]
 4   workspaceId           121 non-null    int64              
 5   details.actionId      121 non-null    int64              
 6   details.name          121 non-null    object             
 7   details.state         121 non-null    object             
 8   details.importance    121 non-null    int64              
 9   details.startDate     85 non-null     datetime64[ns]     
 10  details.endDate       85 non-null     datetime64[ns]     
 11  details.participants  121 non-null    object             
dtypes: datetime64[

In [156]:
filtered

Unnamed: 0,event,userId,participants_userId,timestamp,workspaceId,details.actionId,details.name,details.state,details.importance,details.startDate,details.endDate,details.participants
1,CREATE_PROJECT_PROGRESS_ACTION,1,1,2025-06-24 14:39:04.700877013+00:00,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,NaT,NaT,"{'username': '조유민', 'userId': 1}"
2,CREATE_PROJECT_PROGRESS_ACTION,1,1,2025-06-24 14:44:48.333462271+00:00,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,NaT,NaT,"{'username': '조유민', 'userId': 1}"
3,CREATE_PROJECT_PROGRESS_ACTION,1,1,2025-06-25 01:49:49.826141997+00:00,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,NaT,NaT,"{'userId': 1, 'username': '조유민'}"
4,CREATE_PROJECT_PROGRESS_ACTION,1,1,2025-06-25 02:07:31.731169716+00:00,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,NaT,NaT,"{'userId': 1, 'username': '조유민'}"
5,CREATE_PROJECT_PROGRESS_ACTION,1,1,2025-06-25 05:43:36.331555839+00:00,1,2,회원 프론트 화면 피그마 디자인,IN_PROGRESS,3,NaT,NaT,"{'username': '조유민', 'userId': 1}"
...,...,...,...,...,...,...,...,...,...,...,...,...
76,UPDATE_PARTICIPANT_TO_ACTION,6,5,2025-06-26 01:11:03.203960208+00:00,40,13,캘린더 이벤트 알림 시스템 구축,BEFORE,3,2025-06-02,2025-06-26 01:11:03.202732873,"{'userId': 5, 'username': '조유민'}"
77,UPDATE_PARTICIPANT_TO_ACTION,5,6,2025-06-26 01:11:03.324514406+00:00,40,13,캘린더 이벤트 알림 시스템 구축,BEFORE,3,2025-06-02,2025-06-26 01:11:03.202732873,"{'userId': 6, 'username': '유민'}"
77,UPDATE_PARTICIPANT_TO_ACTION,5,5,2025-06-26 01:11:03.324514406+00:00,40,13,캘린더 이벤트 알림 시스템 구축,BEFORE,3,2025-06-02,2025-06-26 01:11:03.202732873,"{'userId': 5, 'username': '조유민'}"
78,DONE_PROJECT_PROGRESS_ACTION,5,5,2025-06-26 01:11:10.761430217+00:00,40,13,캘린더 이벤트 알림 시스템 구축,DONE,3,2025-06-02,2025-06-26 01:11:10.758819415,"{'userId': 5, 'username': '조유민'}"


In [157]:
# 파일로 저장(확인용)
# filtered.to_json('exploded.jsonl', orient='records', lines=True, force_ascii=False)

In [158]:
user_dfs.keys()

dict_keys(['df_1.0', 'df_6.0', 'df_13.0', 'df_5.0'])

In [159]:
print(filtered['userId'].unique())
print(filtered['participants_userId'].unique())

[ 1 48  6 13  5]
[ 1  6 13  5]


In [160]:
filtered['details.actionId'].unique()

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13])

In [161]:
filtered[filtered['details.actionId'] == 5]

Unnamed: 0,event,userId,participants_userId,timestamp,workspaceId,details.actionId,details.name,details.state,details.importance,details.startDate,details.endDate,details.participants
18,UPDATE_PARTICIPANT_TO_ACTION,13,13,2025-06-26 02:59:56.268871531+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-02 21:00:00,2025-06-26 02:59:56.266855503,"{'username': '박소현', 'userId': 13}"
18,UPDATE_PARTICIPANT_TO_ACTION,13,6,2025-06-26 02:59:56.268871531+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-02 21:00:00,2025-06-26 02:59:56.266855503,"{'username': '유민', 'userId': 6}"
19,UPDATE_PARTICIPANT_TO_ACTION,6,13,2025-06-26 02:59:56.322154257+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-02 21:00:00,2025-06-26 02:59:56.266855503,"{'username': '박소현', 'userId': 13}"
19,UPDATE_PARTICIPANT_TO_ACTION,6,6,2025-06-26 02:59:56.322154257+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-02 21:00:00,2025-06-26 02:59:56.266855503,"{'username': '유민', 'userId': 6}"
35,UPDATE_PARTICIPANT_TO_ACTION,6,13,2025-06-26 02:37:35.845583149+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-18 06:00:00,2025-06-26 02:37:35.787681726,"{'username': '박소현', 'userId': 13}"
35,UPDATE_PARTICIPANT_TO_ACTION,6,6,2025-06-26 02:37:35.845583149+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-18 06:00:00,2025-06-26 02:37:35.787681726,"{'username': '유민', 'userId': 6}"
36,UPDATE_PARTICIPANT_TO_ACTION,13,13,2025-06-26 02:38:10.588398214+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-02 21:00:00,2025-06-26 02:38:10.586673115,"{'username': '박소현', 'userId': 13}"
36,UPDATE_PARTICIPANT_TO_ACTION,13,6,2025-06-26 02:38:10.588398214+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-02 21:00:00,2025-06-26 02:38:10.586673115,"{'username': '유민', 'userId': 6}"
37,UPDATE_PARTICIPANT_TO_ACTION,6,13,2025-06-26 02:38:10.643213678+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-02 21:00:00,2025-06-26 02:38:10.586673115,"{'username': '박소현', 'userId': 13}"
37,UPDATE_PARTICIPANT_TO_ACTION,6,6,2025-06-26 02:38:10.643213678+00:00,40,5,캘린더 프론트 화면!!,DONE,4,2025-06-02 21:00:00,2025-06-26 02:38:10.586673115,"{'username': '유민', 'userId': 6}"


In [162]:
# ===== Statistics 1 Start =====
# state, importance 기준 grouping -> count 목적
group_list = {}

for i, (name, user_df) in enumerate(user_dfs.items(), start=1):
    group_list[f'grouped{i}'] = (user_df.groupby(['participants_userId', 'details.state', 'details.importance']).size().reset_index(name='count'))

In [163]:
group_list.keys()

dict_keys(['grouped1', 'grouped2', 'grouped3', 'grouped4'])

In [164]:
group_list
# group_list['grouped1']

{'grouped1':    participants_userId details.state  details.importance  count
 0                  1.0          DONE                   3      1
 1                  1.0   IN_PROGRESS                   3     13,
 'grouped2':     participants_userId details.state  details.importance  count
 0                   6.0        BEFORE                   2      5
 1                   6.0        BEFORE                   3      5
 2                   6.0        BEFORE                   5      1
 3                   6.0          DONE                   1      5
 4                   6.0          DONE                   2     11
 5                   6.0          DONE                   3      3
 6                   6.0          DONE                   4      9
 7                   6.0          DONE                   5      1
 8                   6.0   IN_PROGRESS                   2      1
 9                   6.0   IN_PROGRESS                   5      1
 10                  6.0       PENDING                

In [165]:
for name, gr in group_list.items():
    json_temp = gr.to_dict(orient='records')

    # with open(f"data\\stat1-{name}.json", "w", encoding="utf-8") as f:
    #     json.dump(json_temp, f, ensure_ascii=False, indent=2)

In [166]:
print(type(group_list))
group_list

<class 'dict'>


{'grouped1':    participants_userId details.state  details.importance  count
 0                  1.0          DONE                   3      1
 1                  1.0   IN_PROGRESS                   3     13,
 'grouped2':     participants_userId details.state  details.importance  count
 0                   6.0        BEFORE                   2      5
 1                   6.0        BEFORE                   3      5
 2                   6.0        BEFORE                   5      1
 3                   6.0          DONE                   1      5
 4                   6.0          DONE                   2     11
 5                   6.0          DONE                   3      3
 6                   6.0          DONE                   4      9
 7                   6.0          DONE                   5      1
 8                   6.0   IN_PROGRESS                   2      1
 9                   6.0   IN_PROGRESS                   5      1
 10                  6.0       PENDING                

In [167]:
### stat 2
filtered_users = {}

for name, df in user_dfs.items():
    filtered = df[df['details.state'] == 'DONE'][
        ['participant_userId', 'details.state', 'details.importance', 'details.startDate', 'details.endDate']
    ]
    filtered_users[name] = filtered

KeyError: "['participant_userId'] not in index"

In [None]:
filtered_users.keys()

dict_keys(['df_3', 'df_5', 'df_1', 'df_2', 'df_4'])

In [None]:
filtered_users['df_3']

Unnamed: 0,participant_userId,details.state,details.importance,details.startDate,details.endDate
8,3,DONE,3,2025-06-04T06:09:44.072653,2025-06-11T06:09:44.072653
11,3,DONE,5,2025-06-05T06:09:44.072701,2025-06-12T06:09:44.072701
16,3,DONE,5,2025-06-11T06:09:44.072806,2025-06-16T06:09:44.072806
21,3,DONE,5,2025-06-08T06:09:44.072910,2025-06-14T06:09:44.072910
24,3,DONE,2,2025-06-12T06:09:44.072956,2025-06-16T06:09:44.072956
27,3,DONE,2,2025-06-11T06:09:44.073003,2025-06-14T06:09:44.073003
28,3,DONE,1,2025-05-28T06:09:44.073019,2025-06-12T06:09:44.073019
29,3,DONE,5,2025-06-08T06:09:44.073036,2025-06-13T06:09:44.073036
30,3,DONE,1,2025-06-06T06:09:44.073049,2025-06-11T06:09:44.073049
37,3,DONE,1,2025-06-07T06:09:44.073152,2025-06-14T06:09:44.073152


In [None]:
for name, df in filtered_users.items():
    # 날짜형 변환
    df['details.startDate'] = pd.to_datetime(df['details.startDate'], utc=True)
    df['details.endDate'] = pd.to_datetime(df['details.endDate'], utc=True)

    # 소요 시간 계산 (시간 단위)
    df['duration_hours'] = (df['details.endDate'] - df['details.startDate']).dt.total_seconds() / 3600

    # 컬럽 drop
    df = df[['participant_userId', 'details.importance', 'duration_hours']]

    # 평균 계산
    df = df.groupby(['participant_userId', 'details.importance'])['duration_hours'].mean().reset_index(name='mean_hours')

    # 딕셔너리 업데이트
    filtered_users[name] = df

In [None]:
filtered_users['df_3']

Unnamed: 0,participant_userId,details.importance,mean_hours
0,3,1,182.4
1,3,2,115.2
2,3,3,177.6
3,3,4,112.0
4,3,5,144.0


In [None]:
for name, fdf in filtered_users.items():
    json_temp = fdf.to_dict(orient='records')

    # 파일로 저장
    # with open(f"data\\stat2-{name}.json", "w", encoding="utf-8") as f:
    #     for item in json_temp:  # data는 List[Dict]
    #         json_line = json.dumps(item, ensure_ascii=False)
    #         f.write(json_line + "\n")
    #         # json.dump(json_temp, f, ensure_ascii=False, indent=2)

{'df_3':    participant_userId  details.importance  mean_hours
 0                   3                   1       182.4
 1                   3                   2       115.2
 2                   3                   3       177.6
 3                   3                   4       112.0
 4                   3                   5       144.0,
 'df_5':    participant_userId  details.importance  mean_hours
 0                   5                   1  158.857143
 1                   5                   2  132.000000
 2                   5                   3  120.000000
 3                   5                   4   81.000000
 4                   5                   5  136.000000,
 'df_1':    participant_userId  details.importance  mean_hours
 0                   1                   1  133.333333
 1                   1                   2  149.333333
 2                   1                   3  168.000000
 3                   1                   4   96.750000
 4                   1                 

In [None]:
print(type(filtered_users))
filtered_users

<class 'dict'>


{'df_3':    participant_userId  details.importance  mean_hours
 0                   3                   1       182.4
 1                   3                   2       115.2
 2                   3                   3       177.6
 3                   3                   4       112.0
 4                   3                   5       144.0,
 'df_5':    participant_userId  details.importance  mean_hours
 0                   5                   1  158.857143
 1                   5                   2  132.000000
 2                   5                   3  120.000000
 3                   5                   4   81.000000
 4                   5                   5  136.000000,
 'df_1':    participant_userId  details.importance  mean_hours
 0                   1                   1  133.333333
 1                   1                   2  149.333333
 2                   1                   3  168.000000
 3                   1                   4   96.750000
 4                   1                 