In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [None]:
# Read the user-records CSV export into the raw DataFrame that later cells filter.
df = pd.read_csv(filepath_or_buffer='data/user_records_202503130422.csv')

In [None]:
# Lookup table from numeric character_id to the character's display name.
# Used when building the per-row 'id' label (character name + weapon id).
character_name_map = {
    0: "무작위", 1: "재키", 2: "아야", 3: "피오라", 4: "매그너스",
    5: "자히르", 6: "나딘", 7: "현우", 8: "하트", 9: "아이솔",
    10: "리 다이린", 11: "유키", 12: "혜진", 13: "쇼우", 14: "키아라",
    15: "시셀라", 16: "실비아", 17: "아드리아나", 18: "쇼이치", 19: "엠마",
    20: "레녹스", 21: "로지", 22: "루크", 23: "캐시", 24: "아델라",
    25: "버니스", 26: "바바라", 27: "알렉스", 28: "수아", 29: "레온",
    30: "일레븐", 31: "리오", 32: "윌리엄", 33: "니키", 34: "나타폰",
    35: "얀", 36: "이바", 37: "다니엘", 38: "제니", 39: "카밀로",
    40: "클로에", 41: "요한", 42: "비앙카", 43: "셀린", 44: "에키온",
    45: "마이", 46: "에이든", 47: "라우라", 48: "띠아", 49: "펠릭스",
    50: "엘레나", 51: "프리야", 52: "아디나", 53: "마커스", 54: "칼라",
    55: "에스텔", 56: "피올로", 57: "마르티나", 58: "헤이즈", 59: "아이작",
    60: "타지아", 61: "이렘", 62: "테오도르", 63: "이안", 64: "바냐",
    65: "데비&마를렌", 66: "아르다", 67: "아비게일", 68: "알론소", 69: "레니",
    70: "츠바메", 71: "케네스", 72: "카티야", 73: "샬럿", 74: "다르코",
    75: "르노어", 76: "가넷", 77: "유민", 78: "히스이", 79: "유스티나",
    9999: "나쟈",
}

In [None]:
# Optional single-character filter: set to a character_id (e.g. 10) to restrict
# the analysis to that character; None keeps every character.
filter_by_character_id = None

# Row-selection thresholds. play_time is divided by 60 below to obtain
# per-minute rates, so these bounds are presumably in seconds.
MIN_PLAY_TIME = 15 * 60
MAX_PLAY_TIME = 1350  # i.e. 22.5 * 60
MIN_RP = 5000

row_mask = (
    df['mode'].isin([2, 3])
    & (df['play_time'] >= MIN_PLAY_TIME)
    & (df['play_time'] <= MAX_PLAY_TIME)
    & (df['rp'] >= MIN_RP)
)
if filter_by_character_id is not None:
    row_mask &= df['character_id'] == filter_by_character_id

df_filtered = df[row_mask]
if df_filtered.empty:
    # Fail here with a clear message rather than deep inside StandardScaler.
    raise ValueError("No rows left after filtering; relax the mode/play_time/rp thresholds.")

minutes_played = df_filtered['play_time'] / 60

# Character ids that are missing from character_name_map get a
# placeholder label instead of aborting the whole cell with a KeyError.
character_names = df_filtered['character_id'].map(
    lambda cid: character_name_map.get(cid, f'unknown({cid})')
)

# Per-minute rates plus a combined "<character name><weapon id>" label per row.
df_standardized = pd.DataFrame({
    'damage_dealt_per_min': df_filtered['damage_dealt_to_players'] / minutes_played,
    'damage_taken_per_min': df_filtered['damage_taken_from_players'] / minutes_played,
    'healing_per_min': df_filtered['healing_amount'] / minutes_played,
    'id': character_names + df_filtered['weapon_id'].astype(str),
})

# One row per label: the mean of each per-minute rate.
df_aggregated = df_standardized.groupby('id').mean().reset_index()

features = ['damage_dealt_per_min', 'damage_taken_per_min', 'healing_per_min']
X = df_aggregated[features]

# Standardize each feature (zero mean, unit variance) before clustering.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=features)

print(f"데이터 크기: {len(X_scaled_df)}")

In [None]:
# Cluster the standardized per-label features.
# n_init is pinned because its default differs between scikit-learn releases;
# fixing it keeps results comparable across versions.
N_CLUSTERS = 3
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=42)
df_aggregated['cluster'] = kmeans.fit_predict(X_scaled)

# Cluster centers mapped back from standardized units to the original feature units.
centers = kmeans.cluster_centers_
centers_df = pd.DataFrame(scaler.inverse_transform(centers),
                          columns=features)

# Assign a color to each cluster.
# Keys cover every label KMeans can emit (0..n_clusters-1) rather than only the
# labels observed in the data, so a lookup by label never misses; the palette
# wraps around if there are more clusters than colors.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
cluster_colors = {i: colors[i % len(colors)] for i in range(kmeans.n_clusters)}

# 3D scatter of the three per-minute features, colored by cluster.
# Plotly Express treats a numeric color column as continuous and then ignores
# color_discrete_map, so the labels are cast to strings on a plotting copy
# (df_aggregated itself keeps its integer labels) and the color map is re-keyed
# to match.
plot_df_3d = df_aggregated.assign(cluster=df_aggregated['cluster'].astype(str))
color_map_3d = {str(cluster_id): color for cluster_id, color in cluster_colors.items()}

fig_3d = px.scatter_3d(
    plot_df_3d,
    x='damage_dealt_per_min',
    y='damage_taken_per_min',
    z='healing_per_min',
    color='cluster',
    color_discrete_map=color_map_3d,
    # Keep legend entries in numeric cluster order.
    category_orders={'cluster': sorted(color_map_3d, key=int)},
    hover_data=['id'],
    labels={
        'damage_dealt_per_min': '분당 공격량',
        'damage_taken_per_min': '분당 피해량',
        'healing_per_min': '분당 치유량',
        'cluster': '클러스터'
    },
    title='캐릭터 군집 분석 결과 (3D)'
)

# Add one diamond marker per cluster center, using the same palette as the points.
for i, center in centers_df.iterrows():
    fig_3d.add_scatter3d(
        x=[center['damage_dealt_per_min']],
        y=[center['damage_taken_per_min']],
        z=[center['healing_per_min']],
        mode='markers',
        marker=dict(color=colors[i % len(colors)], size=10, symbol='diamond'),
        name=f'클러스터 {i} 중심'
    )

fig_3d.update_layout(
    scene=dict(
        xaxis_title='분당 공격량',
        yaxis_title='분당 피해량',
        zaxis_title='분당 치유량'
    ),
    legend_title_text='클러스터',
    width=900,
    height=700
)

# Pair plot (scatter matrix) of the clustering features, colored by cluster.
# The integer cluster labels are cast to strings on a plotting copy because
# Plotly Express uses a continuous color scale for numeric color columns and
# would ignore color_discrete_map; the color map is re-keyed to match.
pair_plot_df = df_aggregated.assign(cluster=df_aggregated['cluster'].astype(str))
pair_color_map = {str(cluster_id): color for cluster_id, color in cluster_colors.items()}

fig_pair = px.scatter_matrix(
    pair_plot_df,
    dimensions=features,
    color='cluster',
    color_discrete_map=pair_color_map,
    # Keep legend entries in numeric cluster order.
    category_orders={'cluster': sorted(pair_color_map, key=int)},
    hover_data=['id'],
    labels={
        'damage_dealt_per_min': '분당 공격량',
        'damage_taken_per_min': '분당 피해량',
        'healing_per_min': '분당 치유량',
        'cluster': '클러스터'
    },
    title='캐릭터 군집 분석 결과 (페어플롯)'
)

fig_pair.update_layout(
    width=900,
    height=900
)

# 2D views: one scatter panel per feature pair, plus a bar chart of cluster sizes.
features_pairs = [
    ('damage_dealt_per_min', 'damage_taken_per_min'),
    ('damage_dealt_per_min', 'healing_per_min'),
    ('damage_taken_per_min', 'healing_per_min')
]

# Single source for the display label of each feature, so subplot titles and
# axis titles cannot drift apart.
axis_labels = {
    'damage_dealt_per_min': '분당 공격량',
    'damage_taken_per_min': '분당 피해량',
    'healing_per_min': '분당 치유량',
}

# 2x2 grid: three scatter panels followed by the cluster-size bar chart.
fig_2d = make_subplots(rows=2, cols=2, subplot_titles=[
    *(f'{axis_labels[x_feat]} vs {axis_labels[y_feat]}' for x_feat, y_feat in features_pairs),
    '클러스터별 캐릭터 분포'
])

# Fill the first three panels with per-cluster scatters and their centers.
for i, (x_feat, y_feat) in enumerate(features_pairs):
    row, col = (i // 2) + 1, (i % 2) + 1

    for cluster_id in np.unique(df_aggregated['cluster']):
        cluster_data = df_aggregated[df_aggregated['cluster'] == cluster_id]
        # Traces sharing a legendgroup toggle together, so clicking a cluster
        # in the legend hides it in every panel, not only the first one.
        group = f'cluster-{cluster_id}'

        fig_2d.add_trace(
            go.Scatter(
                x=cluster_data[x_feat],
                y=cluster_data[y_feat],
                mode='markers',
                marker=dict(color=cluster_colors[cluster_id]),
                name=f'클러스터 {cluster_id}',
                text=cluster_data['id'],
                legendgroup=group,
                showlegend=(i == 0),  # one legend entry per cluster (first panel only)
            ),
            row=row, col=col
        )

        # Cluster center for this pair of features.
        fig_2d.add_trace(
            go.Scatter(
                x=[centers_df.iloc[cluster_id][x_feat]],
                y=[centers_df.iloc[cluster_id][y_feat]],
                mode='markers',
                marker=dict(
                    color=cluster_colors[cluster_id],
                    size=12,
                    symbol='diamond',
                    line=dict(color='black', width=1)
                ),
                name=f'중심점 {cluster_id}',
                legendgroup=group,
                showlegend=False
            ),
            row=row, col=col
        )

    # Axis titles for this panel.
    fig_2d.update_xaxes(title_text=axis_labels[x_feat], row=row, col=col)
    fig_2d.update_yaxes(title_text=axis_labels[y_feat], row=row, col=col)

# Last panel: number of labels (rows of df_aggregated) in each cluster.
cluster_counts = df_aggregated['cluster'].value_counts().sort_index()
fig_2d.add_trace(
    go.Bar(
        x=cluster_counts.index.astype(str),
        y=cluster_counts.values,
        marker_color=[cluster_colors[i] for i in cluster_counts.index],
        text=cluster_counts.values,
        textposition='auto',
        name='캐릭터 수',
        showlegend=False,  # otherwise an unnamed "trace N" entry appears in the legend
    ),
    row=2, col=2
)

fig_2d.update_xaxes(title_text='클러스터', row=2, col=2)
fig_2d.update_yaxes(title_text='캐릭터 수', row=2, col=2)

fig_2d.update_layout(
    title_text='캐릭터 군집 분석 결과 (2D)',
    height=800,
    width=1000
)

# Render the figures in order: 3D scatter, pair plot, then the 2D panel.
for figure in (fig_3d, fig_pair, fig_2d):
    figure.show()