## Data Parsing

In [1]:
import pandas as pd
import re

def parse_data(file_path):
    """Extract enhancement and sale events from a chat-log CSV.

    Only rows whose ``User`` column equals '플레이봇' are inspected. Each of
    their messages is scanned with regular expressions; a row is kept when
    the word following '강화 ' is one of '유지', '성공', '파괴', or, if no such
    word is present, when an obtained-gold amount is found (recorded as
    '판매'). Everything else is dropped.

    The result is written next to the input as ``<input stem>_parsed.csv``.

    Args:
        file_path: Path to a CSV file with at least ``User`` and ``Message``
            columns.

    Returns:
        pandas.DataFrame with columns ``User``, ``Result``, ``Level``,
        ``Sword_name``, ``Obtain_gold`` and ``Message``. Extracted values are
        kept as strings (or None when the pattern did not match).
    """
    # Local import: `os` is not among the notebook's existing imports.
    import os

    columns = ["User", "Result", "Level", "Sword_name", "Obtain_gold", "Message"]
    valid_results = ('유지', '성공', '파괴', '판매')

    df = pd.read_csv(file_path)
    df = df[df['User'] == '플레이봇']

    # Collect plain dicts and build the frame once; appending to a DataFrame
    # row by row re-allocates on every insert.
    records = []
    for message in df["Message"]:
        # An empty CSV cell is read back as NaN (a float), which the regex
        # functions cannot handle.
        if not isinstance(message, str):
            continue

        name_pattern = re.findall(r'@(\w+)', message)
        name = name_pattern[0] if name_pattern else None
        enhance_pattern = re.findall(r'강화 (\w+)', message)
        # NOTE(review): this takes the first "+<digits>" anywhere in the
        # message; if a message has no "[+N]" tag before the gold amount it
        # would pick up the leading digits of the gold value - confirm format.
        level_pattern = re.findall(r'\+(\d+)', message)
        level = level_pattern[0] if level_pattern else None
        sword_name_pattern = re.findall(r'\[\+\d+\]\s([\w\s]+)', message)
        sword_name = sword_name_pattern[0] if sword_name_pattern else None
        sell_pattern = re.findall(r'획득 골드: \+([\d,]+)G', message)
        obtain_gold = sell_pattern[0].replace(',', '') if sell_pattern else None

        if enhance_pattern:
            result = enhance_pattern[0]
        elif obtain_gold:
            result = '판매'
        else:
            continue

        if result not in valid_results:
            continue

        records.append({
            'User': name,
            'Result': result,
            'Level': level,
            'Sword_name': sword_name,
            'Obtain_gold': obtain_gold,
            'Message': message,
        })

    new_df = pd.DataFrame(records, columns=columns)

    # splitext only strips the final extension, so dots in directory names or
    # a leading "./" no longer truncate the output path.
    root, _ = os.path.splitext(file_path)
    new_file_path = root + '_parsed.csv'
    new_df.to_csv(new_file_path, index=False)
    return new_df



In [2]:
# Raw chat-log exports data/1.csv .. data/4.csv, each parsed in turn.
datas = [f'data/{number}.csv' for number in range(1, 5)]
for data in datas:
    parse_data(data)

## Calculate probabilities

In [None]:
# Merge the parsed per-file outputs into a single frame.
parsed_datas = ['data/1_parsed.csv', 'data/2_parsed.csv', 'data/3_parsed.csv', 'data/4_parsed.csv']
data = pd.concat([pd.read_csv(file) for file in parsed_datas], ignore_index=True)
# Levels that cannot be read as numbers become NaN instead of raising.
data['Level'] = pd.to_numeric(data['Level'], errors='coerce')

# One frame per user, rows kept in their original order and renumbered from 0
# so that "index - 1" is always the same user's previous event.
# sort=False keeps users in order of first appearance; dropna=False keeps rows
# whose User is missing (they end up together under a NaN key).
name_datas = {
    name: group.reset_index(drop=True)
    for name, group in data.groupby('User', sort=False, dropna=False)
}

for name, df in name_datas.items():
    # Rows without a user are unrelated to each other, so copying values from
    # the "previous" row only makes sense inside a real user's history.
    if pd.notna(name):
        for index in range(1, len(df)):
            if df.at[index, 'Result'] != '파괴':
                continue
            # A '파괴' row takes Level and Sword_name from the row directly
            # before it (presumably because the message itself no longer
            # describes the sword that was lost - TODO confirm). Reading from
            # df, not a snapshot, lets consecutive '파괴' rows chain.
            for column in ('Level', 'Sword_name'):
                previous = df.at[index - 1, column]
                if pd.notna(previous):
                    df.at[index, column] = previous

    df.to_csv(f'data/{name}.csv', index=False)
    print(f'Saved data for {name} to data/{name}.csv')

In [5]:
import numpy as np

level_datas = {}

# Tally enhancement outcomes and collect sale prices for every sword level.
for _, df in name_datas.items():
    for _, row in df.iterrows():
        level = row['Level']
        if pd.isna(level):
            # No usable level for this row; a NaN key would also make the
            # integer index conversion further down fail.
            continue
        result = row['Result']
        if level not in level_datas:
            # Sale prices go into a plain list; the statistics are computed
            # once at the end instead of re-allocating an array per sale.
            level_datas[level] = {'성공': 0, '유지': 0, '파괴': 0, '판매': []}
        if result == '판매':
            level_datas[level][result].append(int(row['Obtain_gold']))
        else:
            level_datas[level][result] += 1

# Turn raw counts into percentages of all enhancement attempts at that level
# and summarise the sale prices.
for level, results in level_datas.items():
    total = results['성공'] + results['유지'] + results['파괴']
    for result in ('성공', '유지', '파괴'):
        results[result] = (results[result] / total) * 100 if total > 0 else 0
    results["Count"] = total
    # Remove the raw price list before adding the summary columns so the final
    # column order stays 성공, 유지, 파괴, Count, AVG_sell, STD_sell.
    sales = results.pop("판매")
    results["AVG_sell"] = np.mean(sales) if sales else 0
    # np.std uses its default ddof=0 (population standard deviation).
    results["STD_sell"] = np.std(sales) if sales else 0

level_summary = pd.DataFrame.from_dict(level_datas, orient='index')
level_summary.index = level_summary.index.astype(int)
level_summary.index.name = 'Level'
level_summary = level_summary.sort_index()
level_summary.to_csv('level_summary.csv')
print('Saved level summary to level_summary.csv')


Saved level summary to level_summary.csv
