In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to the project's .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Call clean_batches() from the project's `filter` module.
# NOTE(review): `import filter` rebinds the built-in name `filter` in this
# notebook's namespace for every cell that runs afterwards.
import filter

filter.clean_batches()

In [None]:
# Call reorganize_batches() from the project's `reorganize` module.
import reorganize

reorganize.reorganize_batches()

In [None]:
# Settings: all three values are passed to generate_preprocessed_data() in a later cell.
prompt_style = 'AutoCOT2'
data_count = 0
example_count = 10


In [None]:
# Configs
import config 

In [None]:
# Run generator.js
import subprocess

# Command-line arguments for generator.js: the number of small maps, then the
# number of large maps to generate.
params = ["200", "200"]

# check=True raises CalledProcessError when node exits with a non-zero status,
# so a failed generation is not silently ignored by the cells that follow.
subprocess.run(["node", "generator.js"] + params, check=True)

In [None]:
# Run placer.py
import placer

maps = placer.load_maps()
# The loop variable is not called `map`: in a notebook it would stay bound after
# the loop and shadow the built-in map() for every later cell.
for map_data in maps:
    placer.assign_parameters(map_data, enemy_density=0.05, cohesion=0.3, treasure_density=0.01, range_multiplier=2, boss=True)
    placer.modify_map(map_data, group_min_dist=10, flag_try_count=50, enemy_sparsity=3)
placer.save_maps(maps)

In [None]:
# Run labeler.py
# Input/output locations are read from the `config` module; file_count and
# difficulty_curve_interval are passed straight through to labeler.label().
import labeler

labeler.label(placed_path=config.PLACED_PATH, labelled_path=config.LABELLED_PATH, file_count=4, difficulty_curve_interval=5)

In [None]:
import utility
import numpy as np

# Per-parameter mean and standard deviation over every labelled data entry.
labelled_data_list = utility.load_json_files(config.LABELLED_PATH)
param_name_list = ["map_size", "enemy_count", "treasure_count", "room_count"]

mean_dict, std_dict = {}, {}

for name in param_name_list:
    # One row per labelled entry; axis=0 reduces across entries so that
    # multi-component parameters keep one statistic per component.
    values = np.array([entry["params"][name] for entry in labelled_data_list])
    mean_dict[name] = values.mean(axis=0)
    std_dict[name] = values.std(axis=0)

print(mean_dict)
print(std_dict)

In [None]:
# Generate preprocessed
# Uses the data_count, example_count and prompt_style values assigned in the settings cell above.
from preprocessed_data_generator import generate_preprocessed_data

generate_preprocessed_data(data_count, example_count, prompt_style)

In [None]:
# Run comparator.py
# Input/output locations are read from the `config` module; file_count is
# passed straight through to comparator.compare().
import comparator

comparator.compare(preprocessed_path=config.PREPROCESSED_PATH, compared_path=config.COMPARED_PATH, file_count=49)

In [None]:
# Calculate statistics
import statistician

# Parameters handed to calc_abs_diff_mean_std().
diff_param_name_list = ["map_size", "room_count", "enemy_count", "treasure_count"]

# Parameters handed to calc_after_mean_std().
after_param_name_list = ["playability", "other_ASCII_count", "empty_validation"]

# Each call returns a (mean, std) pair that the graph cell consumes.
abs_diff_mean, abs_diff_std = statistician.calc_abs_diff_mean_std(
    param_name_list=diff_param_name_list
)
after_mean, after_std = statistician.calc_after_mean_std(
    param_name_list=after_param_name_list
)

In [None]:
# Draw Graph
# Merge both statistic groups into new dicts; the operands are left unmodified.
mean_dict = abs_diff_mean | after_mean
std_dict = abs_diff_std | after_std

# Replace the "map_size" entry with separate "map_width" (index 0) and
# "map_height" (index 1) entries in both dicts.
for stats in (mean_dict, std_dict):
    map_size = stats.pop("map_size")
    stats["map_width"] = map_size[0]
    stats["map_height"] = map_size[1]

print(mean_dict)
print(std_dict)
statistician.draw_graph(mean_dict, std_dict)

In [None]:
import utility
from statistician import _calc_abs_diff

# Keep the loaded settings under their own name. Binding them to `config` would
# replace the imported `config` module that other cells read attributes from
# (e.g. config.PREPROCESSED_PATH), breaking those cells on a top-to-bottom run.
loaded_config = utility.load_config()
path = loaded_config["paths"]["compared"]
compared_list = utility.load_json_files(path)
diff_param_name_list = [
    "map_size",
    "room_count",
    "enemy_count",
    "treasure_count",
]

# Absolute differences per parameter, keyed by parameter name; consumed by the
# KDE plotting cell below.
diff_dict = _calc_abs_diff(compared_list, diff_param_name_list)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde

# KDE computation and plotting helper, including percentile bounds.
def plot_kde(data, param_name, label):
    """Plot a Gaussian kernel density estimate of ``data`` with its mode and percentiles.

    Prints the location of the KDE maximum (the mode) and the 50th, 75th, 90th
    and 95th percentiles of ``data``, then shows a matplotlib figure containing
    the density curve, a red marker at the mode and one dashed vertical line
    per percentile.

    Args:
        data: One-dimensional sequence or array of numeric samples.
        param_name: Parameter name used in the printed messages and the title.
        label: Series label used in the printed messages, legend and x-axis.

    Returns:
        None. When the KDE cannot be computed (fewer than two samples, or all
        samples identical) a message is printed and nothing is plotted.
    """
    # A KDE cannot be estimated from fewer than two samples.
    if len(data) <= 1:
        print(f"Not enough data points for KDE for {label} in {param_name}")
        return

    samples = np.asarray(data)
    lowest, highest = samples.min(), samples.max()

    # gaussian_kde raises on zero-variance input (singular covariance matrix),
    # so constant data is reported instead of crashing the whole plotting loop.
    if lowest == highest:
        print(f"Cannot compute KDE for {label} in {param_name}: all {len(samples)} values equal {lowest}")
        return

    # Evaluate the density on an evenly spaced grid spanning the observed range.
    kde = gaussian_kde(samples)
    x_vals = np.linspace(lowest, highest, 1000)
    kde_vals = kde(x_vals)

    # The mode is the grid point at which the estimated density is largest.
    max_idx = np.argmax(kde_vals)
    mode_x = x_vals[max_idx]
    mode_y = kde_vals[max_idx]
    print(f"Mode of {label} in {param_name}: (x: {mode_x}, y: {mode_y})")

    # Percentile -> colour of its boundary line; insertion order fixes both the
    # print order and the legend order.
    percentile_colors = {50: 'blue', 75: 'green', 90: 'orange', 95: 'purple'}
    bounds = {pct: np.percentile(samples, pct) for pct in percentile_colors}
    for pct, bound in bounds.items():
        print(f"{pct}% of {label} in {param_name}: upper bound = {bound}")

    # Density curve with a shaded area underneath.
    plt.figure(figsize=(8, 6))
    plt.plot(x_vals, kde_vals, label=f'KDE of {label}')
    plt.fill_between(x_vals, kde_vals, alpha=0.5)

    # Red dot at the mode.
    plt.plot(mode_x, mode_y, 'ro', label=f'Mode: {mode_x:.2f}')

    # One dashed vertical line per percentile bound.
    for pct, bound in bounds.items():
        plt.axvline(bound, color=percentile_colors[pct], linestyle='--', label=f'{pct}% bound')

    plt.title(f'Kernel Density Estimation for {label} in {param_name}')
    plt.xlabel(f'{label} Value')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True)
    plt.show()

def plot_residual_histogram(diff_dict):
    """Draw a KDE plot (via ``plot_kde``) for every parameter in ``diff_dict``.

    Despite its name, this function draws kernel density estimates rather than
    histograms. Entries with no data are reported and skipped.

    Args:
        diff_dict: Mapping of parameter name to a collection of residuals. For
            the key ``"map_size"`` each element is indexed as ``[0]`` (width)
            and ``[1]`` (height); all other entries are flattened to 1-D.

    Returns:
        None.
    """
    for param_name, residuals in diff_dict.items():
        # Explicit None/length test instead of `not residuals`: truth-testing a
        # multi-element NumPy array raises ValueError.
        if residuals is None or len(residuals) == 0:
            print(f"No data found for parameter: {param_name}")
            continue  # skip empty entries

        if param_name == "map_size":
            # map_size residuals are (width, height) pairs; plot each component separately.
            widths = np.array([size[0] for size in residuals])
            heights = np.array([size[1] for size in residuals])

            plot_kde(widths, param_name, "Width")
            plot_kde(heights, param_name, "Height")
        else:
            # Every other parameter is treated as a flat 1-D series.
            flat_residuals = np.array(residuals).flatten()
            plot_kde(flat_residuals, param_name, param_name)

# Example: draw the KDE plot for every parameter in diff_dict
plot_residual_histogram(diff_dict)


In [None]:
# Calculate novelty
novelty = statistician.calc_novelty()
print(novelty)

In [None]:
# Calculate diversity via statistician.calc_diversity() and print the result
diversity = statistician.calc_diversity()
print(diversity)

In [None]:
import labeler

data_list = labeler.load_folder(path=config.PREPROCESSED_PATH, file_count=84)

# Total number of maps across all loaded entries. It is stored under its own
# name so the `data_count` setting that generate_preprocessed_data() receives
# is not overwritten when that cell is re-run after this one.
total_map_count = sum(len(data["map_list"]) for data in data_list)

print(total_map_count)