In [15]:
import itertools
import json
import os
import numpy as np
import pandas as pd

from collections import Counter

# Notebook display limits: render at most 50 columns and 30 rows of a frame.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 30)

# MdMC utils

In [2]:
class MCModel:
    """Holds the fixed 3x3 integer matrix ``dmatrix``.

    The matrix has 1-entries at positions (1, 1), (1, 2) and (2, 1) and a
    single 2-entry at (2, 2); every other entry is 0.

    NOTE(review): presumably 1 marks the context cells and 2 the predicted
    cell of the Markov-chain neighbourhood -- confirm against the model code.
    """

    def __init__(self) -> None:
        rows = [
            [0, 0, 0],
            [0, 1, 1],
            [0, 1, 2],
        ]
        self.dmatrix = np.array(rows)


In [4]:
def submatrix_to_count(array):
    """Split a window into its context pattern and its target cell.

    Parameters
    ----------
    array
        2-D array of strings indexable up to ``[2, 2]``.

    Returns
    -------
    tuple
        ``(pattern, tile)`` where ``pattern`` is the concatenation of the
        cells at (1, 1), (1, 2) and (2, 1), in that order, and ``tile`` is
        the cell at (2, 2).
    """
    context_cells = (array[1, 1], array[1, 2], array[2, 1])
    pattern = "".join(context_cells)
    target_tile = array[2, 2]
    return pattern, target_tile

def get_all_submatrices(array, xmax=23, ymax=40, size=2):
    """Collect every full square sliding window of ``array``.

    Parameters
    ----------
    array
        2-D NumPy array to scan.
    xmax, ymax : int
        Upper bounds on the number of rows / columns that are scanned. They
        are clamped to the actual shape of ``array`` so that no truncated
        window is ever produced.
    size : int
        Offset between the first and last row/column of a window, i.e. each
        window is ``(size + 1) x (size + 1)``. The default of 2 gives 3x3
        windows.

    Returns
    -------
    list
        The windows, ordered by the row then the column of their top-left
        corner. Empty if the scanned area is smaller than one window.
    """
    # The window width was previously hard-coded to 3 while the loop bounds
    # used ``size``; derive both from ``size`` so they always agree.
    window = size + 1
    n_rows = min(xmax, array.shape[0])
    n_cols = min(ymax, array.shape[1])
    return [
        array[x:x + window, y:y + window]
        for x in range(n_rows - size)
        for y in range(n_cols - size)
    ]

## Pre-processing with only fg

In [5]:
# Load the levels stored in the fg JSON file.
with open("../data_pcg_ready/40x23_fg/lvls_fg.json", "r") as f:
    d_levels = json.load(f)

# Every distinct tile symbol that occurs in any level.
all_tiles = set(
    itertools.chain.from_iterable(
        np.array(lv).flatten() for lv in d_levels.values()
    )
)

# One (initially empty) list of observed target tiles per 3-symbol pattern.
d_absolute_counts = {
    "".join(perm): [] for perm in itertools.product(all_tiles, repeat=3)
}

# Record, for each window of each level, which tile followed which pattern.
for lvl in d_levels.values():
    for submatrix in get_all_submatrices(np.array(lvl)):
        pattern, tiletype = submatrix_to_count(submatrix)
        d_absolute_counts[pattern].append(tiletype)

# Collapse the observation lists into {tile: count} dicts.
d_absolute_counts = {
    key: dict(Counter(observed)) for key, observed in d_absolute_counts.items()
}

# Normalise the counts of every pattern that was seen at least once.
d_proba_estimation = {}
for key, d_temp in d_absolute_counts.items():
    if not d_temp:
        continue
    total = sum(d_temp.values())
    d_proba_estimation[key] = {k: (v / total) for k, v in d_temp.items()}

## Pre-processing with recombined maps

In [40]:
# Full symbol alphabet, one single-character string per tile/entity symbol.
all_tiles_and_entities = list("01^<v>DC_OQWSBRFL")

In [41]:
# Directory holding the room CSV files; the bare listdir call below only
# displays its contents as the cell output.
data_path = "../data_pcg_ready/rooms_40x23/"
os.listdir(data_path)

['3_lvl_08-d_fg.csv',
 '3_lvl_11-z_fg.csv',
 '7_lvl_d-05b_fg.csv',
 '3_lvl_01-b_fg.csv',
 '6_lvl_boss-15_fg.csv',
 'LostLevels_lvl_e-03-dummy_fg.csv',
 '8_lvl_inside_fg.csv',
 '2_lvl_d6_fg.csv',
 '3_lvl_11-y_fg.csv',
 '5_lvl_b-03_fg.csv',
 '3_lvl_04-c_fg.csv',
 '2_lvl_6_fg.csv',
 '6_lvl_boss-04_fg.csv',
 '3_lvl_12-y_fg.csv',
 '9H_lvl_b-00_fg.csv',
 'LostLevels_lvl_h-04b_fg.csv',
 '1H_lvl_08_fg.csv',
 '4_lvl_a-10_fg.csv',
 '5_lvl_c-10_fg.csv',
 '3X_lvl_02-dummy_fg.csv',
 '1_lvl_3_fg.csv',
 '2_lvl_d7_fg.csv',
 '2_lvl_end_3c_fg.csv',
 '5_lvl_a-10_fg.csv',
 '3_lvl_10-c_fg.csv',
 '6_lvl_boss-05_fg.csv',
 '1_lvl_6c_fg.csv',
 '7_lvl_a-04b_fg.csv',
 '3_lvl_13-x_fg.csv',
 '3_lvl_08-b_fg.csv',
 '6_lvl_b-00c_fg.csv',
 '5_lvl_a-07_fg.csv',
 '3_lvl_08-a_fg.csv',
 '1_lvl_8_fg.csv',
 '5_lvl_a-13_fg.csv',
 '1H_lvl_00_fg.csv',
 '7_lvl_f-02_fg.csv',
 '3_lvl_04-b_fg.csv',
 '3_lvl_07-a_fg.csv',
 '2_lvl_5_fg.csv',
 '8_lvl_outside_fg.csv',
 '5_lvl_a-15_fg.csv',
 '3_lvl_02-a_fg.csv',
 '3_lvl_0x-a_fg.csv',
 '

In [42]:
# One fresh, independent list per 3-symbol pattern over the full alphabet.
d_absolute_counts = {
    "".join(perm): []
    for perm in itertools.product(all_tiles_and_entities, repeat=3)
}

In [43]:
# os.listdir returns entries in arbitrary order; sorting makes the order of
# the accumulated observations (and thus of the exported keys) reproducible.
for fn in sorted(os.listdir(data_path)):
    # Skip anything that is not a CSV file (e.g. hidden files or folders),
    # which would otherwise make read_csv fail.
    if not fn.lower().endswith(".csv"):
        continue
    # Read every cell as a string so tile symbols such as "0"/"1" are not
    # parsed as numbers.
    lvl_temp = pd.read_csv(os.path.join(data_path, fn), header=None, sep=";", dtype=str).to_numpy(dtype=str)
    for submatrix in get_all_submatrices(lvl_temp):
        pattern, tiletype = submatrix_to_count(submatrix)
        # A KeyError here means the file contains a symbol that is missing
        # from the pattern alphabet used to initialise d_absolute_counts.
        d_absolute_counts[pattern].append(tiletype)

In [44]:
# Collapse each pattern's observations into a {tile: count} dict.
d_absolute_counts = {
    key: dict(Counter(observed)) for key, observed in d_absolute_counts.items()
}

# Turn the counts into relative frequencies; patterns never observed are
# left out of the estimation table.
d_proba_estimation = {}
for key, d_temp in d_absolute_counts.items():
    if not d_temp:
        continue
    total = sum(d_temp.values())
    d_proba_estimation[key] = {k: (v / total) for k, v in d_temp.items()}

In [45]:
d_proba_estimation

{'000': {'0': 0.9770323060705649,
  'R': 0.0007068890680225708,
  'S': 0.01505549699262107,
  '1': 0.004501767222670056,
  '_': 0.0009177156321696534,
  '^': 0.0009425187573634278,
  'C': 0.00029763750232529297,
  'Q': 0.0003348421901159546,
  '<': 0.00014881875116264649,
  'D': 3.720468779066162e-05,
  'O': 2.4803125193774414e-05},
 '001': {'0': 0.10569327158812312,
  '1': 0.8812312721329338,
  'S': 0.009261781530918006,
  'W': 0.00027240533914464724,
  '>': 0.0005448106782892945,
  '^': 0.002451648052301825,
  '_': 0.0005448106782892945},
 '00^': {'^': 0.8327586206896552,
  '0': 0.15517241379310345,
  '1': 0.006896551724137931,
  '_': 0.0034482758620689655,
  'S': 0.0017241379310344827},
 '00<': {'1': 1.0},
 '00>': {'0': 1.0},
 '00D': {'D': 0.972972972972973, '0': 0.02702702702702703},
 '00C': {'C': 0.6229508196721312, '0': 0.3770491803278688},
 '00_': {'_': 0.7335526315789473,
  '0': 0.2565789473684211,
  '^': 0.006578947368421052,
  '1': 0.003289473684210526},
 '00O': {'0': 1.0},
 

In [46]:
# Give every estimated pattern an explicit 0.0 entry for each symbol that
# was never observed after it; existing probabilities are left untouched.
for symbol_probs in d_proba_estimation.values():
    for symbol in all_tiles_and_entities:
        symbol_probs.setdefault(symbol, 0.0)

In [47]:
d_proba_estimation

{'000': {'0': 0.9770323060705649,
  'R': 0.0007068890680225708,
  'S': 0.01505549699262107,
  '1': 0.004501767222670056,
  '_': 0.0009177156321696534,
  '^': 0.0009425187573634278,
  'C': 0.00029763750232529297,
  'Q': 0.0003348421901159546,
  '<': 0.00014881875116264649,
  'D': 3.720468779066162e-05,
  'O': 2.4803125193774414e-05,
  'v': 0.0,
  '>': 0.0,
  'W': 0.0,
  'B': 0.0,
  'F': 0.0,
  'L': 0.0},
 '001': {'0': 0.10569327158812312,
  '1': 0.8812312721329338,
  'S': 0.009261781530918006,
  'W': 0.00027240533914464724,
  '>': 0.0005448106782892945,
  '^': 0.002451648052301825,
  '_': 0.0005448106782892945,
  '<': 0.0,
  'v': 0.0,
  'D': 0.0,
  'C': 0.0,
  'O': 0.0,
  'Q': 0.0,
  'B': 0.0,
  'R': 0.0,
  'F': 0.0,
  'L': 0.0},
 '00^': {'^': 0.8327586206896552,
  '0': 0.15517241379310345,
  '1': 0.006896551724137931,
  '_': 0.0034482758620689655,
  'S': 0.0017241379310344827,
  '<': 0.0,
  'v': 0.0,
  '>': 0.0,
  'D': 0.0,
  'C': 0.0,
  'O': 0.0,
  'Q': 0.0,
  'W': 0.0,
  'B': 0.0,
  

In [48]:
# Write the probability table next to the notebook as indented JSON.
with open("./probability_estimation.json", "w") as out_file:
    json.dump(d_proba_estimation, out_file, indent=4)