In [None]:
import gymnasium as gym
import gymnasium_2048
import pickle
import pandas as pd
import numpy as np
import torch
import sys
sys.path.append('..')

from simulations.run import create_dataset, preprocess_dataset
from simulations.base_agents import NNAgent

%load_ext autoreload
%autoreload 2

## Создание датасета

In [2]:
ENV_ID = "gymnasium_2048/TwentyFortyEight-v0"

In [3]:
env = gym.make(ENV_ID)
agent = NNAgent(env)
agent.load_weights('../weights_ep5000.pth')
agent.model.eval()

Net(
  (conv1): Conv2d(16, 128, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(128, 128, kernel_size=(2, 2), stride=(1, 1))
  (fc1): Linear(in_features=2048, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=4, bias=True)
)

In [None]:
dataset, scores = create_dataset(
    env, 
    agent,
    n_episodes=200,
    visualize=False,
    on_illegal='ask', 
    save_path=['../nn_expert_dataset.pkl']
)

100%|██████████| 200/200 [03:22<00:00,  1.01s/it]


Датасет сохранён в: ../nn_expert_dataset1.pkl
Собрано 86431 ходов из 200 эпизодов.


In [31]:
np.mean(scores)

np.float64(6352.3)

## Загрузка датасета

In [None]:
with open("../nn_expert_dataset.pkl", "rb") as f:
    data = pickle.load(f)

In [33]:
data.keys()

dict_keys(['boards', 'features', 'actions', 'rewards', 'episode_ids', 'step_ids', 'Q-values'])

In [44]:
features_narrow = [
    'snake_weighted_sum',            
    'monotonicity',   
    'tile_sum',    
    'potential_merges', 
    'corner_weighted_sum',
    'num_empty',
    'max_tile',
    'smoothness',
    'corner_sum',
    'second_max_tile',
    'edge_occupancy',   
    'conv_vert_gradient', 
    'conv_horiz_gradient',
    'entropy'  
]

In [None]:
# загрузка данных по 100 лучшим партиям с расширением
features, target = preprocess_dataset(
    "../nn_expert_dataset.pkl", 
    keep_best=100, 
    features_list=features_narrow, 
    expand=True, 
    normalize=True
)
dataset = {
    'features': features, 
    'target': target
}

print(f'length of the dataset: {len(features)}')
with open('../nn_expert_dataset_expanded.pkl', "wb") as f:
    pickle.dump(dataset, f)

length of the dataset: 232452


In [54]:
# загрузка данных по каждой сложности отдельно (в зависимости от количества свободных ячеек на поле)
(
    (features_easy, target_easy), 
    (features_medium, target_medium), 
    (features_hard, target_hard)
) = preprocess_dataset(
    "../nn_expert_dataset.pkl", 
    keep_best=100, 
    features_list=features_narrow, 
    expand=True, 
    transform='divide_by_empty',
    normalize=True
)

condition_dict = {
    'easy': {
        'length': len(features_easy), 
        'condition': '(count_empty >= 8)'
    }, 
    'medium': {
        'length': len(features_medium), 
        'condition': '(count_empty < 8) & (count_empty >= 4)'
    }, 
    'hard': {
        'length': len(features_hard), 
        'condition': '(count_empty < 8)'
    }, 
}

for key, val in condition_dict.items():
    print('-' * 50)
    print(f'Information for level {key}')
    conditioon = val['condition']
    print(f'condition: {conditioon}')
    length = val['length']
    print(f'length of the expanded dataset: {length}')
    
    
dataset = {
    'features_easy': features_easy, 
    'target_easy': target_easy, 
    'features_medium': features_medium, 
    'target_medium': target_medium, 
    'features_hard': features_hard, 
    'target_hard': target_hard 
}

with open('../nn_expert_dataset_expanded_level.pkl', "wb") as f:
    pickle.dump(dataset, f)

--------------------------------------------------
Information for level easy
condition: (count_empty >= 8)
length of the expanded dataset: 15079
--------------------------------------------------
Information for level medium
condition: (count_empty < 8) & (count_empty >= 4)
length of the expanded dataset: 116335
--------------------------------------------------
Information for level hard
condition: (count_empty < 8)
length of the expanded dataset: 107098
