<h1>Imports</h1>

In [None]:
import os
import sys
import gzip

import pandas as pd
import numpy as np

from sklearn.utils import shuffle

sys.path.append('..')
from slp_package import slp_functions

<h1> Load data </h1>

**Note**: Games are labelled 1 for Mango vs Zain and 0 for Mango vs Kodorin.

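We filter the DataFrame down to Mango vs Kodorin and Mango vs Zain games whose `length` is at least 1024. We then collect the file paths of Mango's inputs for each opponent, truncate both classes to the size of the smaller one so the dataset is balanced, and build the corresponding labels.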

In [None]:
# Build the merged game table for mango, then keep only the games in which
# MANG#0 faces KOD#0 or ZAIN#0 (on either port) and whose `length` is >= 1024.
df = slp_functions.create_merged_game_data_df(['mango'])

mango_code = 'MANG#0'
opponent_codes = ['KOD#0', 'ZAIN#0']
p1_code = df['player_1_netplay_code']
p2_code = df['player_2_netplay_code']

# One mask per side mango can be on; the opponent must be on the other side.
mango_is_p1 = (p1_code == mango_code) & p2_code.isin(opponent_codes)
mango_is_p2 = (p2_code == mango_code) & p1_code.isin(opponent_codes)
long_enough = df['length'] >= 1024

df = df[(mango_is_p1 | mango_is_p2) & long_enough]

# Accumulators for the dataset: X collects input-file paths, y the labels.
X, y = [], []
netcodes = ['KOD#0', 'ZAIN#0']

# Find the size of the smaller class so both opponents can be truncated to it.
# -1 is the "nothing seen yet" sentinel.
min_segments = -1
for opponent in netcodes:
    opponent_is_p1 = df['player_1_netplay_code'] == opponent
    opponent_is_p2 = df['player_2_netplay_code'] == opponent
    # Count the rows on each side; the selected column is the *other* player's
    # (i.e. mango's) inputs path.
    count = (
        len(df.loc[opponent_is_p1, 'player_2_inputs_np_save_path'])
        + len(df.loc[opponent_is_p2, 'player_1_inputs_np_save_path'])
    )
    print(f'{count} segments for {opponent}')
    if min_segments == -1 or count < min_segments:
        min_segments = count

# Collect mango's input-file paths per opponent, capped at `min_segments` each.
for opponent in netcodes:
    # Mango's inputs are stored under whichever player slot the opponent is NOT in.
    when_opponent_p1 = df.loc[
        df['player_1_netplay_code'] == opponent, 'player_2_inputs_np_save_path'
    ].tolist()
    when_opponent_p2 = df.loc[
        df['player_2_netplay_code'] == opponent, 'player_1_inputs_np_save_path'
    ].tolist()

    # Truncation keeps entries from the front, so rows where the opponent is
    # player 1 are retained before rows where the opponent is player 2.
    selected = (when_opponent_p1 + when_opponent_p2)[:min_segments]

    label = 1 if opponent == 'ZAIN#0' else 0
    X.extend(selected)
    y.extend([label] * len(selected))
    print(f'{len(selected)} rows added for {opponent}')


<h1> Shuffle and Save Data </h1>

In [None]:
# Shuffle paths and labels together (same permutation) so the two opponents
# are interleaved; the fixed random_state keeps the order reproducible.
X, y = shuffle(np.array(X), np.array(y), random_state=42)

# At this point, X and y are your balanced dataset ready for further processing
print('Total number of data points: ', X.shape[0])

# Save data lists
save_path = 'C:/Users/jaspa/Grant ML/slp/data/'

# X and y must go to *different* files. Previously both were written to
# 'mango_vs_zain_or_kodorin.npy.gz', so the labels overwrote the paths and X
# was never persisted. Any loader reading the old file name must be updated.
X_file = os.path.join(save_path, 'mango_vs_zain_or_kodorin_X.npy.gz')
y_file = os.path.join(save_path, 'mango_vs_zain_or_kodorin_y.npy.gz')

with gzip.open(X_file, 'wb') as f:
    np.save(f, X)

with gzip.open(y_file, 'wb') as f:
    np.save(f, y)