In [1]:
import pandas as pd
import numpy as np

In [2]:
import glob
import os
import random
csvs = glob.glob('Player A-*.csv')
print(f"Loading {len(csvs)} files")
columns = [f'cor_{i}' for i in range(25)] + ['status', 'attack']
train_csvs = random.sample(csvs, int(len(csvs)*.8))
test_csvs = random.sample(csvs, int(len(csvs)*.2))
print(f"train - {len(train_csvs)}, test - {len(test_csvs)}")
train_data = pd.DataFrame()
for csv in train_csvs:
    df = pd.read_csv(csv, index_col=None, header=None, names=columns)
    df['prev_attack'] = np.append(['N/A'], df['attack'].to_numpy()[0:-1])
    train_data = pd.concat([train_data, df])
test_data = pd.DataFrame()
for csv in test_csvs:
    df = pd.read_csv(csv, index_col=None, header=None, names=columns)
    df['prev_attack'] = np.append(['N/A'], df['attack'].to_numpy()[0:-1])    
    test_data = pd.concat([test_data, df])
train_data.shape, test_data.shape

Loading 48 files
train - 38, test - 9


((528, 28), (129, 28))

In [3]:
before = len(train_data)
train_data = train_data.drop(train_data['prev_attack'] == 'N/A')
print(f"Dropped {before - len(train_data)} rows")

before = len(test_data)
test_data = test_data.drop(test_data['prev_attack'] == 'N/A')
print(f"Dropped {before - len(test_data)} rows")

Dropped 73 rows
Dropped 18 rows


In [5]:
train_data.status.unique()

array([' Hit', ' Ship Sunk', ' Miss', ' Won'], dtype=object)

In [6]:
def clean_status(df):
    return df.status.apply(str.strip)\
                        .apply(str.lower)\
                        .apply(lambda v: v.replace(' ', ''))\
                        .apply(lambda v: v.replace('n/a', 'na'))
train_data.status = clean_status(train_data)
test_data.status = clean_status(test_data)
train_data.status.head(), test_data.status.head()

(2         hit
 3         hit
 4    shipsunk
 5        miss
 6         hit
 Name: status, dtype: object, 2     hit
 3     hit
 4    miss
 5    miss
 6     hit
 Name: status, dtype: object)

In [7]:
def droprows_na(df):
    before = len(df)
    df = df.dropna(how='any')
    print(f'Dropped {before-len(df)} rows')
    return df

train_data = droprows_na(train_data)
test_data = droprows_na(test_data)

Dropped 29 rows
Dropped 6 rows


In [8]:
train_data.status.unique()

array(['hit', 'shipsunk', 'miss'], dtype=object)

In [51]:
def split_attack(df, col):
    df[col] = df[col].apply(str.lower)
    df[f'{col}_col'] = df[col].apply(lambda v: v[0])
    df[f'{col}_row'] = df[col].apply(lambda v: v[1])
    df = df.drop(columns=[col])
    df.head()
    return df
    
train_data = split_attack(train_data, 'attack')
train_data = split_attack(train_data, 'prev_attack')
test_data = split_attack(test_data, 'attack')
test_data = split_attack(test_data, 'prev_attack')
train_data.columns, test_data.columns

(Index(['cor_0', 'cor_1', 'cor_2', 'cor_3', 'cor_4', 'cor_5', 'cor_6', 'cor_7',
        'cor_8', 'cor_9', 'cor_10', 'cor_11', 'cor_12', 'cor_13', 'cor_14',
        'cor_15', 'cor_16', 'cor_17', 'cor_18', 'cor_19', 'cor_20', 'cor_21',
        'cor_22', 'cor_23', 'cor_24', 'status', 'attack_col', 'attack_row',
        'prev_attack_col', 'prev_attack_row'],
       dtype='object'),
 Index(['cor_0', 'cor_1', 'cor_2', 'cor_3', 'cor_4', 'cor_5', 'cor_6', 'cor_7',
        'cor_8', 'cor_9', 'cor_10', 'cor_11', 'cor_12', 'cor_13', 'cor_14',
        'cor_15', 'cor_16', 'cor_17', 'cor_18', 'cor_19', 'cor_20', 'cor_21',
        'cor_22', 'cor_23', 'cor_24', 'status', 'attack_col', 'attack_row',
        'prev_attack_col', 'prev_attack_row'],
       dtype='object'))

In [52]:
# train_data.columns[train_data.columns.str.match('cor_|status|prev_attack_col')]
train_data.head()

Unnamed: 0,cor_0,cor_1,cor_2,cor_3,cor_4,cor_5,cor_6,cor_7,cor_8,cor_9,...,cor_20,cor_21,cor_22,cor_23,cor_24,status,attack_col,attack_row,prev_attack_col,prev_attack_row
2,0,-1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,miss,e,5,c,3
3,0,-1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,hit,e,4,e,5
4,0,-1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,hit,e,3,e,4
5,0,-1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,shipsunk,a,5,e,3
6,0,-1,0,0,0,0,0,0,0,0,...,-1,0,0,0,1,miss,d,1,a,5


# Data preparation for col classifier

In [54]:
X_train = train_data.loc[:, train_data.columns.str.match('^cor_|^status|^prev_attack_')]
y_train = train_data.loc[:, train_data.columns.str.startswith('attack_col')]
X_train.head(10), y_train.head(2)

(    cor_0  cor_1  cor_2  cor_3  cor_4  cor_5  cor_6  cor_7  cor_8  cor_9  ...  \
 2       0     -1      0      0      0      0      0      0      0      0  ...   
 3       0     -1      0      0      0      0      0      0      0      0  ...   
 4       0     -1      0      0      0      0      0      0      0      0  ...   
 5       0     -1      0      0      0      0      0      0      0      0  ...   
 6       0     -1      0      0      0      0      0      0      0      0  ...   
 7       0     -1      0     -1      0      0      0      0      0      0  ...   
 8       0     -1      0     -1      0      0      0      0      0      0  ...   
 9       0     -1      0     -1      0      0      0      1      0      0  ...   
 10      0     -1      0     -1      0      0      0      1      1      0  ...   
 11      0     -1      0     -1      0      0      1      1      1      0  ...   
 
     cor_18  cor_19  cor_20  cor_21  cor_22  cor_23  cor_24    status  \
 2        0       0    

In [56]:
X_test = test_data.loc[:, test_data.columns.str.match('^cor_|^status|^prev_attack_')]
y_test = test_data.loc[:, test_data.columns.str.startswith('attack_col')]
X_test.head(2), y_test.head(2)

(   cor_0  cor_1  cor_2  cor_3  cor_4  cor_5  cor_6  cor_7  cor_8  cor_9  ...  \
 2      0      0      0      0      0      0      0      0      0      0  ...   
 3      0      0      0      0      0      0      0      0      0      0  ...   
 
    cor_18  cor_19  cor_20  cor_21  cor_22  cor_23  cor_24  status  \
 2       0       0      -1       0       0       0       0    miss   
 3       0       0      -1       0       0       0       0    miss   
 
    prev_attack_col  prev_attack_row  
 2                b                4  
 3                c                3  
 
 [2 rows x 28 columns],   attack_col
 2          c
 3          d)

In [57]:
X_train = pd.get_dummies(X_train, columns=['status','prev_attack_col','prev_attack_row'])
X_test  = pd.get_dummies(X_test, columns=['status','prev_attack_col','prev_attack_row'])

In [58]:
X_train.columns

Index(['cor_0', 'cor_1', 'cor_2', 'cor_3', 'cor_4', 'cor_5', 'cor_6', 'cor_7',
       'cor_8', 'cor_9', 'cor_10', 'cor_11', 'cor_12', 'cor_13', 'cor_14',
       'cor_15', 'cor_16', 'cor_17', 'cor_18', 'cor_19', 'cor_20', 'cor_21',
       'cor_22', 'cor_23', 'cor_24', 'status_hit', 'status_miss',
       'status_shipsunk', 'prev_attack_col_a', 'prev_attack_col_b',
       'prev_attack_col_c', 'prev_attack_col_d', 'prev_attack_col_e',
       'prev_attack_row_1', 'prev_attack_row_2', 'prev_attack_row_3',
       'prev_attack_row_4', 'prev_attack_row_5'],
      dtype='object')

In [59]:
y_train.head()

Unnamed: 0,attack_col
2,e
3,e
4,e
5,a
6,d


In [60]:
X_train.to_csv('dataset_X_train.csv', index_label='index')
y_train.to_csv('dataset_y_train.csv', index_label='index')
X_train.shape, y_train.shape

((419, 38), (419, 1))

In [61]:
X_test.head()

Unnamed: 0,cor_0,cor_1,cor_2,cor_3,cor_4,cor_5,cor_6,cor_7,cor_8,cor_9,...,prev_attack_col_a,prev_attack_col_b,prev_attack_col_c,prev_attack_col_d,prev_attack_col_e,prev_attack_row_1,prev_attack_row_2,prev_attack_row_3,prev_attack_row_4,prev_attack_row_5
2,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
5,0,0,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
6,0,0,0,0,0,1,0,0,1,0,...,0,0,0,1,0,0,1,0,0,0


In [62]:
y_test.head()

Unnamed: 0,attack_col
2,c
3,d
4,a
5,d
6,e


In [63]:
X_test.to_csv('dataset_X_test.csv', index_label='index')
y_test.to_csv('dataset_y_test.csv', index_label='index')
X_test.shape, y_test.shape

((99, 38), (99, 1))

# Data preparation for row classifier

In [64]:
train_data.shape, test_data.shape

((419, 30), (99, 30))

In [65]:
train_data.columns, test_data.columns

(Index(['cor_0', 'cor_1', 'cor_2', 'cor_3', 'cor_4', 'cor_5', 'cor_6', 'cor_7',
        'cor_8', 'cor_9', 'cor_10', 'cor_11', 'cor_12', 'cor_13', 'cor_14',
        'cor_15', 'cor_16', 'cor_17', 'cor_18', 'cor_19', 'cor_20', 'cor_21',
        'cor_22', 'cor_23', 'cor_24', 'status', 'attack_col', 'attack_row',
        'prev_attack_col', 'prev_attack_row'],
       dtype='object'),
 Index(['cor_0', 'cor_1', 'cor_2', 'cor_3', 'cor_4', 'cor_5', 'cor_6', 'cor_7',
        'cor_8', 'cor_9', 'cor_10', 'cor_11', 'cor_12', 'cor_13', 'cor_14',
        'cor_15', 'cor_16', 'cor_17', 'cor_18', 'cor_19', 'cor_20', 'cor_21',
        'cor_22', 'cor_23', 'cor_24', 'status', 'attack_col', 'attack_row',
        'prev_attack_col', 'prev_attack_row'],
       dtype='object'))

In [66]:
X_train_row = train_data.loc[:,train_data.columns[train_data.columns.str.match('^cor_|^status|^prev_attack_|^attack_col')]]
y_train_row = train_data.loc[:, ['attack_row']]
X_test_row = test_data.loc[:,test_data.columns[test_data.columns.str.match('^cor_|^status|^prev_attack_|^attack_col')]]
y_test_row = test_data.loc[:, ['attack_row']]
X_train_row.shape, X_test_row.shape, y_train_row.shape, y_test_row.shape

((419, 29), (99, 29), (419, 1), (99, 1))

In [67]:
X_train_row = pd.get_dummies(X_train_row, columns=['status', 'prev_attack_col', 'prev_attack_row', 'attack_col'])
X_test_row = pd.get_dummies(X_test_row, columns=['status', 'prev_attack_col', 'prev_attack_row', 'attack_col'])

In [68]:
X_train_row.to_csv('dataset_X_train_row.csv', index_label='index')
y_train_row.to_csv('dataset_y_train_row.csv', index_label='index')
X_test_row.to_csv('dataset_X_test_row.csv', index_label='index')
y_test_row.to_csv('dataset_y_test_row.csv', index_label='index')
X_train_row.shape, y_train_row.shape, X_test_row.shape, y_test_row.shape

((419, 43), (419, 1), (99, 43), (99, 1))

In [69]:
X_train_row.head()

Unnamed: 0,cor_0,cor_1,cor_2,cor_3,cor_4,cor_5,cor_6,cor_7,cor_8,cor_9,...,prev_attack_row_1,prev_attack_row_2,prev_attack_row_3,prev_attack_row_4,prev_attack_row_5,attack_col_a,attack_col_b,attack_col_c,attack_col_d,attack_col_e
2,0,-1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
3,0,-1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
4,0,-1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
5,0,-1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
6,0,-1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0


In [70]:
y_train_row.head()

Unnamed: 0,attack_row
2,5
3,4
4,3
5,5
6,1
