In [1]:
# no. of times to sample a single combo to add defensive moves
SAMPLE_TIMES = 100

In [2]:
import pandas as pd
import numpy as np

df = pd.read_csv("basic_drills.txt", sep="\t", header=None)


In [3]:
df.head()


Unnamed: 0,0
0,"1, 2"
1,"1, 2, 3"
2,"1, 4"
3,"2, 3"
4,"2, 3, 2"


In [4]:
# raw dataframe info
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       19 non-null     object
dtypes: object(1)
memory usage: 280.0+ bytes


In [5]:
# raw data description
df.describe()


Unnamed: 0,0
count,19
unique,18
top,"1, 2, 5"
freq,2


In [6]:
from itertools import product


def add_star_and_combine(strings):
    # Add * to each string
    strings_with_star = [s + "*" for s in strings]

    # Generate all possible combinations of strings with and without *
    combinations = product([True, False], repeat=len(strings))

    # Combine strings according to the combination pattern
    result = []
    for pattern in combinations:
        combined = [
            s if not pattern[i] else strings_with_star[i] for i, s in enumerate(strings)
        ]
        result.append(combined)

    return result


In [7]:
# add body shots to the data in any positions
def add_body_shots(row):
    combo = [x.strip() for x in row[0].split(",")]
    expanded_combos = add_star_and_combine(combo)
    # print(combo)
    return expanded_combos


In [8]:
# add body shots to the data in any positions
df = df.apply(add_body_shots, axis=1).explode(0)


In [9]:
df.head()


0        [1*, 2*]
0         [1*, 2]
0         [1, 2*]
0          [1, 2]
1    [1*, 2*, 3*]
dtype: object

In [10]:
df.info()


<class 'pandas.core.series.Series'>
Int64Index: 200 entries, 0 to 18
Series name: None
Non-Null Count  Dtype 
--------------  ----- 
200 non-null    object
dtypes: object(1)
memory usage: 3.1+ KB


In [11]:
import random

def add_defensive_moves_and_combine(strings):
    defensive_moves = [7, 8, 9, 10, 11, 12, 13]
    expanded_combos = []
    for _ in range(SAMPLE_TIMES):
        expanded_combo = []
        # insert defensive moves to random positions
        for i in range(len(strings)):
            if random.random() < 0.3:
                # add a defensive move
                added_def_move = str(random.choice(defensive_moves))
                # extend the list with the defensive move
                expanded_combo.extend([strings[i], added_def_move])
                # continue to add another defensive move or uppercuts
                # if it is a 7 or 10
                if added_def_move in ["7", "12"] and random.random() < 0.5:
                    expanded_combo.append(random.choice(["8", "5", "5*", "3", "3*", "1", "1*"]))
                if added_def_move in ["8", "11"] and random.random() < 0.5:
                    expanded_combo.append(random.choice(["2", "2*", "4", "4*", "6", "6*"]))
            else:
                expanded_combo.append(strings[i])
        expanded_combos.append(expanded_combo)
    
    # remove duplicates 
    expanded_combos = [list(x) for x in set(tuple(x) for x in expanded_combos)]
    return expanded_combos

In [12]:
# add dodge to the data in any positions
# doges include token: 7 - 13
# usually left slip is followed by a right slip, or a left hook/left uppercut

def add_dodge(row):
    combo = row
    print(combo)
    expanded_combos = add_defensive_moves_and_combine(combo)
    # print(combo)
    return expanded_combos

In [13]:
# add body shots to the data in any positions
df = df.apply(add_dodge).explode(0)

['1*', '2*']
['1*', '2']
['1', '2*']
['1', '2']
['1*', '2*', '3*']
['1*', '2*', '3']
['1*', '2', '3*']
['1*', '2', '3']
['1', '2*', '3*']
['1', '2*', '3']
['1', '2', '3*']
['1', '2', '3']
['1*', '4*']
['1*', '4']
['1', '4*']
['1', '4']
['2*', '3*']
['2*', '3']
['2', '3*']
['2', '3']
['2*', '3*', '2*']
['2*', '3*', '2']
['2*', '3', '2*']
['2*', '3', '2']
['2', '3*', '2*']
['2', '3*', '2']
['2', '3', '2*']
['2', '3', '2']
['6*', '3*']
['6*', '3']
['6', '3*']
['6', '3']
['5*', '2*']
['5*', '2']
['5', '2*']
['5', '2']
['1*', '2*', '5*']
['1*', '2*', '5']
['1*', '2', '5*']
['1*', '2', '5']
['1', '2*', '5*']
['1', '2*', '5']
['1', '2', '5*']
['1', '2', '5']
['5*', '6*', '3*']
['5*', '6*', '3']
['5*', '6', '3*']
['5*', '6', '3']
['5', '6*', '3*']
['5', '6*', '3']
['5', '6', '3*']
['5', '6', '3']
['1*', '1*', '2*']
['1*', '1*', '2']
['1*', '1', '2*']
['1*', '1', '2']
['1', '1*', '2*']
['1', '1*', '2']
['1', '1', '2*']
['1', '1', '2']
['1*', '2*', '1*']
['1*', '2*', '1']
['1*', '2', '1*']
['1*'

In [14]:
df.head()

0          [1*, 12, 2*]
0       [1*, 12, 3, 2*]
0       [1*, 2*, 8, 6*]
0    [1*, 9, 2*, 8, 6*]
0       [1*, 2*, 12, 5]
dtype: object

In [15]:
df.info()

<class 'pandas.core.series.Series'>
Int64Index: 12275 entries, 0 to 18
Series name: None
Non-Null Count  Dtype 
--------------  ----- 
12275 non-null  object
dtypes: object(1)
memory usage: 191.8+ KB


In [16]:
# add special start and end tokens
df = df.apply(lambda x: ",".join(['14'] + x + ['15']))
df.head()

0        14,1*,12,2*,15
0      14,1*,12,3,2*,15
0      14,1*,2*,8,6*,15
0    14,1*,9,2*,8,6*,15
0      14,1*,2*,12,5,15
dtype: object

In [17]:
# split the dataframe to train, validation and test
data = df.values
# shuffle the data before splitting
np.random.shuffle(data)
# split the data
train, validate, test = data[:len(data) * 7 // 10], data[len(data) * 7 // 10: len(data) * 9 // 10], data[len(data) * 9 // 10:]

In [18]:
train, len(train)

(array(['14,1,2,9,1*,2,3*,4,15', '14,2,3,8,6*,2*,15',
        '14,2,12,3*,6*,11,15', ..., '14,3,10,4*,8,6*,3,2,15',
        '14,2*,3*,13,2,15', '14,1,11,6*,2,13,5*,15'], dtype=object),
 8592)

In [19]:
validate, len(validate)

(array(['14,2,8,3*,9,2*,15', '14,2,3*,10,6*,15', '14,1,13,1,2*,8,2*,15',
        ..., '14,1*,8,2,1,2*,15', '14,3,4*,12,3*,2,13,15',
        '14,1*,2*,1,2,3,4*,10,15'], dtype=object),
 2455)

In [20]:
test, len(test)

(array(['14,1,2,5,12,15', '14,1*,2,1,12,3,2*,7,15', '14,1*,2,1*,2,15', ...,
        '14,1*,2*,11,1,2,3*,13,4,10,15',
        '14,1*,2,12,5*,1,11,4*,2*,3,7,4*,15', '14,1,2,1,2*,11,3*,7,4*,15'],
       dtype=object),
 1228)

In [21]:
# write the data to files
pd.DataFrame(train).to_csv("train.txt", sep="\t", header=None, index=None)
pd.DataFrame(validate).to_csv("validate.txt", sep="\t", header=None, index=None)
pd.DataFrame(test).to_csv("test.txt", sep="\t", header=None, index=None)