In [340]:
import pandas as pd
import Configurations as CF

In [341]:
# Read the raw formations table (path is relative to the notebook's working directory).
df = pd.read_csv("Data/Formations_info.csv")

In [342]:
# Display the column labels of the frame that was just loaded.
df.columns

Index(['Formation', 'Mode', 'Formation Base', 'Description', 'Available In',
       'Counter Formations', 'GK', 'LCB', 'CB', 'RCB', 'RM', 'RCM', 'CDM',
       'LCM', 'LM', 'LS', 'RS', 'RB', 'LB', 'RWB', 'LWB', 'CM', 'CAM', 'RW',
       'LW', 'ST', 'CF', 'LF', 'RF', 'RAM', 'LAM', 'LDM', 'RDM'],
      dtype='str')

In [343]:
# Print the distinct values found in every column that CF.ROLE_LIST names,
# keeping the frame's own column order.
role_columns = [col for col in df.columns if col in CF.ROLE_LIST]
for col in role_columns:
    print(f"{col} - {df[col].unique()}")

LCB - [1 0]
CB - [1 3 2 0]
RCB - [1 0]
RM - [1 0]
RCM - [1 0]
CDM - [1 0 2]
LCM - [1 0]
LM - [1 0]
LS - [1 0]
RS - [1 0]
RB - [0 1]
LB - [0 1]
RWB - [0 1]
LWB - [0 1]
CM - [0 2 1 3]
CAM - [0 1 2 3]
ST - [0 2 1]
CF - [0 1]
LF - [0 1]
RF - [0 1]
RAM - [0 1]
LAM - [0 1]
LDM - [0 1]
RDM - [0 1]


In [344]:
# Preview the three centre-back columns for the first ten rows.
df[['LCB', 'CB', 'RCB']].head(10)

Unnamed: 0,LCB,CB,RCB
0,1,1,1
1,0,3,0
2,0,3,0
3,1,1,1
4,1,1,1
5,1,1,1
6,0,3,0
7,0,2,0
8,0,2,0
9,1,0,1


In [345]:
# Every per-position count column that contributes to the row total.
cols = [
    'GK',
    'LCB', 'CB', 'RCB',
    'RM', 'RCM', 'CDM', 'LCM', 'LM',
    'LS', 'RS',
    'RB', 'LB', 'RWB', 'LWB',
    'CM', 'CAM',
    'RW', 'LW',
    'ST', 'CF', 'LF', 'RF',
    'RAM', 'LAM', 'LDM', 'RDM',
]

# Row-wise sum of the position counts.
df["Total Players"] = df[cols].sum(axis="columns")

In [346]:
# Keep only the rows whose position counts add up to exactly 11,
# then show the resulting (rows, columns) shape.
has_eleven = df["Total Players"] == 11
df = df.loc[has_eleven]
df.shape

(45, 34)

In [347]:
# Show the distinct row totals that remain after the filter above.
df["Total Players"].unique()

array([11])

In [348]:
def normalize_three_role(df, main, left, center, right):
    """Spread an aggregated position count over left / centre / right columns.

    A row is rewritten only when ``main`` holds more than one player and both
    ``left`` and ``right`` are 0:

    * ``main == 2``: ``left`` and ``right`` become 1 and ``main`` becomes 0
      (``center`` is not touched unless it is the same column as ``main``).
    * ``main >= 3``: ``left`` and ``right`` become 1 and the remaining
      ``main - 2`` players are written to ``center``; ``main`` is zeroed when
      it is a different column from ``center``.

    The row total over these columns is therefore preserved for any count,
    not only for 2 and 3.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the four columns named below.
    main : str
        Column holding the aggregated count.
    left, center, right : str
        Columns receiving the redistributed players. ``center`` may be the
        same label as ``main``.

    Returns
    -------
    pandas.DataFrame
        A modified copy of ``df``; the frame passed in is left unchanged, so
        re-running a cell that calls this function gives the same result.
    """
    # Work on a copy so the caller's frame (possibly a filtered slice) is
    # never written to.
    out = df.copy()

    # Snapshot the aggregated count before any column is overwritten; this
    # matters when `center` and `main` are the same column.
    aggregated = out[main].to_numpy().copy()

    # Plain boolean arrays are used as masks so that duplicate index labels
    # cannot interfere with the assignments below.
    split = (
        (aggregated > 1)
        & (out[left] == 0).to_numpy()
        & (out[right] == 0).to_numpy()
    )

    # One player goes to each side; the aggregated column is cleared.
    out.loc[split, [left, right]] = 1
    out.loc[split, main] = 0

    # Whatever is left after filling both sides belongs in the centre.
    three_plus = split & (aggregated >= 3)
    if three_plus.any():
        out.loc[three_plus, center] = aggregated[three_plus] - 2

    return out


In [349]:
# Run the normalization once per position group.
# Each tuple is (main, left, center, right); in every group the aggregated
# column doubles as the centre column.
role_groups = [
    ("CB", "LCB", "CB", "RCB"),
    ("CM", "LCM", "CM", "RCM"),
    ("CAM", "LAM", "CAM", "RAM"),
    ("CDM", "LDM", "CDM", "RDM"),
    ("ST", "LS", "ST", "RS"),
]
for main, left, center, right in role_groups:
    df = normalize_three_role(df, main, left, center, right)


In [350]:
# Preview the same three centre-back columns after normalization.
df[["LCB", "CB", "RCB"]].head(10)

Unnamed: 0,LCB,CB,RCB
0,1,1,1
1,1,1,1
2,1,1,1
3,1,1,1
4,1,1,1
5,1,1,1
6,1,1,1
7,1,0,1
8,1,0,1
9,1,0,1


In [351]:
import numpy as np

def split_formation_name(name):
    """Break a formation label at its first space.

    Returns a two-element ``pd.Series``: the text before the first space,
    followed by the text after it, or ``np.nan`` when the label contains
    no space at all.
    """
    structure, separator, shape = name.partition(" ")

    # An empty separator means no space was found, so there is no shape part.
    if not separator:
        shape = np.nan

    return pd.Series([structure, shape])


# split_formation_name returns a two-element Series per row; the two
# resulting columns are stored as "Structure" and "Shape".
df[["Structure", "Shape"]] = df["Formation"].apply(split_formation_name)
# Preview the new columns next to the original label and the mode.
df[["Formation", "Structure", "Shape", "Mode"]].head(5)

Unnamed: 0,Formation,Structure,Shape,Mode
0,3-1-4-2,3-1-4-2,,Attacking / Midfield (Moderate)
1,3-4-1-2,3-4-1-2,,Attacking (Moderate)
2,3-4-2-1,3-4-2-1,,Attacking
3,3-4-3 Diamond,3-4-3,Diamond,Attacking
4,3-4-3 Flat,3-4-3,Flat,Attacking


In [352]:
# First clean mode text (remove bracket text like "(Moderate)")
df["Mode"] = df["Mode"].str.replace(r"\(.*?\)", "", regex=True)

# Split by "/"
df["Mode"] = df["Mode"].str.split("/")

# Explode into multiple rows
df = df.explode("Mode")

# Clean spaces
df["Mode"] = df["Mode"].str.strip()

df[["Formation", "Structure", "Shape", "Mode"]].head(5)

Unnamed: 0,Formation,Structure,Shape,Mode
0,3-1-4-2,3-1-4-2,,Attacking
0,3-1-4-2,3-1-4-2,,Midfield
1,3-4-1-2,3-4-1-2,,Attacking
2,3-4-2-1,3-4-2-1,,Attacking
3,3-4-3 Diamond,3-4-3,Diamond,Attacking


In [353]:
# Display the column labels again now that the derived columns exist.
df.columns

Index(['Formation', 'Mode', 'Formation Base', 'Description', 'Available In',
       'Counter Formations', 'GK', 'LCB', 'CB', 'RCB', 'RM', 'RCM', 'CDM',
       'LCM', 'LM', 'LS', 'RS', 'RB', 'LB', 'RWB', 'LWB', 'CM', 'CAM', 'RW',
       'LW', 'ST', 'CF', 'LF', 'RF', 'RAM', 'LAM', 'LDM', 'RDM',
       'Total Players', 'Structure', 'Shape'],
      dtype='str')

In [354]:
# Recompute the row totals after normalization and the mode explode.
cols = [
    'GK',
    'LCB', 'CB', 'RCB',
    'RM', 'RCM', 'CDM', 'LCM', 'LM',
    'LS', 'RS',
    'RB', 'LB', 'RWB', 'LWB',
    'CM', 'CAM',
    'RW', 'LW',
    'ST', 'CF', 'LF', 'RF',
    'RAM', 'LAM', 'LDM', 'RDM',
]

player_totals = df[cols].sum(axis=1)
df["Total Players"] = player_totals

# Distinct totals across all rows.
player_totals.unique()

array([11])

In [355]:
# Number of columns currently in the frame.
len(df.columns)

36

In [356]:
# Column order for the exported file: descriptive fields first, then the
# role columns listed in CF.ROLE_LIST, then the total and free-text fields.
# NOTE(review): the role listing printed earlier in this notebook shows no
# GK / RW / LW entries, so those columns appear to be absent from
# CF.ROLE_LIST and would be dropped here even though "Total Players" still
# counts them - confirm this is intended.
final_cols = [
    "Formation", "Structure", "Shape", "Mode", "Formation Base",
    *CF.ROLE_LIST,
    "Total Players", "Counter Formations", "Description",
]
len(final_cols)

32

In [357]:
# Restrict the frame to the export columns, in the order given by final_cols.
df = df.loc[:, final_cols]
df.head()

Unnamed: 0,Formation,Structure,Shape,Mode,Formation Base,LCB,CB,RCB,LB,RB,...,RWB,CF,LF,RF,ST,LS,RS,Total Players,Counter Formations,Description
0,3-1-4-2,3-1-4-2,,Attacking,3-5-2,1,1,1,0,0,...,0,0,0,0,0,1,1,11,4-5-1 / 4-1-3-2,Suitable for the games you’re trying to win an...
0,3-1-4-2,3-1-4-2,,Midfield,3-5-2,1,1,1,0,0,...,0,0,0,0,0,1,1,11,4-5-1 / 4-1-3-2,Suitable for the games you’re trying to win an...
1,3-4-1-2,3-4-1-2,,Attacking,3-5-2,1,1,1,0,0,...,0,0,0,0,0,1,1,11,4-5-1,Suitable for the games you are trying to win b...
2,3-4-2-1,3-4-2-1,,Attacking,3-4-3,1,1,1,0,0,...,0,0,1,1,1,0,0,11,4-5-1 / 4-2-3-1,A variation of 3-4-1-2 formation but more atta...
3,3-4-3 Diamond,3-4-3,Diamond,Attacking,3-4-3,1,1,1,0,0,...,0,0,0,0,1,0,0,11,4-3-1-2 / 4-2-3-1 / 5-3-2,Suitable for the games where you want to conce...


In [358]:
# Write the transformed table to disk; index=False leaves the row index
# (which holds repeated labels after the explode step) out of the file.
df.to_csv("Data/Formations_info_transformed.csv", index=False)