# Transforming Data with Pre-Processing Pipeline

## Setup

In [1]:
import sys
# Add the parent directory to the module search path before the project import below.
# Presumably `starcraft_predictor` lives one level above this notebook and is not
# installed into the environment — TODO confirm.
sys.path.append("..")

In [2]:
import pandas as pd

import starcraft_predictor as scp



The pre-processing pipeline generates modelling features from the raw data produced by the `ReplayEngine()`.

## Load raw data

In [3]:
# Load the example replay export. The path is relative, so it is resolved against
# the kernel's working directory.
# NOTE(review): the displayed frame contains "Unnamed: 0" and "Unnamed: 0.1" columns,
# which look like index columns written out by earlier saves — confirm whether the
# pipeline relies on them before dropping them or passing `index_col`.
data = pd.read_csv(
    filepath_or_buffer="../example_data/example_replay_data.csv",
)

In [4]:
# Bare last-line expression: renders the raw frame as the cell output
# (pandas elides the middle rows and columns with "...").
data

Unnamed: 0.1,Unnamed: 0,filehash,winner,player_1_race,player_2_race,seconds,food_made_1,food_made_2,food_used_1,food_used_2,...,vespene_used_current_1,vespene_used_current_2,vespene_used_current_army_1,vespene_used_current_army_2,vespene_used_current_economy_1,vespene_used_current_economy_2,vespene_used_current_technology_1,vespene_used_current_technology_2,workers_active_count_1,workers_active_count_2
0,0,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,0.0,15.0,15.0,12.0,12.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0
1,1,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,10.0,15.0,15.0,13.0,13.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0
2,2,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,20.0,15.0,15.0,13.0,14.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,13.0
3,3,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,30.0,15.0,15.0,14.0,14.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,13.0
4,4,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,40.0,15.0,15.0,14.0,15.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,161,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,1610.0,173.0,289.0,71.0,181.0,...,100.0,3925.0,0.0,3025.0,0.0,0.0,100.0,900.0,59.0,70.0
162,162,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,1620.0,150.0,289.0,56.0,191.0,...,100.0,4075.0,0.0,3175.0,0.0,0.0,100.0,900.0,48.0,70.0
163,163,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,1630.0,150.0,289.0,54.0,193.0,...,100.0,4075.0,0.0,3175.0,0.0,0.0,100.0,900.0,48.0,70.0
164,164,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,1640.0,150.0,289.0,48.0,195.0,...,100.0,4525.0,0.0,3475.0,0.0,0.0,100.0,1050.0,48.0,70.0


## Pre-process data

The pipeline follows scikit-learn's `fit()` / `transform()` convention; this example only calls `transform()`.

In [5]:
# Apply the packaged pre-processing pipeline to the raw frame and bind the result
# to a new name.
# NOTE(review): `transform()` is called without a preceding `fit()` in this notebook;
# presumably the pipeline is stateless or already fitted — confirm.
preprocessing_pipeline = scp.sc2_preprocessing_pipeline
transformed_data = preprocessing_pipeline.transform(data)

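The per-player metric columns (suffixed `_1` and `_2`) have now been replaced by engineered `_diff` columns holding the player 1 value minus the player 2 value. The `player_1_race` and `player_2_race` columns are left unchanged.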

In [6]:
# Bare last-line expression: renders the transformed frame as the cell output.
transformed_data

Unnamed: 0.1,Unnamed: 0,filehash,winner,player_1_race,player_2_race,seconds,food_made_diff,food_used_diff,minerals_collection_rate_diff,minerals_lost_diff,...,minerals_used_current_army_diff,minerals_used_current_economy_diff,vespene_collection_rate_diff,vespene_lost_diff,vespene_lost_army_diff,vespene_used_current_diff,vespene_used_current_army_diff,vespene_used_current_economy_diff,vespene_used_current_technology_diff,workers_active_count_diff
0,0,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,10.0,0.0,0.0,42.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,20.0,0.0,-1.0,-28.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,30.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,40.0,0.0,-1.0,0.0,0.0,...,0.0,-50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,161,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,1610.0,-116.0,-110.0,-280.0,15175.0,...,-5000.0,-3525.0,-1613.0,-3700.0,-3700.0,-3825.0,-3025.0,0.0,-800.0,-11.0
162,162,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,1620.0,-139.0,-135.0,-504.0,16625.0,...,-5250.0,-4575.0,-1612.0,-3700.0,-3700.0,-3975.0,-3175.0,0.0,-800.0,-22.0
163,163,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,1630.0,-139.0,-139.0,-952.0,16725.0,...,-5450.0,-4575.0,-1591.0,-3700.0,-3700.0,-3975.0,-3175.0,0.0,-800.0,-22.0
164,164,1b017c9c244e1c635a717f5a69b45afee739737b6ed594...,2,Protoss,Protoss,1640.0,-139.0,-147.0,-1092.0,17025.0,...,-6350.0,-4575.0,-1523.0,-3700.0,-3700.0,-4425.0,-3475.0,0.0,-950.0,-22.0
