In [37]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import pickle

# Display switch: when True, cells show only df.head() instead of the full frame.
CONCISE = True
# NOTE(review): not referenced in the cells visible here; presumably the held-out
# fraction for a train/test split -- confirm against the training notebook.
TEST_RATIO = 0.10

# Load the dataset (read_csv already returns a DataFrame, so no re-wrapping is needed)
filename = "troop_movements_1m.csv"
data = pd.read_csv(filename)

# Build the cleaned frame in one expression so the raw frame `data` is left
# untouched and `df` always derives from it in a single, re-runnable step:
#   * 'invalid_unit' in unit_type is replaced with 'unknown'
#   * missing location_x / location_y are forward-filled from the previous row
#     (values missing before the first valid row stay NaN -- ffill has nothing
#     to carry forward there)
df = data.assign(
    unit_type=data['unit_type'].replace('invalid_unit', 'unknown'),
    location_x=data['location_x'].ffill(),
    location_y=data['location_y'].ffill(),
)

# Save the clean data into Parquet format
df.to_parquet("troop_movements_1m_cleaned.parquet", index=False)

# Show a preview (or the whole frame when CONCISE is off)
if CONCISE:
    display(df.head())
else:
    display(df)

Unnamed: 0,timestamp,unit_id,unit_type,location_x,location_y,destination_x,destination_y,homeworld
0,2023-06-03 03:19:15,919214,tie_silencer,2.0,5.0,9,3,Aleen Minor
1,2023-02-24 13:50:40,9467154,stormtrooper,9.0,0.0,9,1,Malastare
2,2023-03-29 19:54:55,6585778,tie_silencer,0.0,6.0,5,9,Serenno
3,2023-04-30 00:58:11,3878023,tie_silencer,4.0,2.0,9,9,Tund
4,2023-04-10 22:00:26,5537117,at-st,6.0,8.0,5,8,Skako


In [38]:
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load a model file that was produced by a trusted source.
# The file handle is only needed for deserialization, so the `with` block is
# kept to that single call and the file is closed before any heavy work starts.
with open("trained_model.pkl", "rb") as file:
    model = pickle.load(file)

# Load data from Parquet
df = pd.read_parquet("troop_movements_1m_cleaned.parquet")

# One-hot encode the two categorical features passed to the model
X = df[['unit_type', 'homeworld']]
X_enc = pd.get_dummies(X)

# get_dummies derives its columns from the categories present in *this* data,
# which may differ (in set or order) from the columns the model was fitted on.
# When the model exposes the fitted column names, align to them: columns the
# model expects but this data lacks are added as all-zero, extras are dropped.
# NOTE(review): assumes a scikit-learn style estimator that may carry
# `feature_names_in_`; models without that attribute are used as before.
expected_columns = getattr(model, "feature_names_in_", None)
if expected_columns is not None:
    X_enc = X_enc.reindex(columns=expected_columns, fill_value=0)

# Run the data through the model and add the predicted values to the data frame
df['predictions'] = model.predict(X_enc)

# Show a preview (or the whole frame when CONCISE is off)
if CONCISE:
    display(df.head())
else:
    display(df)

Unnamed: 0,timestamp,unit_id,unit_type,location_x,location_y,destination_x,destination_y,homeworld,predictions
0,2023-06-03 03:19:15,919214,tie_silencer,2.0,5.0,9,3,Aleen Minor,False
1,2023-02-24 13:50:40,9467154,stormtrooper,9.0,0.0,9,1,Malastare,False
2,2023-03-29 19:54:55,6585778,tie_silencer,0.0,6.0,5,9,Serenno,False
3,2023-04-30 00:58:11,3878023,tie_silencer,4.0,2.0,9,9,Tund,False
4,2023-04-10 22:00:26,5537117,at-st,6.0,8.0,5,8,Skako,False
