In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
from fastparquet import ParquetFile
from fastparquet import write
import pyarrow as pa

### Load the pickled model, read the Parquet data into a DataFrame, and run the data through the model

In [2]:
# Restore the previously trained classifier from disk.
# NOTE(review): pickle.load executes arbitrary code embedded in the file --
# only load "trained_model.pkl" if it comes from a trusted source.
with open("trained_model.pkl", mode='rb') as model_file:
    model = pickle.load(model_file)

# Read the cleaned records into a DataFrame using the fastparquet engine.
df = pd.read_parquet('clean_data.parquet', engine='fastparquet')
df

Unnamed: 0,timestamp,unit_id,unit_type,location_x,location_y,destination_x,destination_y,homeworld
0,2023-06-13 17:33:18,1,at-st,2.0,8.0,1,1,Glee Anselm
1,2023-06-13 17:33:17,2,tie_silencer,4.0,4.0,0,1,Trandosha
2,2023-06-13 17:33:16,3,at-at,0.0,3.0,6,1,Corellia
3,2023-06-13 17:33:15,4,tie_silencer,6.0,1.0,6,9,Shili
4,2023-06-13 17:33:14,5,tie_fighter,0.0,4.0,9,6,Muunilinst
...,...,...,...,...,...,...,...,...
9999995,2023-02-18 10:26:20,9999996,resistance_soldier,6.0,6.0,1,3,Troiken
9999996,2023-02-18 10:26:19,9999997,tie_silencer,8.0,6.0,2,0,Kashyyyk
9999997,2023-02-18 10:26:18,9999998,tie_fighter,7.0,7.0,6,4,Kashyyyk
9999998,2023-02-18 10:26:17,9999999,tie_fighter,6.0,6.0,8,8,Kalee


In [3]:
# Categorical columns fed to the model.
# NOTE(review): presumably the same columns the model was trained on -- confirm
# against the training notebook.
X = df[['homeworld', 'unit_type']]

# One-hot encode categorical features
X_encoded = pd.get_dummies(X)

# get_dummies only creates columns for categories that occur in *this* frame,
# so the column set/order can differ from what the model saw when it was
# fitted. If the estimator recorded its training column names, align to them:
# dummies missing here are added as all-zero columns, columns the model never
# saw are dropped, and the order is made to match.
expected_columns = getattr(model, 'feature_names_in_', None)
if expected_columns is not None:
    X_encoded = X_encoded.reindex(columns=expected_columns, fill_value=0)

# Predict a label for every row of df
y_pred = model.predict(X_encoded)
y_pred

array([False, False, False, ...,  True, False, False])

In [4]:
# Store the model output next to the input rows as a new "predictions" column.
# This modifies df in place; re-running the cell simply overwrites the column.
df["predictions"] = y_pred
df

Unnamed: 0,timestamp,unit_id,unit_type,location_x,location_y,destination_x,destination_y,homeworld,predictions
0,2023-06-13 17:33:18,1,at-st,2.0,8.0,1,1,Glee Anselm,False
1,2023-06-13 17:33:17,2,tie_silencer,4.0,4.0,0,1,Trandosha,False
2,2023-06-13 17:33:16,3,at-at,0.0,3.0,6,1,Corellia,False
3,2023-06-13 17:33:15,4,tie_silencer,6.0,1.0,6,9,Shili,True
4,2023-06-13 17:33:14,5,tie_fighter,0.0,4.0,9,6,Muunilinst,False
...,...,...,...,...,...,...,...,...,...
9999995,2023-02-18 10:26:20,9999996,resistance_soldier,6.0,6.0,1,3,Troiken,True
9999996,2023-02-18 10:26:19,9999997,tie_silencer,8.0,6.0,2,0,Kashyyyk,True
9999997,2023-02-18 10:26:18,9999998,tie_fighter,7.0,7.0,6,4,Kashyyyk,True
9999998,2023-02-18 10:26:17,9999999,tie_fighter,6.0,6.0,8,8,Kalee,False
