In [1]:
"""
    This is the main script that will create the predictions on test data and save 
    a predictions file.
"""
import time
from pathlib import Path
import pickle
import mlflow
import utils
import pandas as pd
import numpy as np
import ew_Validator
import ns_id_Validator
import ns_ik_Validator
import ss_Validator

In [5]:
# Wall-clock start of the run (not read again in the visible cells).
start_time = time.time()

# INPUT/OUTPUT PATHS WITHIN THE DOCKER CONTAINER
TRAINED_MODEL_DIR = Path('./trained_model/')
TEST_DATA_DIR = Path('../../dataset/data_subset/test/')
TEST_PREDS_FP = Path('../../submission/submission.csv')

# Post-processing validators, applied to the converted predictions later on.
ss_validator = ss_Validator.SS_Validator()
ew_validator = ew_Validator.EW_Validator()
ns_ik_validator = ns_ik_Validator.NS_IK_Validator()
ns_id_validator = ns_id_Validator.NS_ID_Validator()

# Rest of configuration, specific to this submission
delta_column = "Delta_SemimajorAxis"

# Columns handed to the tabularizer. Commented-out entries are deliberately
# excluded from this submission. Despite the "_EW" suffix, the resulting
# `updated_feature_cols` is used for both the EW and the NS model below.
feature_cols_EW = [
    "Eccentricity",
    "Semimajor Axis (m)",
    # "Inclination (deg)",
    "RAAN (deg)",
    "Argument of Periapsis (deg)",
    # "True Anomaly (deg)",
    # "Latitude (deg)",
    "Longitude (deg)",
    "Altitude (m)",
    # "X (m)",
    # "Y (m)",
    # "Z (m)",
    # "Vx (m/s)",
    # "Vy (m/s)",
    # "Vz (m/s)",
    # delta_column,
]

lag_steps = 0

# Pass the list defined above. The previous `feature_cols` name is not defined
# anywhere in this notebook and only resolved through leftover kernel state,
# so a fresh "Restart & Run All" failed with a NameError.
# NOTE(review): confirm these are the columns the pickled models were trained on.
test_data, updated_feature_cols = utils.tabularize_data(
    TEST_DATA_DIR, feature_cols_EW, lag_steps=lag_steps)

print("Data Tabularization Complete")

Data Tabularization Complete


In [None]:


def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards.

    SECURITY: unpickling can execute arbitrary code; only point this at model
    files produced by this project's own training run.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Load the trained models (don't use the utils module, use pickle)
model_EW = _load_pickle(TRAINED_MODEL_DIR / 'model_EW.pkl')
le_EW = _load_pickle(TRAINED_MODEL_DIR / 'le_EW.pkl')

# MLflow tracking is currently disabled:
# mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")
# mlflow.set_experiment("ARCLab Competition")
# mlflow.sklearn.autolog(log_models = True)

# with mlflow.start_run():
# Make predictions on the test data for EW; the label encoder maps the
# model's encoded classes back to the original labels.
test_data['Predicted_EW'] = le_EW.inverse_transform(
    model_EW.predict(test_data[updated_feature_cols])
)
# Trash the models to free up the memory.
model_EW = None
le_EW = None

print("EW Predictions are done")

model_NS = _load_pickle(TRAINED_MODEL_DIR / 'model_NS.pkl')
le_NS = _load_pickle(TRAINED_MODEL_DIR / 'le_NS.pkl')

# Make predictions on the test data for NS
test_data['Predicted_NS'] = le_NS.inverse_transform(
    model_NS.predict(test_data[updated_feature_cols])
)

print("NS Predictions are done")

# Convert the per-row EW/NS class predictions into the results table that is
# validated and submitted below (format defined by utils.convert_classifier_output).
test_results = utils.convert_classifier_output(test_data)


In [None]:
# Display the converted predictions for inspection before the validators run.
test_results

In [None]:
# Run the validators as a chain: each one receives the output of the previous
# one. Feeding the raw `test_results` into every validator (as before) threw
# away all corrections except those of the last validator.
# The order matches the evaluation cell: SS -> EW -> NS_IK -> NS_ID.
validated_results = ss_validator.apply_validator(test_results, test_data)

validated_results = ew_validator.apply_validator(validated_results, test_data)

validated_results = ns_ik_validator.apply_validator(validated_results, test_data)

validated_results = ns_id_validator.apply_validator(validated_results, test_data)

In [None]:

# Build the final submission table from the validated predictions, ordered by
# object and time step with a clean 0..n-1 index.
# `stripped_test_results` was never defined in this notebook (it only existed
# as leftover kernel state), so the cell failed on a fresh run.
# NOTE(review): assumes the validated results are what should be submitted —
# confirm no column-stripping step was lost together with the deleted cell.
merged_test_results = validated_results.copy()
merged_test_results = merged_test_results.sort_values(by=['ObjectID', 'TimeIndex']).reset_index(drop=True)
merged_test_results

In [None]:
# Save the test results to a csv file to be submitted to the challenge.
# Create the submission directory first: to_csv does not create missing
# parent directories and would raise OSError inside a fresh container.
TEST_PREDS_FP.parent.mkdir(parents=True, exist_ok=True)
merged_test_results.to_csv(TEST_PREDS_FP, index=False)
print(f"Saved predictions to: {TEST_PREDS_FP}")

# time.sleep(360) # TEMPORARY FIX TO OVERCOME EVALAI BUG

In [6]:
import os
import sys

# Show the working directory so the relative paths below can be sanity-checked.
print(os.path.abspath(os.curdir))

# Put the repository root (two levels up) on sys.path so that the
# `baseline_submissions` package can be imported from this notebook.
module_paths = [os.path.abspath(os.path.join('../..'))]
for module_path in module_paths:
    already_importable = module_path in sys.path
    if not already_importable:
        sys.path.append(module_path)

from baseline_submissions.evaluation import NodeDetectionEvaluator

# Ground-truth labels and the submission file written by the save cell.
# NOTE(review): labels come from train_labels.csv while the predictions were
# made on the test/ split — confirm this is the intended ground truth.
ground_truth_df = pd.read_csv('../../dataset/data_subset/train_labels.csv')
test_results = pd.read_csv('../../submission/submission.csv')

# Re-apply the validator chain to the reloaded submission; each validator
# consumes the output of the one before it.
validated_results = test_results
for validator in (ss_validator, ew_validator, ns_ik_validator, ns_id_validator):
    validated_results = validator.apply_validator(validated_results, test_data)

# Score the validated predictions against the labels.
evaluator = NodeDetectionEvaluator(ground_truth_df, validated_results, tolerance=6)
precision, recall, f2, rmse = evaluator.score(debug=True)

# Blank separator line followed by the four metrics, one per line.
print(
    "",
    f'Precision: {precision:.2f}',
    f'Recall: {recall:.2f}',
    f'F2: {f2:.2f}',
    f'RMSE: {rmse:.2f}',
    sep="\n",
)

c:\Users\Joe\Desktop\Projects\ARCLab-MIT\splid-devkit\baseline_submissions\ml_python
Total TPs: 15
Total FPs: 319
Total FNs: 4
Total Distances: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Total Wrong Nodes: 1
Total Wrong Types: 2
Total Not Matched: 316
Total EW FP: 1
Total NS FP: 318

Precision: 0.04
Recall: 0.79
F2: 0.18
RMSE: 0.00
