# Determine Multiple Baserate Metric Distribution for MA

In [1]:
%matplotlib inline

In [2]:
import os, re
import json
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import (plot_acf, plot_pacf)
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
import datetime
from dateutil.parser import parse
import sys
sys.path.append(("../.."))
from Baserate.main.baserate import (
    MaBaserate
)
from ExpressScore.main.express_score import MaScorer
import warnings
warnings.filterwarnings("ignore")

import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use("fivethirtyeight");

import subprocess
import shutil

In the code cell below we need to specify the warning start date, warning end date, and countries to be scored

In [3]:
SCORE_START_DATE = "2018-11-04"
SCORE_END_DATE = "2018-11-24"
MA_COUNTRIES = ["Iraq", "Syria"]

In [4]:
EVT_ABBR = "MA"
EVT_TYPE = "Military Activity"
MC_HOME = os.path.abspath("../../..")
MCT_HOME = os.path.abspath("../../../../mercury-challenge-team")
RESULT_HOME = os.path.join(MCT_HOME, "data", "scoring_results", "multiple_baserate")
DOC_PATH = os.path.join(MC_HOME, "doc")
EVT_DOC_PATH = os.path.join(DOC_PATH, "scoring", EVT_ABBR)
DATA_PATH = os.path.join(MC_HOME, "data")
GSR_PATH = os.path.join(DATA_PATH, "gsr")
MA_GSR_PATH = os.path.join(GSR_PATH, "ma_gsr")
WARN_PATH = os.path.join(DATA_PATH, "baserate_warnings")
MA_WARN_PATH = os.path.join(WARN_PATH, "baserate_ma_warnings")
RESOURCE_PATH = os.path.join(MC_HOME, "src", "Baserate", "resources")
HISTORY_LOOKBACK = 365
HISTORY_DELAY = 14
PARTICIPANT_ID = "Mercury_MBR"

N_ITER = 100
DATE_BUFFER = 4
MAX_DIST = 100
DIST_BUFFER = 16.66667

DEFAULT_FIG_SIZE = (8,5)

DEBUG = False

DATE_FORMAT = "%Y-%m-%d"

LT_MEAN = 3

score_start_date = parse(SCORE_START_DATE)
BUFFER_START_DATE = str((score_start_date - datetime.timedelta(DATE_BUFFER)).date())
score_end_date = parse(SCORE_END_DATE)
BUFFER_END_DATE = str((score_end_date + datetime.timedelta(DATE_BUFFER)).date())
print(BUFFER_START_DATE, SCORE_START_DATE, SCORE_END_DATE, BUFFER_END_DATE)

hist_end_date = parse(BUFFER_START_DATE) - datetime.timedelta(1 + HISTORY_DELAY)
hist_end_date = hist_end_date.strftime(DATE_FORMAT)
hist_start_date = parse(BUFFER_START_DATE) - datetime.timedelta(365 + HISTORY_DELAY)
hist_start_date = hist_start_date.strftime(DATE_FORMAT)
print(hist_end_date, hist_start_date)


score_param_filepath = os.path.join(RESULT_HOME, "Score_MA.json")
warn_scratch_filepath = os.path.join(RESULT_HOME, "MBR_MA_Warnings.json")


EXPORT_CMD = ["export", "MERC_DB=localhost"]

SCORE_CMD = ["curl", "-H", "Content-Type: application/json", 
             "-XPOST", "http://localhost:8053/score", "-d",
             "@"+os.path.abspath(score_param_filepath)]

CLEAR_WARN_CMD = ["curl", 
                  "http://localhost:8029/warning/remove_all/{}".format(PARTICIPANT_ID)]

LOAD_WARN_CMD = ["curl", "-H", "Content-Type: application/json", 
                 "-XPOST", "http://localhost:8029/warning/dev-intake", "--data-binary", 
                 "@"+os.path.abspath(warn_scratch_filepath)]

FILENAME_TEMPLATE = "MONTHSTR/Baserate_EVTTYPE_ITER_MONTHSTR.json"


score_dict_template = {"Include Matching": "false", "Start Date": SCORE_START_DATE,
                       "End Date": SCORE_END_DATE, "Performer ID": PARTICIPANT_ID,
                       "Max Distance": MAX_DIST, "Distance Buffer": DIST_BUFFER, 
                       "Date Buffer": DATE_BUFFER, "Event Type": EVT_TYPE}

2018-10-31 2018-11-04 2018-11-24 2018-11-28
2018-10-16 2017-10-17


Load the GSR from the directory

In [5]:
gsr_files = [x for x in os.listdir(MA_GSR_PATH) if x.endswith("json")]
print(gsr_files)
gsr = []
for gf in gsr_files:
    with open(os.path.join(MA_GSR_PATH, gf), "r", encoding="utf-8-sig") as f:
        monthly_gsr = json.load(f)
        gsr += monthly_gsr
print(len(gsr))

['MA_May_2015.json', 'MA_August_2018.json', 'MA_September_2017.json', 'MA_January_2016.json', 'MA_November_2017.json', 'MA_October_2017.json', 'MA_April_2017.json', 'MA_March_2018.json', 'MA_August_2015.json', 'MA_February_2018.json', 'MA_May_2018.json', 'MA_April_2016.json', 'MA_October_2016.json', 'MA_November_2016.json', 'MA_January_2017.json', 'MA_September_2016.json', 'MA_June_2015.json', 'MA_July_2016.json', 'MA_December_2016.json', 'MA_June_2018.json', 'MA_December_2017.json', 'MA_July_2017.json', 'MA_June_2017.json', 'MA_July_2018.json', 'MA_July_2015.json', 'MA_June_2016.json', 'MA_December_2015.json', 'MA_October_2015.json', 'MA_November_2015.json', 'MA_September_2015.json', 'MA_March_2017.json', 'MA_February_2017.json', 'MA_May_2017.json', 'MA_January_2018.json', 'MA_August_2016.json', 'MA_August_2017.json', 'MA_September_2018.json', 'MA_November_2018.json', 'MA_April_2018.json', 'MA_October_2018.json', 'MA_May_2016.json', 'MA_February_2016.json', 'MA_March_2016.json']
97595

In [6]:
print(CLEAR_WARN_CMD)
proc = subprocess.Popen(CLEAR_WARN_CMD, stdout=subprocess.PIPE)
clearout, clearerrs = proc.communicate()

print(clearout, clearerrs)



In [7]:
result_dict = dict()
mbr_result_filename = "MA MBR Results {} to {}.json".format(SCORE_START_DATE,
                                                            SCORE_END_DATE)
mbr_result_path = os.path.join(RESULT_HOME, mbr_result_filename)
#_COUNTRIES = ["Egypt", "Lebanon"]
_COUNTRIES = MA_COUNTRIES
for cc in _COUNTRIES:
    print(cc)
    cc_path_str = re.sub(" ", "_", cc)
    br = MaBaserate(cc)
    cc_gsr = [x for x in gsr if x["Country"] == cc]
    print("There are {} GSR events".format(len(cc_gsr)))
    score_dict = score_dict_template.copy()
    score_dict["Country"] = cc
    with open(score_param_filepath, "w") as f:
        json.dump(score_dict, f)
    for i in range(N_ITER):
        if i%(N_ITER//20) == 0:
            print("Iteration {}".format(i))
        try:
            preds=br.make_predictions(BUFFER_START_DATE, BUFFER_END_DATE, 
                                      history_delay=HISTORY_DELAY,
                                      gsr=cc_gsr)
            pred_json = br.convert_warnings_to_json(preds)
            pred_json["participant_id"] = PARTICIPANT_ID
        except AttributeError:
            pred_json = {"participant_id": PARTICIPANT_ID, "payload": []}

        with open(warn_scratch_filepath, "w") as f:
            json.dump(pred_json, f, ensure_ascii=False, indent=2)
        # Load the warnings
        proc = subprocess.Popen(LOAD_WARN_CMD, stdout=subprocess.PIPE)
        loadout, loaderrs = proc.communicate()
        #print(loadout, loaderrs)
        # Score the warnings
        proc = subprocess.Popen(SCORE_CMD, stdout=subprocess.PIPE)
        scoreout, scoreerrs = proc.communicate()
        #print(scoreout, scoreerrs)
        scoring = json.loads(scoreout.decode("utf-8"))["Scoring"]["Results"]
        key_ = "{}_{}".format(cc, i)
        result_dict[key_] = scoring
        proc = subprocess.Popen(CLEAR_WARN_CMD, stdout=subprocess.PIPE)
        clearout, clearerrs = proc.communicate()
        out_path = os.path.join(RESULT_HOME, 
                                "{} MBR Results {} to {}.json".format(cc, SCORE_START_DATE,
                                                                      SCORE_END_DATE))

    with open(mbr_result_path, "w") as f:
        json.dump(result_dict, f, ensure_ascii=False, sort_keys=True, indent=2)


    #print(out_path)

Iraq
There are 22861 GSR events
Iteration 0
Iteration 5
Iteration 10
Iteration 15
Iteration 20
Iteration 25
Iteration 30
Iteration 35
Iteration 40
Iteration 45
Iteration 50
Iteration 55
Iteration 60
Iteration 65
Iteration 70
Iteration 75
Iteration 80
Iteration 85
Iteration 90
Iteration 95
Syria
There are 70526 GSR events
Iteration 0
Iteration 5
Iteration 10
Iteration 15
Iteration 20
Iteration 25
Iteration 30
Iteration 35
Iteration 40
Iteration 45
Iteration 50
Iteration 55
Iteration 60
Iteration 65
Iteration 70
Iteration 75
Iteration 80
Iteration 85
Iteration 90
Iteration 95


In [8]:
warn_scratch_filepath



In [9]:
out_path

'/Users/peterhaglich/Dropbox/Work/IARPA/Mercury/peterhaglich/mercury-challenge-team/data/scoring_results/multiple_baserate/Syria MBR Results 2018-11-04 to 2018-11-24.json'