In [1]:
pwd

'/Users/peterhaglich/Dropbox/Work/IARPA/Mercury/peterhaglich/mercury-challenge/src/ExpressScore/notebooks'

In [2]:
%matplotlib inline

In [3]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from dateutil.parser import parse
import datetime
import calendar
import json
import os
import re

import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use("fivethirtyeight")

from collections import Counter

from geopy.distance import distance

In [32]:
# Event family and country this notebook builds test fixtures for.
EVENT_TYPE = "Military Action"
EVT_ABBR = "MA"
COUNTRY = "Iraq"

# Month being scored. month_path_str is the same label with spaces replaced by
# underscores, as used in the warning and GSR file names.
month_str = "May 2018"
month_path_str = month_str.replace(" ", "_")

# Inclusive date window, derived from month_str so the two cannot drift apart.
# The explicit default supplies day 1 at midnight for the fields month_str
# leaves out (dateutil would otherwise fill them in from today's date).
FIRST_DATE = parse(month_str, default=datetime.datetime(1900, 1, 1))
LAST_DATE = FIRST_DATE.replace(
    day=calendar.monthrange(FIRST_DATE.year, FIRST_DATE.month)[1]
)

# Repository roots; the relative path is resolved against the current working
# directory, so the notebook must be run from its own folder.
MERC_CHALLENGE_HOME = os.path.abspath("../../../..")

MERC_HOME = os.path.join(MERC_CHALLENGE_HOME, "..", "mercury")
EXPRESS_SCORE_PATH = os.path.join(MERC_CHALLENGE_HOME, "src", "ExpressScore")
print(os.listdir(EXPRESS_SCORE_PATH))
# NOTE: this folder name is tied to COUNTRY and month_str by hand; update it
# when either of them changes.
ES_TEST_RESOURCE_PATH = os.path.join(EXPRESS_SCORE_PATH, "resources", "test", "iq_ma_may_2018")
DATA_HOME = os.path.join(MERC_HOME, "data")
WARN_PATH = os.path.join(DATA_HOME, "baserate_warnings", "MANSA")
month_warn_path = os.path.join(WARN_PATH, month_path_str)
MANSA_GSR_PATH = os.path.join(MERC_CHALLENGE_HOME, "data", "gsr", "ma_gsr")

# Scoring parameters used by ls() and ds() below.
MAX_DIST = 100.0       # km; distance at which the location score reaches 0
DIST_BUFFER = 16.67    # km; subtracted from distance and cutoff for approximate locations
MAX_DATE_DIFF = 4.0    # days; absolute date gap at which the date score reaches 0

['.DS_Store', '__init__.py', '__pycache__', 'main', 'notebooks', 'README.md', 'resources', 'test']


In [5]:
# Load the month's base-rate warnings and keep only the configured event type.
br_warn_filename = "Baserate_MANSA_{}.json".format(month_path_str)
br_warn_path = os.path.join(WARN_PATH, br_warn_filename)
with open(br_warn_path, "r", encoding="utf8") as f:
    # The warnings live under the "payload" key of the file's top-level object.
    br_payload = json.load(f)["payload"]
# Filter on EVENT_TYPE instead of repeating the literal, so changing the
# configuration cell is enough to retarget the notebook.
br_warn = [w for w in br_payload if w["Event_Type"] == EVENT_TYPE]
br_country_counts = Counter(w["Country"] for w in br_warn)
print(br_country_counts)

Counter({'Syria': 1685, 'Iraq': 529, 'Lebanon': 42, 'Egypt': 35, 'Saudi Arabia': 5, 'Yemen': 5})


In [6]:
# Load the month's ground-truth (GSR) events and keep only the configured event type.
gsr_filename = "MA_{}.json".format(month_path_str)
gsr_path = os.path.join(MANSA_GSR_PATH, gsr_filename)
with open(gsr_path, "r", encoding="utf8") as f:
    gsr = json.load(f)
# Filter on EVENT_TYPE instead of repeating the literal, so changing the
# configuration cell is enough to retarget the notebook.
ma_gsr = [e for e in gsr if e["Event_Type"] == EVENT_TYPE]
gsr_country_counts = Counter(e["Country"] for e in ma_gsr)
print(gsr_country_counts)

Counter({'Syria': 884, 'Iraq': 439, 'Saudi Arabia': 13, 'Egypt': 10, 'Yemen': 6, 'Lebanon': 4})


In [7]:
# Sample record: the first GSR event whose country matches COUNTRY.
test_evt = list(filter(lambda evt: evt["Country"] == COUNTRY, ma_gsr))[0]
test_evt

{'Actor': 'Iraqi Police',
 'Approximate_Location': 'True',
 'City': 'As Sa‘dīyah',
 'Country': 'Iraq',
 'Earliest_Reported_Date': '2018-05-01',
 'Event_Date': '2018-05-01',
 'Event_ID': 'MN265362',
 'Event_Subtype': 'Force Posture',
 'Event_Type': 'Military Action',
 'First_Reported_Link': 'https://www.iraqinews.com/iraq-war/islamic-state-leader-in-charge-of-weapon-stashes-arrested-northeast-of-diyala/',
 'GSS_Link': 'https://www.iraqinews.com/iraq-war/islamic-state-leader-in-charge-of-weapon-stashes-arrested-northeast-of-diyala/',
 'Latitude': 33.8211,
 'Longitude': 44.4114,
 'News_Source': 'Iraqi News',
 'Other_Links': 'http://www.ninanews.com/News_Details.aspx?nqCSdku1zTWhddS7PtZL0g%253d%253d;https://www.ninanews.com/News_Details.aspx?%20WIPsEHX6BhXtzQ0N%252bXYVig%253d%253d;',
 'Revision_Date': '2018-06-11',
 'State': 'Diyālá'}

In [33]:
# Country-filtered GSR with sequential test IDs ("MN0", "MN1", ...).
# Each event is copied before its ID is replaced, so ma_gsr (and anything
# already displayed from it) keeps the original Event_ID. dict(e, Event_ID=...)
# overwrites the existing key in place, so key order in the JSON is unchanged.
country_events = (e for e in ma_gsr if e["Country"] == COUNTRY)
cc_gsr = [dict(e, Event_ID="MN{}".format(i)) for i, e in enumerate(country_events)]
print(len(cc_gsr))
out_filename = "test_cc_gsr.json"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
# ensure_ascii=False emits non-ASCII place names verbatim, so the encoding must
# be pinned rather than left to the platform default.
with open(out_path, "w", encoding="utf8") as f:
    json.dump(cc_gsr, f, ensure_ascii=False, indent=2)

439


In [34]:
# Country- and date-filtered warnings, normalised for the scorer:
#   * "Armed Conflict" subtype is renamed to "Conflict",
#   * every key containing "Target" is dropped,
#   * Warning_ID is replaced by a sequential test ID ("BR_0", "BR_1", ...).
# Records are rebuilt as new dicts so the entries of br_warn are left untouched
# and the cell can be re-run safely.
cc_warn = []
for w in br_warn:
    if w["Country"] != COUNTRY:
        continue
    # Parse once and keep only warnings inside the inclusive scoring window.
    if not FIRST_DATE <= parse(w["Event_Date"]) <= LAST_DATE:
        continue
    cleaned = {k: v for k, v in w.items() if "Target" not in k}
    if cleaned["Event_Subtype"] == "Armed Conflict":
        cleaned["Event_Subtype"] = "Conflict"
    cleaned["Warning_ID"] = "BR_{}".format(len(cc_warn))
    cc_warn.append(cleaned)
print(len(cc_warn))
out_filename = "test_cc_warnings.json"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
# ensure_ascii=False emits non-ASCII text verbatim, so the encoding must be
# pinned rather than left to the platform default.
with open(out_path, "w", encoding="utf8") as f:
    json.dump(cc_warn, f, ensure_ascii=False, indent=2)

529


In [10]:
# Spot-check: display the first country-filtered GSR event.
cc_gsr[0]

{'Actor': 'Iraqi Police',
 'Approximate_Location': 'True',
 'City': 'As Sa‘dīyah',
 'Country': 'Iraq',
 'Earliest_Reported_Date': '2018-05-01',
 'Event_Date': '2018-05-01',
 'Event_ID': 'MN0',
 'Event_Subtype': 'Force Posture',
 'Event_Type': 'Military Action',
 'First_Reported_Link': 'https://www.iraqinews.com/iraq-war/islamic-state-leader-in-charge-of-weapon-stashes-arrested-northeast-of-diyala/',
 'GSS_Link': 'https://www.iraqinews.com/iraq-war/islamic-state-leader-in-charge-of-weapon-stashes-arrested-northeast-of-diyala/',
 'Latitude': 33.8211,
 'Longitude': 44.4114,
 'News_Source': 'Iraqi News',
 'Other_Links': 'http://www.ninanews.com/News_Details.aspx?nqCSdku1zTWhddS7PtZL0g%253d%253d;https://www.ninanews.com/News_Details.aspx?%20WIPsEHX6BhXtzQ0N%252bXYVig%253d%253d;',
 'Revision_Date': '2018-06-11',
 'State': 'Diyālá'}

In [11]:
# Spot-check: display the first country-filtered warning.
cc_warn[0]

{'City': 'Baghdad',
 'Country': 'Iraq',
 'Actor': 'Iraqi Police',
 'timestamp': '2018-05-08T8:15:58.0',
 'Longitude': 44.4009,
 'Probability': 0.6682906558,
 'Event_Subtype': 'Conflict',
 'Event_Type': 'Military Action',
 'State': 'Mayorality of Baghdad',
 'Latitude': 33.3406,
 'Event_Date': '2018-05-12'}

In [12]:
def dist(warn, evt):
    """Return the geopy distance, in kilometres, between a warning and an event.

    Both arguments are mappings that provide "Latitude" and "Longitude".
    """
    warn_point = (warn["Latitude"], warn["Longitude"])
    evt_point = (evt["Latitude"], evt["Longitude"])
    return distance(warn_point, evt_point).km
def dist_to_warn(w, gsr_list):
    """Return the distance from warning ``w`` to each event in ``gsr_list``, in order."""
    distances = []
    for evt in gsr_list:
        distances.append(dist(w, evt))
    return distances
def date_diff(warn, evt):
    """Return the signed day gap: warning "Event_Date" minus event "Event_Date".

    Positive values mean the warning is dated after the event.
    """
    warn_day, evt_day = (parse(rec["Event_Date"]) for rec in (warn, evt))
    return (warn_day - evt_day).days
def date_diff_to_warn(w, gsr_list):
    """Return the signed day gap from warning ``w`` to each event in ``gsr_list``, in order."""
    gaps = []
    for evt in gsr_list:
        gaps.append(date_diff(w, evt))
    return gaps
def es_match(warn, evt):
    """Return True when the warning and the event have the same "Event_Subtype"."""
    return warn["Event_Subtype"] == evt["Event_Subtype"]
def es_match_to_warn(w, gsr_list):
    """Return a 0/1 subtype-match flag for warning ``w`` against each event, in order."""
    flags = []
    for evt in gsr_list:
        flags.append(int(es_match(w, evt)))
    return flags
def actor_match(warn, evt):
    """Return True when the warning's actor is one of the event's actors.

    The event's "Actor" field is a ";"-separated list; entries are compared
    exactly, with no trimming or case folding.
    """
    return any(actor == warn["Actor"] for actor in evt["Actor"].split(";"))
def actor_match_to_warn(w, gsr_list):
    """Return a 0/1 actor-match flag for warning ``w`` against each event, in order."""
    flags = []
    for evt in gsr_list:
        flags.append(int(actor_match(w, evt)))
    return flags
def ls(dist, approx_flag=False):
    """Location score: 1 at zero distance, falling linearly to 0 at the cutoff.

    Args:
        dist: Warning-to-event distance, in the same units as MAX_DIST.
            (Inside the body this parameter shadows the module-level
            ``dist`` function.)
        approx_flag: Truthy when the event location is approximate; both the
            distance and the cutoff are then reduced by DIST_BUFFER.

    Returns:
        ``1 - adjusted_dist / adjusted_cutoff``. The result is negative when
        the adjusted distance exceeds the adjusted cutoff, so clip distances
        to MAX_DIST before calling if a score in [0, 1] is required.
    """
    buffer = approx_flag*DIST_BUFFER
    max_dist = MAX_DIST - buffer
    dist = max(0, dist - buffer)
    return 1 - dist/max_dist


# Elementwise version of ls(). otypes is fixed so the wrapper accepts size-0
# inputs (np.vectorize raises on them otherwise) and skips the extra probing
# call it would make on the first element to infer the output dtype.
ls_vfunc = np.vectorize(ls, otypes=[float])
def ds(date_diff):
    """Date score: 1 for a zero-day gap, falling linearly to 0 at MAX_DATE_DIFF days.

    Args:
        date_diff: Day gap between warning and event; the sign is ignored.
            (Inside the body this parameter shadows the module-level
            ``date_diff`` function.)

    Returns:
        ``1 - |date_diff| / MAX_DATE_DIFF``. The result is negative when the
        absolute gap exceeds MAX_DATE_DIFF, so clip the gap beforehand if a
        score in [0, 1] is required.
    """
    return 1 - abs(date_diff)/MAX_DATE_DIFF


# Elementwise version of ds(). otypes is fixed so the wrapper accepts size-0
# inputs (np.vectorize raises on them otherwise) and skips the extra probing
# call it would make on the first element to infer the output dtype.
ds_vfunc = np.vectorize(ds, otypes=[float])

In [13]:
# Smoke test: for each pairwise helper, print its value on the first
# warning/event pair, then the companion helper's row for that warning
# against every event.
_first_warn, _first_evt = cc_warn[0], cc_gsr[0]
for _pair_fn, _row_fn in (
    (date_diff, date_diff_to_warn),
    (dist, dist_to_warn),
    (es_match, es_match_to_warn),
    (actor_match, actor_match_to_warn),
):
    print(_pair_fn(_first_warn, _first_evt))
    print(_row_fn(_first_warn, cc_gsr))

11
[11, 11, 11, 11, 11, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 10, 9, 9, 9, 9, 9, 8, 8, 8, 10, 10, 8, 10, 8, 8, 8, 7, 7, 8, 7, 7, 7, 10, 8, 8, 8, 3, 3, 4, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 6, 6, 6, 6, 7, 9, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 4, 4, 4, 0, 0, -1, -1, -1, -1, 4, 0, -1, -1, 0, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 10, 3, 1, 2, -2, -2, 3, 3, -2, -2, -2, 10, 9, 9, 9, -2, -2, 3, 3, -3, -3, -3, -3, -3, -3, -3, -3, 3, 3, -3, -3, 6, -1, -3, -1, 0, 0, 2, 0, 3, 3, 3, 3, 3, -6, -6, -6, -5, -2, -2, -6, -6, -6, -6, -6, -6, 1, -6, -6, -6, -5, -5, -7, -5, -7, -8, -8, 5, 5, 5, -5, -7, -7, -7, -7, -5, -5, -8, -5, -6, -6, -6, -6, -6, -6, -5, -5, -5, -5, -5, -5, -5, -5, -4, -5, -4, 2, -9, -9, -9, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -5, -8, -8, -8, -8, -8, -8, 8, -6, -6, 8, -5, -9, -5, -5, -5, 9, 9, -4, -4, -4, 0, -10, -9, -9, -10, -10, -10, -10, -10, -9, -9, -9, -9, 4, -10,

In [14]:
# Subtypes of the first warning and the first two events, for comparison with
# the subtype-match flags printed above.
print(*(rec["Event_Subtype"] for rec in (cc_warn[0], cc_gsr[0], cc_gsr[1])))

Conflict Force Posture Conflict


In [15]:
# Distance (km) between every warning (rows) and every GSR event (columns).
dist_array = np.array(
    [dist_to_warn(warn, cc_gsr) for warn in cc_warn]
).reshape(len(cc_warn), len(cc_gsr))

dist_df = pd.DataFrame(
    data=dist_array,
    index=[warn["Warning_ID"] for warn in cc_warn],
    columns=[evt["Event_ID"] for evt in cc_gsr],
)

dist_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN429,MN430,MN431,MN432,MN433,MN434,MN435,MN436,MN437,MN438
BR_0,53.303495,385.185271,217.134203,218.244641,330.199416,328.483873,9.269567,7.717841,23.669513,236.004415,...,232.400184,349.844394,352.024628,0.000000,230.091764,232.677652,94.752842,100.601359,203.916794,352.269643
BR_1,335.509582,420.299851,179.449093,179.478066,76.724554,270.529550,383.878800,379.213774,363.203426,202.484108,...,162.763258,63.224860,69.220424,381.437798,153.896737,149.946960,365.390428,380.663645,280.809157,60.406410
BR_2,59.776262,373.522264,219.990888,221.199291,331.833848,319.485880,4.581133,19.919905,32.577790,241.429656,...,234.855817,351.329408,353.769821,12.494580,228.646200,231.614863,106.603243,112.898763,194.701446,353.683431
BR_3,172.794365,427.763912,16.593611,14.490137,113.494451,299.382224,230.148796,220.603691,202.957933,37.675263,...,23.448650,133.371488,133.519395,224.996096,93.169889,86.934145,187.386798,201.856329,228.366406,136.256355
BR_4,302.775716,448.535160,138.330004,137.773272,22.789194,300.758768,355.975760,349.006256,332.023040,149.881542,...,121.810194,3.127964,9.432490,352.269643,139.726142,133.509146,323.596448,338.112457,285.712151,0.000000
BR_5,302.775716,448.535160,138.330004,137.773272,22.789194,300.758768,355.975760,349.006256,332.023040,149.881542,...,121.810194,3.127964,9.432490,352.269643,139.726142,133.509146,323.596448,338.112457,285.712151,0.000000
BR_6,81.016645,329.055260,216.072950,217.606935,321.570281,277.697906,49.106740,62.225567,66.507322,246.593757,...,229.112709,340.329763,343.666135,56.133077,209.371313,213.575239,142.494848,151.521931,152.909651,342.409390
BR_7,302.775716,448.535160,138.330004,137.773272,22.789194,300.758768,355.975760,349.006256,332.023040,149.881542,...,121.810194,3.127964,9.432490,352.269643,139.726142,133.509146,323.596448,338.112457,285.712151,0.000000
BR_8,25.911211,413.315200,185.596081,186.381009,300.947863,340.995482,55.898862,39.610392,25.808172,196.861588,...,201.825789,320.891100,322.248194,47.006036,212.819418,213.960007,51.094120,61.025346,219.633794,323.519310
BR_9,309.042016,450.446231,144.722620,144.177122,29.226863,302.278222,362.092780,355.206065,338.250077,156.156619,...,128.182633,9.421008,13.073989,358.433327,144.717729,138.630267,330.038889,344.557645,289.677427,6.445486


In [35]:
# Write the warning-by-event distance matrix to the ExpressScore test resources.
out_filename = "test_cc_dist_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
dist_df.to_csv(out_path)

In [17]:
# Per-event "approximate location" flags, repeated once per warning so the
# matrix has the same warning-by-event shape as the other matrices.
# The flag is stored as text (e.g. 'True'); compare the text instead of
# eval()-ing a value that was loaded from an external JSON file.
is_approx_list = [
    str(e["Approximate_Location"]).strip().lower() == "true" for e in cc_gsr
]
is_approx_array = np.array(is_approx_list*len(cc_warn)).reshape(len(cc_warn), len(cc_gsr))
is_approx_df = pd.DataFrame(is_approx_array,
                       index = [w["Warning_ID"] for w in cc_warn],
                       columns = [e["Event_ID"] for e in cc_gsr])

is_approx_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN429,MN430,MN431,MN432,MN433,MN434,MN435,MN436,MN437,MN438
BR_0,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_1,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_2,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_3,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_4,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_5,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_6,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_7,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_8,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
BR_9,True,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False


In [36]:
# Write the warning-by-event approximate-location flags to the ExpressScore test resources.
out_filename = "test_approx_location_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
is_approx_df.to_csv(out_path)

In [19]:
# Location score for every warning/event pair.
# Distances are capped at MAX_DIST (the same constant ls() uses as its cutoff)
# so the score bottoms out at 0 instead of going negative; using the constant
# rather than a literal keeps the cap and the cutoff in step.
max_dist_array = MAX_DIST*np.ones(shape=(len(cc_warn), len(cc_gsr)))
ls_dist_array = np.minimum(dist_array, max_dist_array)
ls_array = ls_vfunc(ls_dist_array, is_approx_array)
ls_df = pd.DataFrame(ls_array,
                       index = [w["Warning_ID"] for w in cc_warn],
                       columns = [e["Event_ID"] for e in cc_gsr])

ls_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN429,MN430,MN431,MN432,MN433,MN434,MN435,MN436,MN437,MN438
BR_0,0.560380,0.0,0.000000,0.000000,0.000000,0.000000,0.907304,0.922822,0.763305,0.000000,...,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.052472,0.000000,0.000000,0.000000
BR_1,0.000000,0.0,0.000000,0.000000,0.232754,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.367751,0.307796,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.395936
BR_2,0.482704,0.0,0.000000,0.000000,0.000000,0.000000,0.954189,0.800801,0.674222,0.000000,...,0.000000,0.000000,0.000000,0.875054,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_3,0.000000,0.0,0.834064,0.855099,0.000000,0.000000,0.000000,0.000000,0.000000,0.747927,...,0.765514,0.000000,0.000000,0.000000,0.068301,0.130659,0.000000,0.000000,0.000000,0.000000
BR_4,0.000000,0.0,0.000000,0.000000,0.772108,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.968720,0.905675,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000
BR_5,0.000000,0.0,0.000000,0.000000,0.772108,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.968720,0.905675,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000
BR_6,0.227809,0.0,0.000000,0.000000,0.000000,0.000000,0.508933,0.377744,0.334927,0.000000,...,0.000000,0.000000,0.000000,0.438669,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_7,0.000000,0.0,0.000000,0.000000,0.772108,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.968720,0.905675,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000
BR_8,0.889101,0.0,0.000000,0.000000,0.000000,0.000000,0.441011,0.603896,0.741918,0.000000,...,0.000000,0.000000,0.000000,0.529940,0.000000,0.000000,0.489059,0.389747,0.000000,0.000000
BR_9,0.000000,0.0,0.000000,0.000000,0.707731,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.905790,0.869260,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.935545


In [37]:
# Write the warning-by-event location-score matrix to the ExpressScore test resources.
out_filename = "test_ls_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
ls_df.to_csv(out_path)

In [21]:
# Signed day gap (warning date minus event date) for every warning (rows)
# and every GSR event (columns).
date_diff_array = np.array(
    [date_diff_to_warn(warn, cc_gsr) for warn in cc_warn]
).reshape(len(cc_warn), len(cc_gsr))

date_diff_df = pd.DataFrame(
    data=date_diff_array,
    index=[warn["Warning_ID"] for warn in cc_warn],
    columns=[evt["Event_ID"] for evt in cc_gsr],
)

date_diff_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN429,MN430,MN431,MN432,MN433,MN434,MN435,MN436,MN437,MN438
BR_0,11,11,11,11,11,10,11,11,11,11,...,-19,-15,-15,-10,-11,-11,-16,-19,-12,-18
BR_1,11,11,11,11,11,10,11,11,11,11,...,-19,-15,-15,-10,-11,-11,-16,-19,-12,-18
BR_2,19,19,19,19,19,18,19,19,19,19,...,-11,-7,-7,-2,-3,-3,-8,-11,-4,-10
BR_3,1,1,1,1,1,0,1,1,1,1,...,-29,-25,-25,-20,-21,-21,-26,-29,-22,-28
BR_4,22,22,22,22,22,21,22,22,22,22,...,-8,-4,-4,1,0,0,-5,-8,-1,-7
BR_5,29,29,29,29,29,28,29,29,29,29,...,-1,3,3,8,7,7,2,-1,6,0
BR_6,6,6,6,6,6,5,6,6,6,6,...,-24,-20,-20,-15,-16,-16,-21,-24,-17,-23
BR_7,11,11,11,11,11,10,11,11,11,11,...,-19,-15,-15,-10,-11,-11,-16,-19,-12,-18
BR_8,9,9,9,9,9,8,9,9,9,9,...,-21,-17,-17,-12,-13,-13,-18,-21,-14,-20
BR_9,27,27,27,27,27,26,27,27,27,27,...,-3,1,1,6,5,5,0,-3,4,-2


In [38]:
# Write the warning-by-event signed date-difference matrix to the ExpressScore test resources.
out_filename = "test_cc_date_diff_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
date_diff_df.to_csv(out_path)

In [23]:
# Date score for every warning/event pair.
# The absolute gap goes into its own array so date_diff_array keeps the signed
# values that date_diff_df was built from.
abs_date_diff_array = np.abs(date_diff_array)
# Cap the gap at MAX_DATE_DIFF (the same constant ds() divides by) so the score
# bottoms out at 0; using the constant keeps the cap and the cutoff in step.
max_dd_array = MAX_DATE_DIFF*np.ones(shape=(len(cc_warn), len(cc_gsr)))
min_dd_array = np.zeros(shape=(len(cc_warn), len(cc_gsr)))
ds_dd_array = np.minimum(abs_date_diff_array, max_dd_array)

ds_dd_array = np.maximum(ds_dd_array, min_dd_array)

ds_array = ds_vfunc(ds_dd_array)
ds_df = pd.DataFrame(ds_array,
                       index = [w["Warning_ID"] for w in cc_warn],
                       columns = [e["Event_ID"] for e in cc_gsr])

ds_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN429,MN430,MN431,MN432,MN433,MN434,MN435,MN436,MN437,MN438
BR_0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
BR_1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
BR_2,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.50,0.25,0.25,0.00,0.00,0.00,0.00
BR_3,0.75,0.75,0.75,0.75,0.75,1.00,0.75,0.75,0.75,0.75,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
BR_4,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.75,1.00,1.00,0.00,0.00,0.75,0.00
BR_5,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.75,0.25,0.25,0.00,0.00,0.00,0.50,0.75,0.00,1.00
BR_6,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
BR_7,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
BR_8,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
BR_9,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.25,0.75,0.75,0.00,0.00,0.00,1.00,0.25,0.00,0.50


In [39]:
# Write the warning-by-event date-score matrix to the ExpressScore test resources.
out_filename = "test_ds_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
ds_df.to_csv(out_path)

In [25]:
# 0/1 event-subtype match for every warning (rows) and every GSR event (columns).
es_array = np.array(
    [es_match_to_warn(warn, cc_gsr) for warn in cc_warn]
).reshape(len(cc_warn), len(cc_gsr))

es_df = pd.DataFrame(
    data=es_array,
    index=[warn["Warning_ID"] for warn in cc_warn],
    columns=[evt["Event_ID"] for evt in cc_gsr],
)

es_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN429,MN430,MN431,MN432,MN433,MN434,MN435,MN436,MN437,MN438
BR_0,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
BR_1,1,0,1,1,1,1,1,1,1,0,...,1,1,1,1,1,1,1,0,1,1
BR_2,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
BR_3,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
BR_4,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
BR_5,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
BR_6,1,0,1,1,1,1,1,1,1,0,...,1,1,1,1,1,1,1,0,1,1
BR_7,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
BR_8,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
BR_9,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0


In [40]:
# Write the warning-by-event subtype-match matrix to the ExpressScore test resources.
out_filename = "test_es_match_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
es_df.to_csv(out_path)

In [27]:
# 0/1 actor match for every warning (rows) and every GSR event (columns).
acs_array = np.array(
    [actor_match_to_warn(warn, cc_gsr) for warn in cc_warn]
).reshape(len(cc_warn), len(cc_gsr))
acs_df = pd.DataFrame(
    data=acs_array,
    index=[warn["Warning_ID"] for warn in cc_warn],
    columns=[evt["Event_ID"] for evt in cc_gsr],
)

acs_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN429,MN430,MN431,MN432,MN433,MN434,MN435,MN436,MN437,MN438
BR_0,1,0,1,1,0,0,0,0,0,1,...,1,1,1,0,0,0,0,0,0,1
BR_1,0,0,0,0,0,0,1,1,1,0,...,0,0,1,0,1,1,1,1,1,0
BR_2,0,1,1,1,1,1,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0
BR_3,0,1,1,1,1,1,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0
BR_4,0,1,1,1,1,1,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0
BR_5,1,0,1,1,0,0,0,0,0,1,...,1,1,1,0,0,0,0,0,0,1
BR_6,0,1,1,1,1,1,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0
BR_7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
BR_8,1,0,1,1,0,0,0,0,0,1,...,1,1,1,0,0,0,0,0,0,1
BR_9,0,1,1,1,1,1,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0


In [41]:
# Write the warning-by-event actor-match matrix to the ExpressScore test resources.
out_filename = "test_actor_match_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
acs_df.to_csv(out_path)

In [29]:
# Raw quality score: elementwise sum of the location, date, subtype and actor
# components, accumulated left to right.
qs_mat = ls_array + ds_array
qs_mat = qs_mat + es_array
qs_mat = qs_mat + acs_array
qs_mat

array([[1.56038048, 1.        , 1.        , ..., 1.        , 0.        ,
        1.        ],
       [1.        , 0.        , 1.        , ..., 1.        , 2.        ,
        1.3959359 ],
       [0.48270416, 2.        , 1.        , ..., 1.        , 0.        ,
        0.        ],
       ...,
       [1.        , 1.        , 2.        , ..., 1.        , 1.        ,
        2.71501737],
       [0.        , 2.        , 1.46279469, ..., 1.        , 0.        ,
        0.        ],
       [1.        , 1.        , 2.        , ..., 0.        , 1.        ,
        1.        ]])

In [30]:
# A pair only scores when both its location and date scores are non-zero;
# otherwise the whole quality score is zeroed (in place on qs_mat).
qs_mat[np.logical_or(ls_array == 0, ds_array == 0)] = 0
qs_df = pd.DataFrame(
    data=qs_mat,
    index=[warn["Warning_ID"] for warn in cc_warn],
    columns=[evt["Event_ID"] for evt in cc_gsr],
)

qs_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN429,MN430,MN431,MN432,MN433,MN434,MN435,MN436,MN437,MN438
BR_0,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000
BR_1,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000
BR_2,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,2.375054,0.0,0.0,0.000000,0.000000,0.000000,0.000000
BR_3,0.00000,0.0,2.584064,2.605099,0.000000,0.000000,0.0,0.0,0.0,3.497927,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000
BR_4,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000
BR_5,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,2.218720,2.155675,0.000000,0.0,0.0,0.000000,0.000000,0.000000,3.000000
BR_6,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000
BR_7,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000
BR_8,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000
BR_9,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.000000,1.655790,1.619260,0.000000,0.0,0.0,0.000000,0.000000,0.000000,1.435545


In [42]:
# Write the warning-by-event quality-score matrix to the ExpressScore test resources.
out_filename = "test_qs_mat.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
qs_df.to_csv(out_path)