In [1]:
pwd

'/Users/peterhaglich/Dropbox/Work/IARPA/Mercury/peterhaglich/mercury-challenge/src/ExpressScore/notebooks/test'

In [2]:
%matplotlib inline

In [3]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from dateutil.parser import parse
import datetime
import calendar
import json
import os
import re

import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use("fivethirtyeight")

from collections import Counter

from geopy.distance import distance

In [4]:
# Scenario covered by the generated test fixtures.
EVENT_TYPE = "Military Action"
EVT_ABBR = "MA"
COUNTRY = "Syria"

# Month label, plus the underscore-separated form used inside data file names.
month_str = "May 2018"
month_path_str = month_str.replace(" ", "_")

# Bounds of the Event_Date window; the warning filter treats both ends as inclusive.
FIRST_DATE = parse("2018-05-01")
LAST_DATE = parse("2018-05-31")

# All paths are resolved relative to the notebook's working directory.
MERC_CHALLENGE_HOME = os.path.abspath("../../../..")

MERC_HOME = os.path.join(MERC_CHALLENGE_HOME, os.pardir, "mercury")
EXPRESS_SCORE_PATH = os.path.join(MERC_CHALLENGE_HOME, "src", "ExpressScore")
# Listing the directory doubles as a check that the path was resolved correctly.
print(os.listdir(EXPRESS_SCORE_PATH))
ES_TEST_RESOURCE_PATH = os.path.join(
    EXPRESS_SCORE_PATH, "resources", "test", "sy_ma_may_2018"
)
DATA_HOME = os.path.join(MERC_HOME, "data")
WARN_PATH = os.path.join(DATA_HOME, "baserate_warnings", "MANSA")
month_warn_path = os.path.join(WARN_PATH, month_path_str)
MANSA_GSR_PATH = os.path.join(MERC_CHALLENGE_HOME, "data", "gsr", "ma_gsr")

# Scoring parameters: distances are in kilometres, date differences in days.
MAX_DIST = 100.0
DIST_BUFFER = 16.67
MAX_DATE_DIFF = 4.0

['.DS_Store', '__init__.py', '__pycache__', 'main', 'notebooks', 'README.md', 'resources', 'test']


In [5]:
# Load the month's base-rate warning file and keep only the warnings whose
# event type matches the configured EVENT_TYPE (rather than a repeated literal).
br_warn_filename = "Baserate_MANSA_{}.json".format(month_path_str)
br_warn_path = os.path.join(WARN_PATH, br_warn_filename)
with open(br_warn_path, "r", encoding="utf8") as f:
    br_warn = json.load(f)
# The warnings live under the top-level "payload" key of the file.
br_warn = [w for w in br_warn["payload"] if w["Event_Type"] == EVENT_TYPE]
# Per-country tally, printed as a quick sanity check of the load.
br_country_counts = Counter(w["Country"] for w in br_warn)
print(br_country_counts)

Counter({'Syria': 1685, 'Iraq': 529, 'Lebanon': 42, 'Egypt': 35, 'Saudi Arabia': 5, 'Yemen': 5})


In [6]:
# Load the month's GSR file (a JSON list of events) and keep only the events
# whose type matches the configured EVENT_TYPE (rather than a repeated literal).
gsr_filename = "MA_{}.json".format(month_path_str)
gsr_path = os.path.join(MANSA_GSR_PATH, gsr_filename)
with open(gsr_path, "r", encoding="utf8") as f:
    gsr = json.load(f)
ma_gsr = [e for e in gsr if e["Event_Type"] == EVENT_TYPE]
# Per-country tally, printed as a quick sanity check of the load.
gsr_country_counts = Counter(e["Country"] for e in ma_gsr)
print(gsr_country_counts)

Counter({'Syria': 884, 'Iraq': 439, 'Saudi Arabia': 13, 'Egypt': 10, 'Yemen': 6, 'Lebanon': 4})


In [7]:
# Take the first GSR event of the configured country as a sample record to inspect.
test_evt = list(filter(lambda evt: evt["Country"] == COUNTRY, ma_gsr))[0]
test_evt

{'Actor': 'Russian Military;Syrian Arab Military',
 'Approximate_Location': 'False',
 'City': 'Al Ḩajar al Aswad',
 'Country': 'Syria',
 'Earliest_Reported_Date': '2018-05-01',
 'Event_Date': '2018-05-01',
 'Event_ID': 'MN265279',
 'Event_Subtype': 'Conflict',
 'Event_Type': 'Military Action',
 'First_Reported_Link': 'https://www.almasdarnews.com/article/russian-airstrikes-hammer-isis-in-southern-damascus-as-syrian-palestinian-troops-advance/',
 'GSS_Link': 'https://www.almasdarnews.com/article/russian-airstrikes-hammer-isis-in-southern-damascus-as-syrian-palestinian-troops-advance/',
 'Latitude': 33.4576,
 'Longitude': 36.3042,
 'News_Source': 'Al Masdar',
 'Other_Links': 'http://www.syriahr.com/2018/05/01/%D8%A7%D9%84%D8%B7%D8%A7%D8%A6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%AD%D8%B1%D8%A8%D9%8A%D8%A9-%D8%AA%D8%B9%D8%A7%D9%88%D8%AF-%D8%A7%D8%B3%D8%AA%D9%87%D8%AF%D8%A7%D9%81-%D8%A7%D9%84%D8%AC%D9%86%D9%88/;',
 'Revision_Date': '2018-06-11',
 'State': 'Rif-dimashq'}

In [8]:
# Select the GSR events of the configured country.
cc_gsr = [e for e in ma_gsr if e["Country"] == COUNTRY]
print(len(cc_gsr))
# Renumber the events MN0, MN1, ... in list order.  The dicts are shared with
# ma_gsr, so the new IDs are visible through that list as well.
for i, e in enumerate(cc_gsr):
    e["Event_ID"] = "MN{}".format(i)
out_filename = "test_cc_gsr.json"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
# ensure_ascii=False writes non-ASCII text (e.g. city names) unescaped, so the
# file encoding is pinned to utf8 to match how the inputs were read instead of
# depending on the platform default.
with open(out_path, "w", encoding="utf8") as f:
    json.dump(cc_gsr, f, ensure_ascii=False, indent=2)

884


In [9]:
# Select the warnings of the configured country whose Event_Date falls inside
# the [FIRST_DATE, LAST_DATE] window; each date is parsed only once.
cc_warn = [w for w in br_warn if w["Country"] == COUNTRY]
cc_warn = [w for w in cc_warn
           if FIRST_DATE <= parse(w["Event_Date"]) <= LAST_DATE]
for w in cc_warn:
    # Rename the "Armed Conflict" subtype to "Conflict", the label seen in the GSR.
    if w["Event_Subtype"] == "Armed Conflict":
        w["Event_Subtype"] = "Conflict"
    # Drop every field whose name contains "Target".  The keys are collected
    # first because a dict cannot be resized while it is being iterated.
    for k in [k for k in w if "Target" in k]:
        del w[k]
print(len(cc_warn))
# Assign sequential warning IDs BR_0, BR_1, ... in list order.
for i, w in enumerate(cc_warn):
    w["Warning_ID"] = "BR_{}".format(i)
out_filename = "test_cc_warnings.json"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
# ensure_ascii=False writes non-ASCII text unescaped, so the file encoding is
# pinned to utf8 instead of depending on the platform default.
with open(out_path, "w", encoding="utf8") as f:
    json.dump(cc_warn, f, ensure_ascii=False, indent=2)

1685


In [10]:
# Show the first country event to inspect its fields and current Event_ID.
cc_gsr[0]

{'Actor': 'Russian Military;Syrian Arab Military',
 'Approximate_Location': 'False',
 'City': 'Al Ḩajar al Aswad',
 'Country': 'Syria',
 'Earliest_Reported_Date': '2018-05-01',
 'Event_Date': '2018-05-01',
 'Event_ID': 'MN0',
 'Event_Subtype': 'Conflict',
 'Event_Type': 'Military Action',
 'First_Reported_Link': 'https://www.almasdarnews.com/article/russian-airstrikes-hammer-isis-in-southern-damascus-as-syrian-palestinian-troops-advance/',
 'GSS_Link': 'https://www.almasdarnews.com/article/russian-airstrikes-hammer-isis-in-southern-damascus-as-syrian-palestinian-troops-advance/',
 'Latitude': 33.4576,
 'Longitude': 36.3042,
 'News_Source': 'Al Masdar',
 'Other_Links': 'http://www.syriahr.com/2018/05/01/%D8%A7%D9%84%D8%B7%D8%A7%D8%A6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%AD%D8%B1%D8%A8%D9%8A%D8%A9-%D8%AA%D8%B9%D8%A7%D9%88%D8%AF-%D8%A7%D8%B3%D8%AA%D9%87%D8%AF%D8%A7%D9%81-%D8%A7%D9%84%D8%AC%D9%86%D9%88/;',
 'Revision_Date': '2018-06-11',
 'State': 'Rif-dimashq'}

In [11]:
# Show the first country warning to inspect its fields.
cc_warn[0]

{'City': 'Ḩammūrah',
 'Country': 'Syria',
 'Actor': 'Turkish Military',
 'timestamp': '2018-05-17T19:20:17.0',
 'Longitude': 36.3858,
 'Probability': 0.9717678358,
 'Event_Subtype': 'Conflict',
 'Event_Type': 'Military Action',
 'State': 'Rif-dimashq',
 'Latitude': 33.5278,
 'Event_Date': '2018-05-21'}

In [12]:
def dist(warn, evt):
    """Return the distance in kilometres between a warning and an event.

    Both arguments are mappings providing "Latitude" and "Longitude" entries;
    the value is whatever geopy's ``distance`` reports for the two
    (latitude, longitude) points, read through its ``.km`` attribute.
    """
    warn_point = (warn["Latitude"], warn["Longitude"])
    evt_point = (evt["Latitude"], evt["Longitude"])
    return distance(warn_point, evt_point).km
def dist_to_warn(w, gsr_list):
    """Return the list of distances from warning ``w`` to each event in ``gsr_list``.

    The result preserves the order of ``gsr_list``.
    """
    distances = []
    for evt in gsr_list:
        distances.append(dist(w, evt))
    return distances
def date_diff(warn, evt):
    """Return the signed number of whole days from the event date to the warning date.

    Both arguments are mappings with an "Event_Date" string.  The result is
    positive when the warning's date is later than the event's date.
    """
    return (parse(warn["Event_Date"]) - parse(evt["Event_Date"])).days
def date_diff_to_warn(w, gsr_list):
    """Return the signed day differences between warning ``w`` and each event in ``gsr_list``.

    The result preserves the order of ``gsr_list``.
    """
    day_deltas = []
    for evt in gsr_list:
        day_deltas.append(date_diff(w, evt))
    return day_deltas
def es_match(warn, evt):
    """Return True when the warning and the event carry the same "Event_Subtype"."""
    return warn["Event_Subtype"] == evt["Event_Subtype"]
def es_match_to_warn(w, gsr_list):
    """Return a 0/1 list flagging which events in ``gsr_list`` share ``w``'s subtype.

    The result preserves the order of ``gsr_list``.
    """
    flags = []
    for evt in gsr_list:
        flags.append(int(es_match(w, evt)))
    return flags
def actor_match(warn, evt):
    """Return True when the warning's actor is one of the event's actors.

    The event's "Actor" value is a ";"-separated string; the warning's "Actor"
    must equal one of the separated names exactly.
    """
    listed_actors = evt["Actor"].split(";")
    predicted_actor = warn["Actor"]
    return predicted_actor in listed_actors
def actor_match_to_warn(w, gsr_list):
    """Return a 0/1 list flagging which events in ``gsr_list`` list ``w``'s actor.

    The result preserves the order of ``gsr_list``.
    """
    flags = []
    for evt in gsr_list:
        flags.append(int(actor_match(w, evt)))
    return flags
def ls(dist, approx_flag=False):
    """Return the location score for a distance in kilometres.

    The score is 1 at zero distance and falls linearly, reaching 0 at MAX_DIST.
    When ``approx_flag`` is truthy, DIST_BUFFER is subtracted from both the
    distance (floored at 0) and the maximum distance before scoring.  The input
    is not clamped here, so a distance beyond MAX_DIST yields a negative score.
    """
    buffer = approx_flag*DIST_BUFFER
    effective_dist = max(0, dist-buffer)
    return 1 - effective_dist/(MAX_DIST - buffer)
# Element-wise version for use on whole distance / flag arrays.
ls_vfunc = np.vectorize(ls)
def ds(date_diff):
    """Return the date score for a day difference.

    The score is 1 for a zero difference and falls linearly with the absolute
    difference, reaching 0 at MAX_DATE_DIFF.  The input is not clamped here,
    so a larger difference yields a negative score.
    """
    gap = abs(date_diff)
    return 1 - gap/MAX_DATE_DIFF
# Element-wise version for use on whole date-difference arrays.
ds_vfunc = np.vectorize(ds)

In [13]:
# Smoke-test each pairwise helper on the first warning: print the value for the
# first event, then the full list across all country events.
for scalar_fn, list_fn in [
    (date_diff, date_diff_to_warn),
    (dist, dist_to_warn),
    (es_match, es_match_to_warn),
    (actor_match, actor_match_to_warn),
]:
    print(scalar_fn(cc_warn[0], cc_gsr[0]))
    print(list_fn(cc_warn[0], cc_gsr))

20
[20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 18, 19, 19, 19, 19, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 17, 17, 18, 18, 18, 18, 18, 18, 18, 19, 18, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 20, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 18, 17, 18, 17, 18, 17, 17, 17, 17, 17, 17, 20, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 16, 17, 17, 17, 17, 17, 16, 16, 16, 15, 16, 16, 15, 15, 15, 15, 16, 15, 15, 14, 14, 14, 14, 13, 14, 14, 13, 13, 14, 13, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 14, 13, 13, 13, 13, 11, 20, 19, 17, 17, 17, 17, 18, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 12, 12, 12, 13, 18, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 17, 17, 17, 16, 10, 10, 10, 11, 11, 16, 16, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 10, 10, 10, 10, 10, 10, 17, 17, 11, 16, 11, 16, 16, 16, 19, 19, 16, 16, 16, 15, 16, 16, 16, 15, 15, 15, 15, 15, 15, 

In [14]:
# Compare the first warning's subtype with those of the first two events.
print(cc_warn[0]["Event_Subtype"], cc_gsr[0]["Event_Subtype"], cc_gsr[1]["Event_Subtype"])

Conflict Conflict Conflict


In [15]:
# Pairwise warning-to-event distances in km: one row per warning, one column per event.
dist_array = np.array(
    [dist_to_warn(w, cc_gsr) for w in cc_warn]
).reshape(len(cc_warn), len(cc_gsr))

# Same matrix labelled with warning IDs (rows) and event IDs (columns).
dist_df = pd.DataFrame(
    dist_array,
    index=[w["Warning_ID"] for w in cc_warn],
    columns=[e["Event_ID"] for e in cc_gsr],
)

dist_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN874,MN875,MN876,MN877,MN878,MN879,MN880,MN881,MN882,MN883
BR_0,10.868631,10.107815,10.868631,205.794910,10.868631,19.133773,10.868631,10.868631,136.654341,24.182615,...,192.797673,200.165202,205.794910,10.868631,398.509768,10.868631,409.302757,206.925731,9.303666,248.483147
BR_1,254.724412,253.454343,254.724412,48.531484,254.724412,244.324429,254.724412,254.724412,108.752677,263.067587,...,61.557026,51.717085,48.531484,254.724412,284.894987,254.724412,324.013276,43.202833,253.667918,121.752989
BR_2,160.386751,159.055475,160.386751,54.091852,160.386751,154.287947,160.386751,160.386751,27.618995,167.373469,...,65.717781,48.747479,54.091852,160.386751,332.886175,160.386751,361.539061,56.255603,159.519160,157.639401
BR_3,309.133929,308.015724,309.133929,128.339389,309.133929,293.164693,309.133929,309.133929,168.578675,319.643839,...,108.012410,129.812242,128.339389,309.133929,213.686284,309.133929,257.965893,121.461581,307.809103,87.669541
BR_4,7.023920,6.597643,7.023920,209.933779,7.023920,20.716893,7.023920,7.023920,140.922249,20.780664,...,197.124941,204.318629,209.933779,7.023920,402.057773,7.023920,412.515636,211.109017,5.347033,252.592672
BR_5,225.678599,224.376639,225.678599,16.145306,225.678599,217.079338,225.678599,225.678599,81.084124,233.353149,...,50.119245,18.990194,16.145306,225.678599,304.504303,225.678599,340.555303,10.573915,224.708709,133.594062
BR_6,433.777036,433.521180,433.777036,385.592292,433.777036,406.988776,433.777036,433.777036,369.469297,447.608532,...,332.913531,382.891286,385.592292,433.777036,94.882973,433.777036,43.402215,377.892155,432.084042,240.787894
BR_7,7.023920,6.597643,7.023920,209.933779,7.023920,20.716893,7.023920,7.023920,140.922249,20.780664,...,197.124941,204.318629,209.933779,7.023920,402.057773,7.023920,412.515636,211.109017,5.347033,252.592672
BR_8,311.746957,310.837232,311.746957,173.204602,311.746957,291.191252,311.746957,311.746957,188.549147,323.965330,...,132.346906,172.482604,173.204602,311.746957,148.140523,311.746957,189.885820,165.422915,310.218939,53.191005
BR_9,275.999983,274.668013,275.999983,61.796172,275.999983,269.004088,275.999983,275.999983,133.653851,282.788407,...,98.621825,67.588262,61.796172,275.999983,319.665589,275.999983,360.881119,61.730369,275.131613,162.170929


In [16]:
# Save the labelled distance matrix as CSV in the test-resource directory.
out_filename = "test_cc_dist_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
dist_df.to_csv(out_path)

In [17]:
# The GSR stores the approximate-location flag as the string "True"/"False".
# Translate it with an explicit lookup rather than eval(), which would execute
# whatever text the data file contains; an unexpected value raises KeyError.
APPROX_FLAG_VALUES = {"True": True, "False": False}
is_approx_list = [APPROX_FLAG_VALUES[e["Approximate_Location"]] for e in cc_gsr]
# The flag depends only on the event, so the same row is repeated for every warning.
is_approx_array = np.array(is_approx_list*len(cc_warn)).reshape(len(cc_warn), len(cc_gsr))
is_approx_df = pd.DataFrame(is_approx_array,
                       index = [w["Warning_ID"] for w in cc_warn],
                       columns = [e["Event_ID"] for e in cc_gsr])

is_approx_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN874,MN875,MN876,MN877,MN878,MN879,MN880,MN881,MN882,MN883
BR_0,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_1,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_2,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_3,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_4,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_5,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_6,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_7,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_8,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True
BR_9,False,False,True,False,True,True,False,False,False,False,...,False,False,False,True,False,True,True,False,False,True


In [18]:
# Save the approximate-location flag matrix as CSV in the test-resource directory.
out_filename = "test_approx_location_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
is_approx_df.to_csv(out_path)

In [19]:
# Cap every distance at MAX_DIST before scoring so ls() never goes negative.
# The cap uses the same constant as ls() itself, so the two cannot drift apart
# if the configured maximum distance is changed.
max_dist_array = MAX_DIST*np.ones(shape=(len(cc_warn), len(cc_gsr)))
ls_dist_array = np.minimum(dist_array, max_dist_array)
# Location score per (warning, event) pair, using each event's approximate-location flag.
ls_array = ls_vfunc(ls_dist_array, is_approx_array)
ls_df = pd.DataFrame(ls_array,
                       index = [w["Warning_ID"] for w in cc_warn],
                       columns = [e["Event_ID"] for e in cc_gsr])

ls_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN874,MN875,MN876,MN877,MN878,MN879,MN880,MN881,MN882,MN883
BR_0,0.891314,0.898922,1.000000,0.000000,1.000000,0.970434,0.891314,0.891314,0.000000,0.758174,...,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,0.906963,0.000000
BR_1,0.000000,0.000000,0.000000,0.514685,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.384430,0.482829,0.514685,0.000000,0.000000,0.000000,0.000000,0.567972,0.000000,0.000000
BR_2,0.000000,0.000000,0.000000,0.459081,0.000000,0.000000,0.000000,0.000000,0.723810,0.000000,...,0.342822,0.512525,0.459081,0.000000,0.000000,0.000000,0.000000,0.437444,0.000000,0.000000
BR_3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.147971
BR_4,0.929761,0.934024,1.000000,0.000000,1.000000,0.951435,0.929761,0.929761,0.000000,0.792193,...,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,0.946530,0.000000
BR_5,0.000000,0.000000,0.000000,0.838547,0.000000,0.000000,0.000000,0.000000,0.189159,0.000000,...,0.498808,0.810098,0.838547,0.000000,0.000000,0.000000,0.000000,0.894261,0.000000,0.000000
BR_6,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.051170,0.000000,0.679201,0.000000,0.000000,0.000000
BR_7,0.929761,0.934024,1.000000,0.000000,1.000000,0.951435,0.929761,0.929761,0.000000,0.792193,...,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,0.946530,0.000000
BR_8,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.561730
BR_9,0.000000,0.000000,0.000000,0.382038,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.013782,0.324117,0.382038,0.000000,0.000000,0.000000,0.000000,0.382696,0.000000,0.000000


In [20]:
# Save the location-score matrix as CSV in the test-resource directory.
out_filename = "test_ls_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
ls_df.to_csv(out_path)

In [21]:
# Signed day difference (warning date minus event date): one row per warning,
# one column per event.
date_diff_array = np.array(
    [date_diff_to_warn(w, cc_gsr) for w in cc_warn]
).reshape(len(cc_warn), len(cc_gsr))

# Same matrix labelled with warning IDs (rows) and event IDs (columns).
date_diff_df = pd.DataFrame(
    date_diff_array,
    index=[w["Warning_ID"] for w in cc_warn],
    columns=[e["Event_ID"] for e in cc_gsr],
)

date_diff_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN874,MN875,MN876,MN877,MN878,MN879,MN880,MN881,MN882,MN883
BR_0,20,20,20,20,20,20,20,20,20,20,...,-10,-6,-6,12,-8,7,11,13,-3,-9
BR_1,15,15,15,15,15,15,15,15,15,15,...,-15,-11,-11,7,-13,2,6,8,-8,-14
BR_2,21,21,21,21,21,21,21,21,21,21,...,-9,-5,-5,13,-7,8,12,14,-2,-8
BR_3,7,7,7,7,7,7,7,7,7,7,...,-23,-19,-19,-1,-21,-6,-2,0,-16,-22
BR_4,29,29,29,29,29,29,29,29,29,29,...,-1,3,3,21,1,16,20,22,6,0
BR_5,15,15,15,15,15,15,15,15,15,15,...,-15,-11,-11,7,-13,2,6,8,-8,-14
BR_6,1,1,1,1,1,1,1,1,1,1,...,-29,-25,-25,-7,-27,-12,-8,-6,-22,-28
BR_7,27,27,27,27,27,27,27,27,27,27,...,-3,1,1,19,-1,14,18,20,4,-2
BR_8,4,4,4,4,4,4,4,4,4,4,...,-26,-22,-22,-4,-24,-9,-5,-3,-19,-25
BR_9,8,8,8,8,8,8,8,8,8,8,...,-22,-18,-18,0,-20,-5,-1,1,-15,-21


In [22]:
# Save the signed date-difference matrix as CSV in the test-resource directory.
out_filename = "test_cc_date_diff_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
date_diff_df.to_csv(out_path)

In [23]:
# NOTE: this overwrites the signed date_diff_array with its absolute values;
# date_diff_df, built earlier from the signed array, is a separate object.
date_diff_array = np.abs(date_diff_array)
# Cap the day differences at MAX_DATE_DIFF before scoring so ds() never goes
# negative.  The cap uses the same constant as ds() itself, so the two cannot
# drift apart if the configured maximum date difference is changed.
max_dd_array = MAX_DATE_DIFF*np.ones(shape=(len(cc_warn), len(cc_gsr)))
min_dd_array = np.zeros(shape=(len(cc_warn), len(cc_gsr)))
ds_dd_array = np.minimum(date_diff_array, max_dd_array)

# Lower bound at zero (already satisfied after the absolute value above).
ds_dd_array = np.maximum(ds_dd_array, min_dd_array)

# Date score per (warning, event) pair.
ds_array = ds_vfunc(ds_dd_array)
ds_df = pd.DataFrame(ds_array,
                       index = [w["Warning_ID"] for w in cc_warn],
                       columns = [e["Event_ID"] for e in cc_gsr])

ds_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN874,MN875,MN876,MN877,MN878,MN879,MN880,MN881,MN882,MN883
BR_0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.25,0.00
BR_1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.50,0.00,0.00,0.00,0.00
BR_2,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.50,0.00
BR_3,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.75,0.00,0.00,0.50,1.00,0.00,0.00
BR_4,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.75,0.25,0.25,0.00,0.75,0.00,0.00,0.00,0.00,1.00
BR_5,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.50,0.00,0.00,0.00,0.00
BR_6,0.75,0.75,0.75,0.75,0.75,0.75,0.75,0.75,0.75,0.75,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
BR_7,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.25,0.75,0.75,0.00,0.75,0.00,0.00,0.00,0.00,0.50
BR_8,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.25,0.00,0.00
BR_9,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,1.00,0.00,0.00,0.75,0.75,0.00,0.00


In [24]:
# Save the date-score matrix as CSV in the test-resource directory.
out_filename = "test_ds_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
ds_df.to_csv(out_path)

In [25]:
# Event-subtype agreement as 0/1: one row per warning, one column per event.
es_array = np.array(
    [es_match_to_warn(w, cc_gsr) for w in cc_warn]
).reshape(len(cc_warn), len(cc_gsr))

# Same matrix labelled with warning IDs (rows) and event IDs (columns).
es_df = pd.DataFrame(
    es_array,
    index=[w["Warning_ID"] for w in cc_warn],
    columns=[e["Event_ID"] for e in cc_gsr],
)

es_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN874,MN875,MN876,MN877,MN878,MN879,MN880,MN881,MN882,MN883
BR_0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_2,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_3,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_4,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_5,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_6,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_7,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_8,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1
BR_9,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,0,1


In [26]:
# Save the event-subtype match matrix as CSV in the test-resource directory.
out_filename = "test_es_match_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
es_df.to_csv(out_path)

In [27]:
# Actor agreement as 0/1: one row per warning, one column per event.
acs_array = np.array(
    [actor_match_to_warn(w, cc_gsr) for w in cc_warn]
).reshape(len(cc_warn), len(cc_gsr))
# Same matrix labelled with warning IDs (rows) and event IDs (columns).
acs_df = pd.DataFrame(
    acs_array,
    index=[w["Warning_ID"] for w in cc_warn],
    columns=[e["Event_ID"] for e in cc_gsr],
)

acs_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN874,MN875,MN876,MN877,MN878,MN879,MN880,MN881,MN882,MN883
BR_0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
BR_1,1,0,1,0,1,1,1,1,1,1,...,0,1,1,1,1,1,1,0,1,1
BR_2,1,0,1,0,1,1,1,1,1,1,...,0,1,1,1,1,1,1,0,1,1
BR_3,1,0,1,0,1,1,1,1,1,1,...,0,1,1,1,1,1,1,0,1,1
BR_4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
BR_5,1,0,1,0,1,1,1,1,1,1,...,0,1,1,1,1,1,1,0,1,1
BR_6,1,0,1,0,1,1,1,1,1,1,...,0,1,1,1,1,1,1,0,1,1
BR_7,0,0,0,0,1,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
BR_8,1,0,1,0,1,1,1,1,1,1,...,0,1,1,1,1,1,1,0,1,1
BR_9,1,0,1,0,1,1,1,1,1,1,...,0,1,1,1,1,1,1,0,1,1


In [28]:
# Save the actor match matrix as CSV in the test-resource directory.
out_filename = "test_actor_match_matrix.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
acs_df.to_csv(out_path)

In [29]:
# Raw quality score per (warning, event) pair: element-wise sum of the location,
# date, event-subtype and actor components.
qs_mat = ls_array + ds_array + es_array + acs_array
qs_mat

array([[1.89131369, 1.89892185, 2.        , ..., 1.        , 1.15696334,
        1.        ],
       [2.        , 1.        , 2.        , ..., 0.56797167, 1.        ,
        2.        ],
       [2.        , 1.        , 2.        , ..., 0.43744397, 1.5       ,
        2.        ],
       ...,
       [2.        , 1.        , 2.        , ..., 0.50972048, 1.        ,
        3.        ],
       [1.        , 1.        , 1.        , ..., 0.19163181, 0.        ,
        2.16806217],
       [2.        , 1.        , 2.        , ..., 0.41861776, 1.        ,
        2.        ]])

In [30]:
# A pair scores zero overall whenever its location score or its date score is
# zero, regardless of the other components.
qs_mat[(ls_array == 0) | (ds_array == 0)] = 0
# Final matrix labelled with warning IDs (rows) and event IDs (columns).
qs_df = pd.DataFrame(
    qs_mat,
    index=[w["Warning_ID"] for w in cc_warn],
    columns=[e["Event_ID"] for e in cc_gsr],
)

qs_df

Unnamed: 0,MN0,MN1,MN2,MN3,MN4,MN5,MN6,MN7,MN8,MN9,...,MN874,MN875,MN876,MN877,MN878,MN879,MN880,MN881,MN882,MN883
BR_0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.156963,0.000000
BR_1,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_2,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_3,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_4,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_5,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_6,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_7,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_8,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
BR_9,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.132696,0.000000,0.000000


In [31]:
# Save the final quality-score matrix as CSV in the test-resource directory.
out_filename = "test_qs_mat.csv"
out_path = os.path.join(ES_TEST_RESOURCE_PATH, out_filename)
qs_df.to_csv(out_path)