In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pyecharts.options as opts
from pyecharts.charts import *

from pprint import pprint
from datetime import datetime
from functools import wraps

import os
# List the current working directory; the recorded cell output shows
# 'GTD_data.xlsx' sitting next to the notebook, which the next cell loads.
os.listdir()

['GTD_analysis.ipynb', 'GTD_data.xlsx', '.ipynb_checkpoints']

In [2]:
# Read the raw workbook into a DataFrame. The path is relative to the working
# directory, and pandas' read_excel reads the first sheet when none is given.
GTD_data = pd.read_excel('GTD_data.xlsx')

In [3]:
# Keep handles on the raw frame's row index and its column labels (as a list).
GTD_data_index = GTD_data.index
GTD_data_columns = [*GTD_data.columns]

# Coded variables that have a companion "<name>_txt" column, each with a count.
# NOTE(review): the counts match the `cnt` arguments later passed to __LOOP__
# (number of numbered columns such as attacktype1..3) -- confirm intent, since
# this dict is not read by any code visible in this part of the notebook.
code_txt_cnt = {
    'country': 1,
    'region': 1,
    'alternative': 1,
    'attacktype': 3,
    'targtype': 3,
    'targsubtype': 3,
    'natlty': 3,
    'claimmode': 1,
    'claimmode2': 1,
    'claimmode3': 1,
    'weaptype': 4,
    'weapsubtype': 4,
    'propextent': 1,
    'hostkidoutcome': 1,
}
# Variable names in declaration order.
code_txt_vars = list(code_txt_cnt)

In [20]:
# Code to Text mapping

# Shared scratch set of (code, text) pairs used by the helpers below:
# __INNER_LOOP__ clears and refills it in place, __LOOP__ may rebind it to a
# union of several columns, and SS_VALIDITY validates and returns it.
# Because it is reused, callers must copy it (set.copy() / dict(...)) before
# the next helper call if they want to keep its contents.
__SS__ = set()

def __SET_CHECK__(s):
    """Check that an iterable of (code, text) pairs is a one-to-one mapping.

    Walks the pairs once and fails as soon as a code or a text value that was
    already seen shows up again, i.e. when one code has two texts or one text
    has two codes.

    Parameters
    ----------
    s : iterable of 2-tuples
        The (code, text) pairs to validate. Both members must be hashable.

    Returns
    -------
    The same object ``s`` that was passed in, so the call can be chained.

    Raises
    ------
    AssertionError
        If a code or a text value occurs in more than one pair.
    """
    vis_code, vis_str = set(), set()
    for code, string in s:
        assert (code not in vis_code) and (string not in vis_str),     \
            "Duplicated/invalid <code, text> pair: ({}, {}), ({}, {})" \
            .format(code, string, type(code), type(string))
        vis_code.add(code)
        vis_str.add(string)
    return s

def SS_VALIDITY(f):
    """Decorator: run ``f`` for its side effects, then validate ``__SS__``.

    The wrapped callable takes ``(x, y=1)`` and forwards both positionally to
    ``f``. Whatever ``f`` returns is discarded; the wrapper instead returns the
    module-level ``__SS__`` set (looked up after ``f`` has run, so a rebinding
    done inside ``f`` is honoured) once ``__SET_CHECK__`` has accepted it.
    """
    def decorated(x, y = 1):
        # Only the effect of f on the shared set matters here.
        f(x, y)
        checked = __SET_CHECK__(__SS__)
        return checked

    # Copy f's name/docstring onto the wrapper.
    return wraps(f)(decorated)

def __INNER_LOOP__(x):
    """Refill the shared ``__SS__`` set from one code column and its text column.

    Reads ``GTD_data[x]`` (codes) and ``GTD_data[x + "_txt"]`` (texts) row by
    row. Rows where both values are missing are skipped; a row where exactly
    one of them is missing trips an assertion. Every remaining row contributes
    the pair ``(int(code), text)``.

    Returns the shared ``__SS__`` object itself (not a copy); it is emptied at
    the start of every call.
    """
    global __SS__
    __SS__.clear()
    text_column = x + "_txt"
    for code, text in zip(list(GTD_data[x]), list(GTD_data[text_column])):
        code_missing, text_missing = pd.isna(code), pd.isna(text)
        if code_missing and text_missing:
            continue
        # A half-filled row means code and text are out of sync.
        assert not (code_missing or text_missing), \
            "Missing <code, text> ({}, {}) data: ({}, {})" \
            .format(type(code), type(text), code, text)
        __SS__.add((int(code), text))
    return __SS__

@SS_VALIDITY
def __LOOP__(t, cnt = 1):
    """Collect the (code, text) pairs of variable ``t`` into ``__SS__``.

    With ``cnt == 1`` the single column ``t`` is read. Otherwise the numbered
    columns ``t1`` .. ``t<cnt>`` are read one after another and ``__SS__`` is
    rebound to the union of their pairs. The SS_VALIDITY decorator then
    validates and returns the resulting shared set.
    """
    global __SS__
    if cnt != 1:
        # __INNER_LOOP__ reuses one set, so snapshot it after each column.
        snapshots = [
            __INNER_LOOP__(t + str(idx)).copy()
            for idx in range(1, cnt + 1)
        ]
        __SS__ = set.union(*snapshots)
        return None
    return __INNER_LOOP__(t)
        
# Build one {int code: text} dict per coded variable. Each __LOOP__ call
# returns the shared __SS__ set, so its result is materialised immediately
# (dict(...) or .copy()) before the next call clears that set.

# BEGIN_II_TERRINFO
## "alternative" is only meaningful when "doubtterr" == 1.
## "doubtterr" == -9 means the doubtterr variable was not considered
## when the data was collected, so the value is invalid.
mp_alternative = dict(__LOOP__('alternative'))
# END_II_TERRINFO

# BEGIN_III_PLACE
mp_region = dict(__LOOP__('region'))
mp_country = dict(__LOOP__('country'))
# END_III_PLACE

# BEGIN_IV_ATKINFO
mp_attacktype = dict(__LOOP__('attacktype', 3))
# END_IV_ATKINFO

# BEGIN_V_WEAPINFO
mp_weaptype = dict(__LOOP__('weaptype', 4))  ## Nuclear doesn't appear in data.
mp_weapsubtype = dict(__LOOP__('weapsubtype', 4))
# END_V_WEAPINFO

# BEGIN_VI_TARGINFO
mp_targtype = dict(__LOOP__('targtype', 3))
mp_targsubtype = dict(__LOOP__('targsubtype', 3))

# Nationality codes: for every code present in both the natlty and the country
# mapping, the two texts must agree. The mappings are then merged (country
# entries win on overlap, which the assertion makes a no-op).
tmp = dict(__LOOP__('natlty', 3))
tmp_keys = set(tmp.keys()) & set(mp_country.keys())
diff = [(k, tmp[k], mp_country[k]) for k in tmp_keys if tmp[k] != mp_country[k]]
assert len(diff) == 0, \
    "Natlty data isn't consistent with country data: {}".format(diff)
mp_natlty = { **tmp, **mp_country }
# From here on mp_country and mp_natlty are the very same dict object.
mp_country = mp_natlty
del tmp, tmp_keys, diff
# END_VI_TARGINFO

# BEGIN_VII_GINFO
# The claimmode columns are named claimmode / claimmode2 / claimmode3 (no "1"
# suffix on the first), so they are read one at a time instead of through
# __LOOP__'s numbered mode. Each result is copied because the following call
# clears the shared set; the union is validated again as a whole.
mp_claimmode = dict(__SET_CHECK__(
    set.union(
        __LOOP__('claimmode').copy(),
        __LOOP__('claimmode2').copy(),
        __LOOP__('claimmode3').copy()
    )
))
# END_VII_GINFO

# BEGIN_VIII_SUM
mp_propextent = dict(__LOOP__('propextent'))
mp_hostkidoutcome = dict(__LOOP__('hostkidoutcome'))
# END_VIII_SUM

# BEGIN_IX_EX
# END_IX_EX

# Lookup table from base variable name to its {code: text} mapping.
to_text_dict = {
    'country'       : mp_country,
    'region'        : mp_region,
    'alternative'   : mp_alternative,
    'attacktype'    : mp_attacktype,
    'targtype'      : mp_targtype,
    'targsubtype'   : mp_targsubtype,
    'natlty'        : mp_natlty,
    'claimmode'     : mp_claimmode,
    'weaptype'      : mp_weaptype,
    'weapsubtype'   : mp_weapsubtype,
    'propextent'    : mp_propextent,
    'hostkidoutcome': mp_hostkidoutcome
}

# Human-readable (Chinese) descriptions for the codes 1-5 of the geocoding
# "specificity" variable. These are runtime strings and are kept in Chinese.
# Entry 4 previously stopped mid-sentence ("...so the coordinates are"); it now
# ends with the missing object, "the center of the first-level administrative
# region", matching the GTD codebook definition of code 4.
info_specificity = {
    1: "精确位置：事件发生在城市/乡村/镇",
    2: "最小的地方行政区的质心：事件发生在城市/乡村/镇，但没有可以发现的经纬度",
    3: "最小的地方行政区的质心：事件没有在城市/乡村/镇发生",
    4: "第一级行政区域的中心：没有可被识别的第二级或更小的区域，所以坐标是第一级行政区域的中心",
    5: "经纬度未知：没有可被识别的第一级行政区可以与恐怖袭击的地点相对应"
}

In [22]:
# Function definition

def parse_eventid(s):
    """Split a 12-character event id into its date part and its sequence part.

    Parameters
    ----------
    s : str or int
        The event id, e.g. ``"199801010001"``. Integer ids (the form the
        ``eventid`` column takes in the displayed frame) are accepted as well
        and converted to their decimal text first; previously they failed with
        ``TypeError`` because ``len()`` is undefined for integers.

    Returns
    -------
    tuple of (str, str)
        ``(day, eventid)``: the first 8 characters and the last 4 characters.

    Raises
    ------
    AssertionError
        If the textual id is not exactly 12 characters long.
    """
    if not isinstance(s, (str, bytes)):
        # numpy / Python integers render as plain digits under str().
        s = str(s)
    assert len(s) == 12, "Eventid corrupted, length != 12"
    day, eventid = s[:8], s[8:]
    return (day, eventid)

def line_parse_crit(line_dict):
    """Combine the three criterion fields of a row into one integer.

    ``crit1``, ``crit2`` and ``crit3`` are each converted with ``int`` and
    weighted by 1, 2 and 4 respectively, so for 0/1 flags the result is a
    3-bit mask in the range 0..7.

    ``line_dict`` is any mapping (dict, Series row, ...) exposing those keys.
    """
    weighted_fields = (('crit1', 1), ('crit2', 2), ('crit3', 4))
    total = 0
    for field, weight in weighted_fields:
        total += int(line_dict[field]) * weight
    return total

## Judging approx by eventid is not accurate, so the month/day fields are
## inspected instead.
def line_parse_approx(line_approx):
    """Encode which of the row's ``imonth`` / ``iday`` fields are zero.

    A field counts as zero when it equals the integer ``0`` or the string
    ``'0'``. The result is ``2 * month_is_zero + day_is_zero``: 0 when neither
    is zero, 1 for day only, 2 for month only, 3 for both.

    NOTE(review): presumably a zero month/day marks an unknown (approximate)
    date in the dataset -- confirm against the codebook.
    """
    def _is_zero(value):
        return value == '0' or value == 0

    month_is_zero = _is_zero(line_approx['imonth'])
    day_is_zero = _is_zero(line_approx['iday'])
    return month_is_zero * 2 + day_is_zero

def strip_summary_date(s):
    """Drop the leading "<prefix>:" from a summary string.

    Everything up to and including the first ``':'`` is removed, together with
    at most one space that directly follows it.

    The earlier hand-written scan raised ``IndexError`` when the string had no
    colon at all (including the empty string) and when the colon was the last
    character; both cases are now handled:

    * no colon  -> the string is returned unchanged;
    * colon last -> the empty string is returned.

    Parameters
    ----------
    s : str
        The summary text.

    Returns
    -------
    str
        The text after the first colon (and one optional space).
    """
    _prefix, colon, remainder = s.partition(':')
    if not colon:
        # Nothing to strip.
        return s
    # Only a single separating space is removed, as before.
    if remainder.startswith(' '):
        return remainder[1:]
    return remainder

def to_text(x, s):
    """Translate code ``x`` of column ``s`` into its text label.

    Missing codes (anything ``pd.isna`` accepts) map to ``np.nan``. Otherwise
    the mapping is taken from ``to_text_dict``; if the column name ends in a
    digit, that one digit is dropped first so numbered columns share the
    mapping of their base name (e.g. 'attacktype2' uses 'attacktype').

    Raises ``KeyError`` when the variable or the code is not in the table.
    """
    if pd.isna(x):
        return np.nan
    base_name = s[:-1] if str.isdigit(s[-1]) else s
    mapping = to_text_dict[base_name]
    return mapping[x]

In [24]:
GTD_ind = GTD_data_index

## 135-28: drop every column whose name contains "_txt", keeping the coded
## columns only (texts can be recovered through the code -> text mappings).
GTD_col = list(filter(lambda name: "_txt" not in name, GTD_data_columns))

GTD = GTD_data[GTD_col]
GTD

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,region,provstate,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
0,199801010001,1998,1,1,,0,NaT,34,11,Bujumbura Mairie,...,,"“Burundi Rebels, Ex-Rwandan Army Soldiers Blam...",“Burundi--Attack Reported on Bujumbura Airport...,,CETIS,0,1,0,1,
1,199801010002,1998,1,1,,0,NaT,167,9,Moscow (Federal City),...,,"“Bomb injures 3 in Moscow subway system,” The ...","“Bomb injures 3 in Moscow subway,” Charleston ...","“Bomb Injures 3 Workers in Moscow Metro,” Los ...",CETIS,-9,-9,0,-9,
2,199801010003,1998,1,1,,0,NaT,603,8,Northern Ireland,...,,“Protestant gunmen kill Catholic in New Year's...,“Ulster Peace Shattered by Shooting: Catholic ...,,CETIS,0,0,1,1,
3,199801020001,1998,1,2,,0,NaT,95,10,Baghdad,...,,“Iraq Condemns Attack on UNSCOM Baghdad Office...,"Farouk Choukri , “Iraq, UN Officials Continue ...","“Iraqi Interior Minister on UNSCOM Attack, Kuw...",CETIS,-9,-9,1,1,
4,199801020002,1998,1,2,,0,NaT,155,10,West Bank,...,,"“Woman Shot,” The Philadelphia Inquirer, Janua...",“Israeli Woman Critically Hurt by Gunfire in W...,,CETIS,-9,-9,0,-9,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114178,201712310022,2017,12,31,,0,NaT,182,11,Middle Shebelle,...,,"""Somalia: Al-Shabaab Militants Attack Army Che...","""Highlights: Somalia Daily Media Highlights 2 ...","""Highlights: Somalia Daily Media Highlights 1 ...",START Primary Collection,0,0,0,0,
114179,201712310029,2017,12,31,,0,NaT,200,10,Lattakia,...,,"""Putin's 'victory' in Syria has turned into a ...","""Two Russian soldiers killed at Hmeymim base i...","""Two Russian servicemen killed in Syria mortar...",START Primary Collection,-9,-9,1,1,
114180,201712310030,2017,12,31,,0,NaT,160,5,Maguindanao,...,,"""Maguindanao clashes trap tribe members,"" Phil...",,,START Primary Collection,0,0,0,0,
114181,201712310031,2017,12,31,,0,NaT,92,6,Manipur,...,,"""Trader escapes grenade attack in Imphal,"" Bus...",,,START Primary Collection,-9,-9,0,-9,
