# North Korea provocations database

In [1]:
# Load the lab_black IPython extension for this kernel session
# (presumably auto-formats executed cells with Black — confirm).
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_grid as altgrid
from altair_saver import save
import numpy as np
import us
import urllib.request, json
import glob
import os
import requests
from bs4 import BeautifulSoup
import os
import time
import matplotlib.pyplot as plt
import squarify
from matplotlib.pyplot import figure

In [3]:
# Register the altair_grid theme under the name "grid", then make it the
# active Altair theme for every chart built below.
alt.themes.register("grid", altgrid.theme)
alt.themes.enable("grid")

# NOTE(review): this only creates (and displays) an empty 8x6in, 80-dpi
# matplotlib figure; it presumably was meant to set a default figure size,
# which would need plt.rcParams instead — confirm the intent.
plt.figure(figsize=(8, 6), dpi=80)

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [4]:
# Let pandas show up to 50 columns and 1000 rows before truncating output.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Turn off Altair's maximum-rows check on chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Download latest version of the database from here

In [5]:
# Source page for the database:
# https://beyondparallel.csis.org/database-north-korean-provocations/
# NOTE(review): presumably the export from this page is what is saved under
# data/raw/ and read in the next section — confirm.

### Import data

In [6]:
# Read the raw provocations CSV into `src`; the path is relative to the
# directory the notebook kernel is started from.
src = pd.read_csv("data/raw/Provocations New.csv")

In [7]:
# Normalise the column headers to snake_case: trim surrounding whitespace,
# lowercase, turn spaces and hyphens into underscores, drop parentheses.
#
# Every pattern here is a plain literal, so match literally (regex=False).
# With regex=True a lone "(" or ")" is not a valid regular expression and
# raises re.error on pandas versions that no longer special-case
# single-character patterns.
src.columns = (
    src.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace("-", "_", regex=False)
)

In [12]:
# Show the full, untruncated description text of the first row as a sanity check.
src["description"].iat[0]

'Around 10 North Korean military aircraft conducted exercises near the inter-Korean border between 10:30pm and 12:20 am. Some flew as close as 15 miles to the Military Demarcation Line, 7 miles close to the Northern Limit Line and 3 miles close to the no-fly zone agreed to in the 2018 September 19 military agreement.'

### Parse dates

In [9]:
# Convert the date column to datetimes, then derive calendar parts from it.
src["date"] = pd.to_datetime(src["date"])
src["year"] = src["date"].dt.year
src["month"] = src["date"].dt.month
# "day" holds the weekday name (e.g. "Monday"). A day-of-month value is not
# written to this column first: it would be overwritten by this assignment
# and never be visible downstream.
src["day"] = src["date"].dt.day_name()

In [10]:
# Shorten the category labels by removing the literal text " Provocation"
# (e.g. "Missile Provocation" -> "Missile"); matched as a plain string.
src["type"] = src["type"].str.replace(" Provocation", "", regex=False)

In [35]:
# Number of rows (events) loaded.
src.shape[0]

In [37]:
# Preview the last five rows to check the derived year/month/day columns.
src.tail(5)

Unnamed: 0,date,type,event,description,resources,year,month,day
331,1963-07-29,Other,Exchange of Fire at DMZ,A group of at least seven North Korean soldier...,https://goo.gl/8PXjzE; The US Military Experie...,1963,7,Monday
332,1962-11-20,Other,Exchange of Fire at DMZ,NK troops attacked a UN observation post 7 mil...,http://www.cfr.org/content/publications/attach...,1962,11,Tuesday
333,1960-07-30,Other,Exchange of Fire and Sinking of NK Vessel,A South Korean destroyer sinks a North Korean ...,https://goo.gl/NEgR7x The US Military Experien...,1960,7,Saturday
334,1959-06-15,Other,NK Fighter Jets Attack U.S. Navy Plane,Two North Korean jet fighters (MiG) attacked a...,https://goo.gl/UFh93r The US Military Experien...,1959,6,Monday
335,1958-02-16,Other,Hijacking of South Korean Plane,North Korean agents hijacked a South Korean pl...,https://goo.gl/Xn9Wr4 The US Military Experien...,1958,2,Sunday


---

## Aggregate

#### Provocation types

In [11]:
# Count the events with a non-null date for every (type, year) pair and
# flatten the result into a frame with columns: type, year, count.
prov_types_src = (
    src.groupby(["type", "year"])["date"]
    .count()
    .reset_index(name="count")
)

In [1]:
# Preview the first 27 aggregated rows.
# NOTE(review): this cell needs the aggregation cell above to have run first
# in the current kernel.
prov_types_src.iloc[:27]

NameError: name 'prov_types_src' is not defined

In [12]:
# Disabled: would add a Dec 31 "date" for each year. It is left commented out;
# note that `prov_types` is only created by the merge in a later cell, so this
# line could not run at this position anyway.
# prov_types["date"] = pd.to_datetime("12/31/" + prov_types["year"].astype(str))

In [13]:
# Fetch the published Google Sheet as CSV (requires network access).
# NOTE(review): presumably this sheet lists every year/type combination so
# the right-merge below can fill missing pairs with zero — confirm.
dummy = pd.read_csv(
    "https://docs.google.com/spreadsheets/d/e/"
    "2PACX-1vT1tUkO4UkOeOl5_O_j5yEHplUJF1NLC1u55kCWg_1HsbTputy80dZJubTzCdHCKPmTgMS8V9KS6cmM"
    "/pub?gid=0&single=true&output=csv"
)

In [14]:
# Right-join the observed counts onto the `dummy` frame so every (year, type)
# row from `dummy` is kept; pairs with no observed events get a count of 0.
prov_types = (
    prov_types_src
    .merge(dummy, how="right", on=["year", "type"])
    .fillna(0)
)

In [15]:
# Store the year as text: the charts encode it as an ordinal axis and list
# their tick values as strings.
prov_types = prov_types.assign(year=prov_types["year"].astype(str))

In [41]:
# Spot-check the rows for 2022 (year is a string at this point).
prov_types.query('year == "2022"')

Unnamed: 0,type,year,count
62,Missile,2022,27.0
125,Nuclear,2022,0.0
188,Other,2022,7.0


---

## Charts

#### Extended

In [64]:
# Extended layout (820x200): one semi-transparent bubble per (year, type),
# with bubble area scaled by the event count and colour keyed to the type.
circles = (
    alt.Chart(prov_types)
    # NOTE(review): `thickness` is documented as a tick-mark property and
    # presumably has no effect on circle marks — confirm before removing.
    .mark_circle(opacity=0.5, thickness=2)
    .encode(
        x=alt.X(
            "year:O",
            title=" ",
            # Only label selected years on the ordinal axis.
            axis=alt.Axis(
                values=["1960", "1970", "1980", "1990", "2000", "2010", "2022"]
            ),
        ),
        y=alt.Y("type:N", title=" "),
        size=alt.Size(
            "count:Q",
            title="Count",
            legend=None,
            scale=alt.Scale(range=[0, 2000]),
        ),
        color=alt.Color("type:N", legend=None),
    )
    .properties(title="", width=820, height=200)
)

# Display the chart. NOTE(review): both legends are disabled above, so this
# legend configuration presumably has no visible effect — confirm.
circles.configure_legend(symbolType="circle")

#### Tablet

In [57]:
# Tablet layout (440x200): same bubble chart with a smaller maximum bubble
# size and a negative top padding.
# NOTE: this rebinds `circles`, replacing the chart built in the previous section.
circles = (
    alt.Chart(prov_types, padding={"top": -30})
    # NOTE(review): `thickness` is documented as a tick-mark property and
    # presumably has no effect on circle marks — confirm before removing.
    .mark_circle(opacity=0.5, thickness=2)
    .encode(
        x=alt.X(
            "year:O",
            title=" ",
            # Only label selected years on the ordinal axis.
            axis=alt.Axis(
                values=["1960", "1970", "1980", "1990", "2000", "2010", "2022"]
            ),
        ),
        y=alt.Y("type:N", title=" "),
        size=alt.Size(
            "count:Q",
            title="Count",
            legend=None,
            scale=alt.Scale(range=[0, 1000]),
        ),
        color=alt.Color("type:N", legend=None),
    )
    .properties(title="", width=440, height=200)
)

# Display the chart.
circles

#### Mobile

In [59]:
# Mobile layout (300x200): same bubble chart at the narrowest width.
# NOTE: this rebinds `circles`, replacing the chart built in the previous section.
circles = (
    alt.Chart(prov_types, padding={"top": -30})
    # NOTE(review): `thickness` is documented as a tick-mark property and
    # presumably has no effect on circle marks — confirm before removing.
    .mark_circle(opacity=0.5, thickness=2)
    .encode(
        x=alt.X(
            "year:O",
            title=" ",
            # Only label selected years on the ordinal axis.
            axis=alt.Axis(
                values=["1960", "1970", "1980", "1990", "2000", "2010", "2022"]
            ),
        ),
        y=alt.Y("type:N", title=" "),
        size=alt.Size(
            "count:Q",
            title="Count",
            legend=None,
            scale=alt.Scale(range=[0, 1000]),
        ),
        color=alt.Color("type:N", legend=None),
    )
    .properties(title="", width=300, height=200)
)

# Display the chart.
circles

#### Wide

In [63]:
# Wide layout (580x300): same bubble chart, taller than the other variants,
# kept under its own name so it does not overwrite `circles`.
circles_wide = (
    alt.Chart(prov_types, padding={"top": -30})
    # NOTE(review): `thickness` is documented as a tick-mark property and
    # presumably has no effect on circle marks — confirm before removing.
    .mark_circle(opacity=0.5, thickness=2)
    .encode(
        x=alt.X(
            "year:O",
            title=" ",
            # Only label selected years on the ordinal axis.
            axis=alt.Axis(
                values=["1960", "1970", "1980", "1990", "2000", "2010", "2022"]
            ),
        ),
        y=alt.Y("type:N", title=" "),
        size=alt.Size(
            "count:Q",
            title="Count",
            legend=None,
            scale=alt.Scale(range=[0, 1000]),
        ),
        color=alt.Color("type:N", legend=None),
    )
    .properties(title="", width=580, height=300)
)

# Display the chart.
circles_wide