# North Korea provocations database

In [1]:
# Load the `lab_black` IPython extension for this kernel session.
# NOTE(review): presumably this auto-formats cells with Black — confirm.
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_grid as altgrid
import numpy as np
import us
import urllib.request, json
import glob
import os
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import os
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

In [3]:
# Register the altair_grid theme under a single name and switch to it.
# The enable() call stays last so the cell still echoes the registry state.
THEME_NAME = "grid"
alt.themes.register(THEME_NAME, altgrid.theme)
alt.themes.enable(THEME_NAME)

ThemeRegistry.enable('grid')

In [4]:
# Lift Altair's row limit on embedded datasets. Called first so that its
# return value is not the cell's last expression (the cell shows no output).
alt.data_transformers.disable_max_rows()

# pandas display limits: up to 50 columns / 1000 rows, and never truncate
# long cell text.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

### Download latest version of the database from [here](https://beyondparallel.csis.org/database-north-korean-provocations/)
##### (Until we can figure out how to get Selenium to download to this directory instead of the local 'Downloads' folder.)

In [5]:
# url = "https://beyondparallel.csis.org/database-north-korean-provocations/"

In [6]:
# path = "/Users/stiles/github/chromedriver"
# s = Service(path)
# driver = webdriver.Chrome(service=s)

In [7]:
# driver.get(url)
# html = driver.page_source
# soup = BeautifulSoup(html, "html.parser")

In [8]:
# driver.find_element(By.XPATH, "//*[@id='table_1_wrapper']/div[1]/a[3]").click()

In [9]:
# driver.close()

### Import data

In [10]:
# Read the manually downloaded export; the "Date" column is parsed to
# datetimes at load time.
RAW_CSV_PATH = "data/raw/provocations_new.csv"
src = pd.read_csv(RAW_CSV_PATH, parse_dates=["Date"])

In [11]:
# Show the dtype of each loaded column (checks that "Date" parsed as datetime).
src.dtypes

Date           datetime64[ns]
Type                   object
Event                  object
Description            object
Resources              object
dtype: object

In [12]:
# Normalise the column labels to snake_case: trim surrounding whitespace,
# lower-case, turn spaces and hyphens into underscores, and drop parentheses.
#
# regex=False is deliberate. Every pattern here is a plain literal, and a bare
# "(" or ")" is not a valid regular expression: pandas >= 2.0 no longer treats
# single-character patterns as literals when regex=True, so the previous
# regex=True calls raise re.error there.
src.columns = (
    src.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace("-", "_", regex=False)
)

### Parse dates

In [13]:
# Derive calendar fields from the parsed "date" column.
src["year"] = src["date"].dt.year
src["month"] = src["date"].dt.month
# "day" holds the weekday name (e.g. "Thursday"). The cell used to assign the
# numeric day of the month to "day" first and then overwrite it on the next
# line; that dead assignment is removed, so the resulting columns are unchanged.
src["day"] = src["date"].dt.day_name()

In [14]:
# Working copy ordered newest-first; `src` itself is left as loaded.
df = src.sort_values(by="date", ascending=False).copy()

In [15]:
# Lower-case the event labels and strip the literal "-range" fragment, so
# "short-range ..." and "short ..." collapse into one label.
# regex=False pins literal matching: the default for `regex` differs between
# pandas major versions, and the other replace calls in this notebook are
# explicit about it too.
df["event"] = df["event"].str.lower().str.replace("-range", "", regex=False)

In [16]:
# Drop the literal " Provocation" suffix from the category labels.
# regex=False pins literal matching regardless of the pandas version's default.
df["type"] = df["type"].str.replace(" Provocation", "", regex=False)

In [17]:
# Flag rows whose description mentions "ballistic" (case-insensitive).
# na=False makes a missing description count as "not ballistic" instead of
# leaving NaN in a column that is later used as a boolean mask; regex=False
# because the needle is a plain substring.
df["ballistic"] = (
    df["description"].str.lower().str.contains("ballistic", na=False, regex=False)
)

In [24]:
# Preview the first five rows of the newest-first working frame.
# NOTE(review): this cell's execution count (24) is out of sequence, so the
# saved output likely reflects cells further down — re-run with
# Restart & Run All before relying on it.
df.head()

Unnamed: 0,date,type,event,description,resources,year,month,day,ballistic
329,2022-10-06,Missile,short ballistic missile launch,"North Korea fired two short-range ballistic missiles from the Samsok area in Pyongyang between 6:01am and 6:23am. The missiles flew toward the sea between Korea and Japan, and with one flying about 217 miles (350 km) at an apogee of 50 miles (80 km), and another traveling about 487 miles (800 km) at an altitude of 37 miles (60 km).",https://en.yna.co.kr/view/AEN20221006000853325,2022,10,Thursday,True
328,2022-10-04,Missile,intermediate ballistic missile launch,"North Korea fired an intermediate-range ballistic missile (IRBM) from Mupyong-ri in the northern province of Jagang at 7:23am. The missile flew over Japan, and flew about 2796 miles (4500km) at an apogee of around 607 miles (970km) and top speed of Mach 17.",https://en.yna.co.kr/view/AEN20221004001353325?section=national/defense,2022,10,Tuesday,True
327,2022-10-01,Missile,short ballistic missile launch,"North Korea fired two short-range ballistic missiles toward the sea between Korea and Japan. The launch was detected from the Sunan area in Pyongyang between 6:45am and 7:03am, and the missiles flew about 217 miles (350 km) at apogees of about 18 miles (30 km) and top speeds of Mach 6.",https://en.yna.co.kr/view/AEN20221001000455325?section=nk/nk,2022,10,Saturday,True
326,2022-09-29,Missile,short ballistic missile launch,North Korea fired a short-range ballistic missile towards the sea between Korea and Japan. The launch comes hours after U.S. Vice President Kamala Harris finished her visit to South Korea.,https://en.yna.co.kr/view/AEN20220929010151325?section=national/defense,2022,9,Thursday,True
325,2022-09-28,Missile,short ballistic missile launch,"North Korea fired two short-range ballistic missiles into the sea between Korea and Japan. The missiles were launched from the Sunan area in Pyongyang between 6:10pm and 6:20pm, and flew about 223 miles (360km) at apogees of around 30km and top speeds of Mach 6.",https://en.yna.co.kr/view/AEN20220928009054315,2022,9,Wednesday,True


In [18]:
# Relabel launches recorded as "short missile launch" whose description
# mentions "ballistic", so they share one label with the launches that were
# already tagged as ballistic.
#
# Both masks are forced to be strictly boolean: `na=False` treats a missing
# event as "no match", and `.eq(True)` treats a missing flag as False. The
# needle is a plain substring, hence regex=False.
is_short_launch = df["event"].str.contains(
    "short missile launch", na=False, regex=False
)
is_ballistic = df["ballistic"].eq(True)
df.loc[is_ballistic & is_short_launch, "event"] = "short ballistic missile launch"

In [19]:
# Number of recorded provocations per (category, year). Named aggregation
# produces the `provocation_count` column directly; rows with a missing date
# are not counted.
types_years = (
    df.groupby(["type", "year"])
    .agg(provocation_count=("date", "count"))
    .reset_index()
)

In [20]:
# Yearly missile counts, most recent five years first.
types_years.loc[types_years["type"].eq("Missile")].sort_values(
    by="year", ascending=False
).head()

Unnamed: 0,type,year,provocation_count
26,Missile,2022,24
25,Missile,2021,8
24,Missile,2020,5
23,Missile,2019,13
22,Missile,2017,16


In [21]:
# Small-multiple bar charts: one panel per provocation category, three per row,
# showing the yearly count.
#
# Changes from the previous version of this cell:
# - `altair` is already imported in the notebook's import cell, so the repeated
#   import is gone.
# - The `vega_datasets` import and the `source` variable were leftover gallery
#   boilerplate that nothing used; removing them drops a needless dependency.
# - `interpolate` has no meaning for bar marks and is removed.
#
# The colour legend is disabled because each facet header already names its
# category.
alt.Chart(types_years).mark_bar().encode(
    alt.X(
        "year:O",
        axis=alt.Axis(
            domain=False,
            values=[1960, 1980, 2000, 2022],
            tickSize=0,
            title="",
        ),
    ),
    alt.Y("provocation_count:Q", axis=alt.Axis(tickCount=2), title=""),
    alt.Color("type:N", title=" ", scale=alt.Scale(), legend=None),
    alt.Facet("type:N", columns=3, title=" "),
).properties(
    width=300, height=200, title="North Korea's international provocations, by category"
).configure_legend(
    orient="top"
)

In [22]:
# Small-multiple stream-style area charts: one full-width row per provocation
# category, with each area centred on its baseline (`stack="center"`) and the
# y axis hidden.
#
# Changes from the previous version of this cell:
# - `altair` is already imported in the notebook's import cell, so the repeated
#   import is gone.
# - The `vega_datasets` import and the `source` variable were leftover gallery
#   boilerplate that nothing used; removing them drops a needless dependency.
#
# The colour legend is disabled because each facet header already names its
# category.
alt.Chart(types_years).mark_area(interpolate="cardinal").encode(
    alt.X(
        "year:O",
        axis=alt.Axis(
            domain=False,
            values=[1960, 1970, 1980, 1990, 2000, 2010, 2022],
            tickSize=0,
            title="",
        ),
    ),
    alt.Y("provocation_count:Q", stack="center", axis=None),
    alt.Color("type:N", title=" ", scale=alt.Scale(), legend=None),
    alt.Facet("type:N", columns=1, title=" "),
).properties(
    width=650, height=100, title="North Korea's international provocations, by category"
).configure_legend(
    orient="top"
)

In [23]:
# Single streamgraph: all provocation categories stacked around a central
# baseline (`stack="center"`), with the y axis hidden.
#
# Changes from the previous version of this cell:
# - `altair` is already imported in the notebook's import cell, so the repeated
#   import is gone.
# - The `vega_datasets` import and the `source` variable were leftover gallery
#   boilerplate that nothing used; removing them drops a needless dependency.
# - The colour legend is shown. This chart is not faceted, so with the legend
#   suppressed there was no way to tell which colour is which category, and
#   the `configure_legend(orient="top")` call had nothing to act on.
alt.Chart(types_years).mark_area(interpolate="cardinal").encode(
    alt.X(
        "year:O",
        axis=alt.Axis(
            domain=False,
            values=[1960, 1970, 1980, 1990, 2000, 2010, 2022],
            tickSize=0,
            title="",
        ),
    ),
    alt.Y("provocation_count:Q", stack="center", axis=None),
    alt.Color("type:N", title=" ", scale=alt.Scale()),
).properties(
    width=650, height=300, title="North Korea's international provocations, by category"
).configure_legend(
    orient="top"
)