In [1]:
%matplotlib inline
import numpy as np
import plotly.express as px
import pandas as pd
import matplotlib.pyplot as plt
import requests
import descartes
import calendar
import seaborn as sns
import matplotlib
# Loading data (https://www.nti.org/analysis/articles/cns-north-korea-missile-test-database/). Encoding problem solved with 'unicode_escape'.
df = pd.read_csv('../data/north_korea_missile_test_database.csv', encoding='unicode_escape', index_col='F1')
# Dropping unused columns.
df = df.drop(columns=[
    'Launch Agency/Authority',
    'Other Name',
    'Additional Information',
    'Source(s)',
    'Launch Time (UTC)',
    'Date Entered/Updated',
    'Apogee',
])
# 'Distance Travelled': strip the 'km' unit from string cells; non-string cells are passed through untouched.
df['Distance Travelled'] = df['Distance Travelled'].apply(lambda x: x.replace('km', '') if isinstance(x, str) else x)
# errors='coerce' turns every non-numeric value (e.g. 'Unknown') into NaN, so no separate
# 'Unknown' replacement step is needed. The NaN values are then filled with 0.
# NOTE(review): after fillna(0) an unknown distance cannot be told apart from a real 0 km value.
df['Distance Travelled'] = pd.to_numeric(df['Distance Travelled'], errors='coerce').fillna(0)
# Shorten the one compound landing description so it matches a single map location.
df['Landing Location'] = df['Landing Location'].apply(
    lambda x: x.replace('330km east of Hachinohe and 4000 km out into Pacific Ocean', '330km east of Hachinohe')
    if isinstance(x, str) else x
)
# Facility coordinates to numeric; unparseable values become NaN.
df['Facility Latitude'] = pd.to_numeric(df['Facility Latitude'], errors='coerce')
df['Facility Longitude'] = pd.to_numeric(df['Facility Longitude'], errors='coerce')
# Parse the date once; every date-derived column below reuses this datetime column.
df['Date'] = pd.to_datetime(df['Date'])
# Year as a number.
df['Year'] = df['Date'].dt.year
# Month as a number (1-12).
df['Month_Number'] = df['Date'].dt.month
# Month as an abbreviated name looked up from the month number (via calendar.month_abbr).
df['Month'] = df['Month_Number'].apply(lambda x: calendar.month_abbr[x])
# Constant 1 per row so that summing this column counts tests.
df['# of Test'] = 1
# Year/month key as a 'YYYYMM' string.
df['YYYYMM'] = df['Date'].dt.strftime('%Y%m')

Mapping out the missile landing locations

In [2]:
# Same idea as the facility map: count the tests per 'Landing Location'.
# rename_axis/reset_index(name=...) names both columns explicitly instead of
# relying on the column order that reset_index() happens to produce.
landing_location_counts = (
    df['Landing Location']
    .value_counts()
    .rename_axis('Location')
    .reset_index(name='Count')
)
# Estimated (latitude, longitude) per landing area, looked up on Google Maps.
# NOTE: 'Unknown' has no real position; (0, 0) is only a placeholder point.
landing_locations = {
    'Sea of Japan or East Sea': (39.955235648894956, 134.61977359260942),
    'Yellow Sea or West Sea': (37.59154703499678, 124.86670024925259),
    'Pacific Ocean': (32.03899252565651, 175.45833648092025),
    '330km east of Hachinohe': (41.27280362125239, 148.20008702447393),
    'Unknown': (0, 0),
}
# One row per known location, then attach the counts.
df_locations = pd.DataFrame(landing_locations.items(), columns=['Location', 'Coordinates'])
df_locations = pd.merge(df_locations, landing_location_counts, on='Location', how='left')
# The left merge leaves Count as NaN for a location that never occurs in the data;
# NaN is not a valid marker size, so treat "no rows" as a count of 0.
df_locations['Count'] = df_locations['Count'].fillna(0).astype(int)
# Split the coordinate tuples into 'Latitude' and 'Longitude' columns.
df_locations[['Latitude', 'Longitude']] = pd.DataFrame(df_locations['Coordinates'].tolist(), index=df_locations.index)
# Map out df_locations: one marker per location, sized by the number of tests.
map_fig2 = px.scatter_mapbox(
    df_locations,
    lat='Latitude',
    lon='Longitude',
    hover_name='Location',
    color='Location',
    size='Count',
    size_max=30,
    zoom=2.3,
    center={'lat': 36.99233273149936, 'lon': -175.65753675093146},
    width=1200,
    height=700,
    title='DPRK Missile Landing Locations',
)
# Cosmetic layout tweaks (not necessary): centered title, dark basemap, tight margins.
map_fig2.update_layout(
    title=dict(x=0.5, xanchor='center', font=dict(size=25, family='Courier New')),
    mapbox_style='carto-darkmatter',
    margin=dict(t=45, l=5, r=0, b=5),
)

map_fig2.show()