# Objective
This notebook walks through finding the world's most remote capital city using longitude and latitude. The haversine formula is used to calculate the distances between capitals, and the results are visualised using Plotly.

## Importing the Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go

## Reading and Cleansing the dataset

In [None]:
# Load the raw capitals dataset
df_world = pd.read_csv('/kaggle/input/world-capitals-gps/concap.csv')

# Only want locations with longitude and latitude details.
# A (0, 0) pair is presumably the placeholder for "no coordinates recorded".
# A capital lying exactly on the equator or on the prime meridian has only ONE
# zero coordinate and is a valid location, so a row is discarded only when
# both values are zero.
has_coordinates = ~((df_world['CapitalLatitude'] == 0) & (df_world['CapitalLongitude'] == 0))
df1 = df_world[has_coordinates]

# Remove rows with nulls in the fields this analysis actually relies on.
# A blanket dropna() would also discard rows whose only missing value sits in
# an unrelated column -- e.g. read_csv's default NA handling parses the literal
# string "NA" (Namibia's ISO country code) as NaN.
df1 = df1.dropna(subset=['CountryName', 'CapitalName', 'CapitalLatitude', 'CapitalLongitude'])

df_world.info()

Let's remove some territories from this analysis

In [None]:
# Exclude every entry whose continent is Antarctica
not_antarctica = df1['ContinentName'].ne('Antarctica')
df1 = df1.loc[not_antarctica]

# Exclude a couple of further territories, matched by country name
excluded_countries = ['Norfolk Island', 'US Minor Outlying Islands']
df1 = df1.loc[~df1['CountryName'].isin(excluded_countries)]

## Haversine Implementation
The distance between two locations can be calculated using the [haversine formula](https://en.wikipedia.org/wiki/Haversine_formula).
Let's start by creating a dataframe with all possible pairings of locations.

In [None]:
# Get all possible "pairs" of locations via a cross join on a constant key.
# Overlapping columns from the left frame get the suffix "_x" and those from
# the right frame get "_y" (pandas' default merge suffixes).
df2 = df1.copy()
df_cartesian = (
    df1.assign(key=1)
    .merge(df2.assign(key=1), on='key')
    # Keyword form on purpose: the positional axis argument (`drop('key', 1)`)
    # was deprecated and is rejected by pandas 2.x.
    .drop(columns='key')
)

# Discard pairs whose two capital names match. This removes each location's
# pairing with itself (and, as a side effect, any two distinct rows that
# happen to share a capital name).
df_cartesian = df_cartesian[df_cartesian['CapitalName_x'] != df_cartesian['CapitalName_y']]

# Preview the first few pairs. head(-5) would return every row except the
# last five rather than a short sample.
df_cartesian.head(5)

For each pair of locations, we will apply the haversine formula to calculate the distance between the two points in kilometres.

In [None]:
# Function for the haversine formula
def haversine_np(lon1, lat1, lon2, lat2, radius_km=6367):
    """Great-circle distance between two points using the haversine formula.

    The four coordinate arguments are in decimal degrees and may be scalars or
    array-likes (NumPy arrays, pandas Series); array inputs are processed
    element-wise.

    Parameters
    ----------
    lon1, lat1 : float or array-like
        Longitude and latitude of the first point(s), in degrees.
    lon2, lat2 : float or array-like
        Longitude and latitude of the second point(s), in degrees.
    radius_km : float, optional
        Radius of the sphere in kilometres. Defaults to 6367, the Earth radius
        this function has always used.

    Returns
    -------
    float or array-like
        Distance(s) in kilometres.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    # Haversine of the central angle between the two points.
    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2

    # Mathematically `a` lies in [0, 1], but floating-point rounding can push
    # it marginally above 1 for (near-)antipodal points, which would make
    # arcsin return NaN. Clamp before taking the inverse.
    a = np.clip(a, 0.0, 1.0)

    c = 2 * np.arcsin(np.sqrt(a))
    return radius_km * c

In [None]:
# Add a "distance" column (km) holding the haversine distance of every pair.
# haversine_np takes (lon, lat) of the "_x" location followed by (lon, lat) of
# the "_y" location. assign() returns a new frame with the column appended
# last, instead of writing into a frame that came out of a boolean filter.
df_cartesian = df_cartesian.assign(
    distance=haversine_np(
        df_cartesian['CapitalLongitude_x'],
        df_cartesian['CapitalLatitude_x'],
        df_cartesian['CapitalLongitude_y'],
        df_cartesian['CapitalLatitude_y'],
    )
)

# Preview the first few pairs. head(-5) would return every row except the
# last five rather than a short sample.
df_cartesian.head(5)

In [None]:
# For every origin capital ("_x") keep only its nearest neighbour: order all
# pairs by ascending distance (ties broken by the neighbour's country name,
# then its capital name) and retain the first row seen for each origin.
origin_key = ['CountryName_x', 'CapitalName_x', 'CapitalLatitude_x', 'CapitalLongitude_x']
pairs_by_distance = df_cartesian.sort_values(['distance', 'CountryName_y', 'CapitalName_y'])
df_plot = pairs_by_distance.drop_duplicates(subset=origin_key, keep='first')

## Results

In [None]:
# Rank capitals by the distance to their nearest neighbour, farthest first
df_topN = df_plot.sort_values(by='distance', ascending=False)

# Show the ten most isolated capitals next to their closest capital
summary_columns = ['CapitalName_x', 'CountryName_x', 'CapitalName_y', 'CountryName_y', 'distance']
df_topN[summary_columns].head(10)

For something a little more interesting, let's plot these capitals on a map and display the details of each one's closest capital.

In [None]:
# Plot capital cities and display details of the closest capital on hover.
# The hover fields are passed through `customdata` using an explicit column
# list, so the template indices below refer to positions in this list and not
# to whatever column order df_plot happens to have.
hover_columns = ['CountryName_x', 'CapitalName_x', 'CapitalName_y', 'CountryName_y', 'distance']

fig = go.Figure(
    data=go.Scattergeo(
        customdata = df_plot[hover_columns].to_numpy(),
        lon  = df_plot['CapitalLongitude_x'],
        lat  = df_plot['CapitalLatitude_x'],
        mode = 'markers',
        # Main box: country and its capital. <extra> box: closest capital,
        # its country, and the distance rounded to whole kilometres.
        hovertemplate = "<b>%{customdata[0]}</b>"
                        "<br><br>"
                        "Capital: %{customdata[1]}"
                        "<extra><b>Closest Capital</b>"
                        "<br>%{customdata[2]}: %{customdata[3]}"
                        "<br>%{customdata[4]:.0f}km</extra>"))

fig.update_traces(mode='markers', marker={'sizemode':'area', 'sizeref':10})

# update_layout returns the figure, so as the last expression of the cell it
# also renders the map.
fig.update_layout(title    = 'Capitals',
                  autosize = True,
                  width    = 800,
                  height   = 450,
                  margin   = {"r":0,"t":50,"l":0,"b":0})