In [1]:
import numpy as np
import pandas as pd

import requests
from bs4 import BeautifulSoup
import unicodedata

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Notebook-wide pandas display settings. Every option is addressed by its full
# dotted key: abbreviated keys are resolved by pattern matching and raise
# OptionError as soon as more than one registered option matches the pattern.
pd.set_option('display.max_columns', None)        # never elide columns
pd.set_option('display.max_rows', None)           # never elide rows
pd.set_option('display.max_seq_items', None)      # show every item of long sequences
pd.set_option('display.max_colwidth', 500)        # allow up to 500 characters per cell
pd.set_option('display.expand_frame_repr', True)  # wrap wide frames across several lines

## Scrape Data 

In [2]:
# Download the US government index page that lists every travel advisory.

URL = "https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories.html/"

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops the run on an HTTP error instead of parsing an error page.
response = requests.get(URL, timeout=30)
response.raise_for_status()
res = response.text
soup = BeautifulSoup(res, 'lxml')

soup

<!-- START CAPTCHA --><html><head><script async="" defer="" src="https://www.recaptcha.net/recaptcha/api.js?onload=onloadCallback&amp;render=explicit" type="text/javascript"></script>
<script type="text/javascript">
           var onloadCallback = function() {
           	if ($('#captchaImage').length) { 
               		grecaptcha.render('captchaImage', {
               			'sitekey' : '6Ld0yEcUAAAAAHz2KIeCdmJjT01qzGKXBpexp_ke'
               		});
                }
               if ($('#captchaImage').length) { 
               		grecaptcha.render('captchaImage2', {
               			'sitekey' : '6Ld0yEcUAAAAAHz2KIeCdmJjT01qzGKXBpexp_ke'
               		});
                }
               if ($('#captchaImage').length) { 
               		grecaptcha.render('captchaImage3', {
               			'sitekey' : '6Ld0yEcUAAAAAHz2KIeCdmJjT01qzGKXBpexp_ke'
               		});
                }
     		};
   	</script>
<!-- END CAPTCHA -->
<!DOCTYPE html>

<meta content="width=device-width, i

In [3]:
# Exploratory check: the <div> whose class attribute is "table-data data-date".
soup.find_all('div', class_='table-data data-date')

[<div class="table-data data-date">
 <table>
 <tbody>
 <tr scope="row"><th scope="col">Advisory</th><th class="traveladvisory" scope="col">Level</th><th class="data-date traveladvisory" scope="col">Date Updated</th></tr>
 <tr data-date="10/16/23">
 <td><a href="/content/travel/en/traveladvisories/traveladvisories/israel-west-bank-and-gaza-travel-advisory.html" title="Israel, the West Bank and Gaza Travel Advisory ">Israel, the West Bank and Gaza Travel Advisory </a></td>
 <td>Other</td>
 <td>October 14, 2023</td>
 </tr>
 <tr data-date="7/26/23">
 <td><a href="/content/travel/en/traveladvisories/traveladvisories/Liechtenstein-Travel-Advisory.html" title="Liechtenstein Travel Advisory">Liechtenstein Travel Advisory</a></td>
 <td>Level 1: Exercise Normal Precautions</td>
 <td>July 26, 2023</td>
 </tr>
 <tr data-date="7/26/23">
 <td><a href="/content/travel/en/traveladvisories/traveladvisories/north-macedonia-travel-advisory.html" title="North Macedonia Travel Advisory">North Macedonia Tra

In [4]:
# Parse the first table of the index page into one record per advisory:
# [advisory title, level, date updated, absolute advisory URL].
table = soup.find('table')
if table is None:
    raise ValueError('No <table> found on the advisory index page; the layout may have changed.')
# Fall back to the table itself when the markup carries no explicit <tbody>.
table_body = table.find('tbody') or table

data = []
for row in table_body.find_all('tr'):
    cells = row.find_all('td')
    if not cells:
        continue  # header row: it only holds <th> cells, nothing to record

    # Keep every cell, even an empty one, so a blank value can never shift the
    # following fields into the wrong column.
    record = [cell.text.strip() for cell in cells]

    # The advisory link is site-relative; an advisory without a link gets ''.
    link = row.find('a', href=True)
    record.append('https://travel.state.gov' + link['href'] if link is not None else '')

    if any(record):  # ignore rows that carry no content at all
        data.append(record)

data

[['Israel, the West Bank and Gaza Travel Advisory',
  'Other',
  'October 14, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/israel-west-bank-and-gaza-travel-advisory.html'],
 ['Liechtenstein Travel Advisory',
  'Level 1: Exercise Normal Precautions',
  'July 26, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/Liechtenstein-Travel-Advisory.html'],
 ['North Macedonia Travel Advisory',
  'Level 1: Exercise Normal Precautions',
  'July 26, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/north-macedonia-travel-advisory.html'],
 ['Nauru Travel Advisory',
  'Level 1: Exercise Normal Precautions',
  'July 24, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/nauru-travel-advisory.html'],
 ['Palau Travel Advisory',
  'Level 1: Exercise Normal Precautions',
  'July 24, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/trav

In [5]:
# Visit every advisory page and record the threat-indicator letters it shows.
# Each record of `data2` is the matching record of `data` plus one extra field:
# the letters joined as 'O, U, T' ('' when the page shows none).
data2 = []

for c in data:
    advisory_url = c[3]  # absolute advisory URL, the fourth field of each record
    response = requests.get(advisory_url, timeout=30)
    response.raise_for_status()  # fail loudly rather than record "no threats" for an error page
    soup2 = BeautifulSoup(response.text, 'lxml')  # stays bound to the last page after the loop

    # Indicators displayed on the page are the <a class="showThreat"> elements.
    threat_codes = [anchor.text for anchor in soup2.find_all('a', class_='showThreat')]

    # Build a new list instead of appending to `c`: the rows of `data` stay
    # untouched, so re-running this cell cannot add a second threats field.
    data2.append(c + [', '.join(threat_codes)])

data2

[['Israel, the West Bank and Gaza Travel Advisory',
  'Other',
  'October 14, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/israel-west-bank-and-gaza-travel-advisory.html',
  'O, U, T'],
 ['Liechtenstein Travel Advisory',
  'Level 1: Exercise Normal Precautions',
  'July 26, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/Liechtenstein-Travel-Advisory.html',
  ''],
 ['North Macedonia Travel Advisory',
  'Level 1: Exercise Normal Precautions',
  'July 26, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/north-macedonia-travel-advisory.html',
  ''],
 ['Nauru Travel Advisory',
  'Level 1: Exercise Normal Precautions',
  'July 24, 2023',
  'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/nauru-travel-advisory.html',
  'H'],
 ['Palau Travel Advisory',
  'Level 1: Exercise Normal Precautions',
  'July 24, 2023',
  'https://travel.state.gov/content

In [6]:
# Reference legend: print every threat letter next to its tooltip description.
# `soup2` is whichever advisory page the scraping loop parsed last.
threat_anchors = soup2.find_all('a', class_=['showThreat', 'hideThreat'])
for anchor in threat_anchors:
    print(anchor.get_text(), ' - ', anchor['data-tooltip'])

O  -  Other: There are potential risks not covered by previous risk indicators. Read the Travel Advisory for details.

D  -  The risk of wrongful detention of U.S. nationals by a foreign government exists.  

K  -  Kidnapping/Hostage Taking: Kidnapping and/or hostage taking occurs in areas of the country.

E  -  Time-limited Event: Short-term event, such as elections, sporting events, or other incidents that may pose safety risks.

N  -  Natural Disaster: A natural disaster, or its aftermath, poses danger.

H  -  Health: Health risks are present, including current disease outbreaks or crises that disrupt a country’s medical infrastructure.

U  -  Civil Unrest: Political, economic, religious and/or ethnic instability exists and may cause violence, major disruptions, and/or safety risks.

T  -  Terrorism:  Terrorist attacks have occurred and/or specific threats against civilians, groups, or other targets may exist.

C  -  Crime: Widespread violent or organized crime is present in areas o

## Create DataFrame

In [7]:
# Assemble the scraped records into a DataFrame and derive the analysis columns.
df = pd.DataFrame(data2, columns=['advisory', 'level', 'date_updated', 'url', 'threats'])

# Country name = advisory title with the literal ' Travel Advisory' text removed.
df['country'] = df['advisory'].str.replace(' Travel Advisory', '', regex=False)

# First seven characters of the level text, e.g. 'Level 1'; shorter labels
# such as 'Other' pass through unchanged.
df['level_clean'] = df['level'].str.slice(0, 7)

# One 0/1 indicator column per threat letter. The boolean mask is converted
# directly, which stays correct for any index (the previous np.where -> .loc
# route only worked because positions happened to equal the index labels).
THREAT_CODES = ['O', 'D', 'K', 'E', 'N', 'H', 'U', 'T', 'C']
for code in THREAT_CODES:
    df[f'threat_{code}'] = df['threats'].str.contains(code, regex=False).astype('int64')

df

Unnamed: 0,advisory,level,date_updated,url,threats,country,level_clean,threat_O,threat_D,threat_K,threat_E,threat_N,threat_H,threat_U,threat_T,threat_C
0,"Israel, the West Bank and Gaza Travel Advisory",Other,"October 14, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/israel-west-bank-and-gaza-travel-advisory.html,"O, U, T","Israel, the West Bank and Gaza",Other,1,0,0,0,0,0,1,1,0
1,Liechtenstein Travel Advisory,Level 1: Exercise Normal Precautions,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/Liechtenstein-Travel-Advisory.html,,Liechtenstein,Level 1,0,0,0,0,0,0,0,0,0
2,North Macedonia Travel Advisory,Level 1: Exercise Normal Precautions,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/north-macedonia-travel-advisory.html,,North Macedonia,Level 1,0,0,0,0,0,0,0,0,0
3,Nauru Travel Advisory,Level 1: Exercise Normal Precautions,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/nauru-travel-advisory.html,H,Nauru,Level 1,0,0,0,0,0,1,0,0,0
4,Palau Travel Advisory,Level 1: Exercise Normal Precautions,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/palau-travel-advisory.html,,Palau,Level 1,0,0,0,0,0,0,0,0,0
5,Burma (Myanmar) Travel Advisory,Level 4: Do Not Travel,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/burma-travel-advisory.html,"O, D, H, U",Burma (Myanmar),Level 4,1,1,0,0,0,1,1,0,0
6,Worldwide Caution,Caution,"October 29, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/worldwide-caution.html,,Worldwide Caution,Caution,0,0,0,0,0,0,0,0,0
7,Afghanistan Travel Advisory,Level 4: Do Not Travel,"June 23, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/afghanistan-advisory.html,"O, K, U, T, C",Afghanistan,Level 4,1,0,1,0,0,0,1,1,1
8,Albania Travel Advisory,Level 2: Exercise Increased Caution,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/albania-travel-advisory.html,C,Albania,Level 2,0,0,0,0,0,0,0,0,1
9,Algeria Travel Advisory,Level 2: Exercise Increased Caution,"July 13, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/algeria-travel-advisory.html,"K, T",Algeria,Level 2,0,0,1,0,0,0,0,1,0


In [8]:
import iso3166

# Lookup table keyed by the names in iso3166.countries_by_name; the value is
# the third field of each country record (the three-letter code).
countries_iso3 = {
    name: country[2]
    for name, country in iso3166.countries_by_name.items()
}

countries_iso3

{'AFGHANISTAN': 'AFG',
 'ÅLAND ISLANDS': 'ALA',
 'ALBANIA': 'ALB',
 'ALGERIA': 'DZA',
 'AMERICAN SAMOA': 'ASM',
 'ANDORRA': 'AND',
 'ANGOLA': 'AGO',
 'ANGUILLA': 'AIA',
 'ANTARCTICA': 'ATA',
 'ANTIGUA AND BARBUDA': 'ATG',
 'ARGENTINA': 'ARG',
 'ARMENIA': 'ARM',
 'ARUBA': 'ABW',
 'AUSTRALIA': 'AUS',
 'AUSTRIA': 'AUT',
 'AZERBAIJAN': 'AZE',
 'BAHAMAS': 'BHS',
 'BAHRAIN': 'BHR',
 'BANGLADESH': 'BGD',
 'BARBADOS': 'BRB',
 'BELARUS': 'BLR',
 'BELGIUM': 'BEL',
 'BELIZE': 'BLZ',
 'BENIN': 'BEN',
 'BERMUDA': 'BMU',
 'BHUTAN': 'BTN',
 'BOLIVIA, PLURINATIONAL STATE OF': 'BOL',
 'BONAIRE, SINT EUSTATIUS AND SABA': 'BES',
 'BOSNIA AND HERZEGOVINA': 'BIH',
 'BOTSWANA': 'BWA',
 'BOUVET ISLAND': 'BVT',
 'BRAZIL': 'BRA',
 'BRITISH INDIAN OCEAN TERRITORY': 'IOT',
 'BRUNEI DARUSSALAM': 'BRN',
 'BULGARIA': 'BGR',
 'BURKINA FASO': 'BFA',
 'BURUNDI': 'BDI',
 'CAMBODIA': 'KHM',
 'CAMEROON': 'CMR',
 'CANADA': 'CAN',
 'CABO VERDE': 'CPV',
 'CAYMAN ISLANDS': 'CYM',
 'CENTRAL AFRICAN REPUBLIC': 'CAF',
 'CHAD': 

In [9]:
# Attach the three-letter code by looking up the upper-cased country name;
# names with no exact match in the lookup are left as NaN.
country_keys = df['country'].str.upper()
df['iso3'] = country_keys.map(countries_iso3)
df

Unnamed: 0,advisory,level,date_updated,url,threats,country,level_clean,threat_O,threat_D,threat_K,threat_E,threat_N,threat_H,threat_U,threat_T,threat_C,iso3
0,"Israel, the West Bank and Gaza Travel Advisory",Other,"October 14, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/israel-west-bank-and-gaza-travel-advisory.html,"O, U, T","Israel, the West Bank and Gaza",Other,1,0,0,0,0,0,1,1,0,
1,Liechtenstein Travel Advisory,Level 1: Exercise Normal Precautions,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/Liechtenstein-Travel-Advisory.html,,Liechtenstein,Level 1,0,0,0,0,0,0,0,0,0,LIE
2,North Macedonia Travel Advisory,Level 1: Exercise Normal Precautions,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/north-macedonia-travel-advisory.html,,North Macedonia,Level 1,0,0,0,0,0,0,0,0,0,MKD
3,Nauru Travel Advisory,Level 1: Exercise Normal Precautions,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/nauru-travel-advisory.html,H,Nauru,Level 1,0,0,0,0,0,1,0,0,0,NRU
4,Palau Travel Advisory,Level 1: Exercise Normal Precautions,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/palau-travel-advisory.html,,Palau,Level 1,0,0,0,0,0,0,0,0,0,PLW
5,Burma (Myanmar) Travel Advisory,Level 4: Do Not Travel,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/burma-travel-advisory.html,"O, D, H, U",Burma (Myanmar),Level 4,1,1,0,0,0,1,1,0,0,
6,Worldwide Caution,Caution,"October 29, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/worldwide-caution.html,,Worldwide Caution,Caution,0,0,0,0,0,0,0,0,0,
7,Afghanistan Travel Advisory,Level 4: Do Not Travel,"June 23, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/afghanistan-advisory.html,"O, K, U, T, C",Afghanistan,Level 4,1,0,1,0,0,0,1,1,1,AFG
8,Albania Travel Advisory,Level 2: Exercise Increased Caution,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/albania-travel-advisory.html,C,Albania,Level 2,0,0,0,0,0,0,0,0,1,ALB
9,Algeria Travel Advisory,Level 2: Exercise Increased Caution,"July 13, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/algeria-travel-advisory.html,"K, T",Algeria,Level 2,0,0,1,0,0,0,0,1,0,DZA


In [10]:
# Alphabetical view by country, for eyeballing the assigned codes.
df.sort_values(by='country', ascending=True)

Unnamed: 0,advisory,level,date_updated,url,threats,country,level_clean,threat_O,threat_D,threat_K,threat_E,threat_N,threat_H,threat_U,threat_T,threat_C,iso3
7,Afghanistan Travel Advisory,Level 4: Do Not Travel,"June 23, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/afghanistan-advisory.html,"O, K, U, T, C",Afghanistan,Level 4,1,0,1,0,0,0,1,1,1,AFG
8,Albania Travel Advisory,Level 2: Exercise Increased Caution,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/albania-travel-advisory.html,C,Albania,Level 2,0,0,0,0,0,0,0,0,1,ALB
9,Algeria Travel Advisory,Level 2: Exercise Increased Caution,"July 13, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/algeria-travel-advisory.html,"K, T",Algeria,Level 2,0,0,1,0,0,0,0,1,0,DZA
10,Andorra Travel Advisory,Level 1: Exercise Normal Precautions,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/andorra-travel-advisory.html,,Andorra,Level 1,0,0,0,0,0,0,0,0,0,AND
11,Angola Travel Advisory,Level 2: Exercise Increased Caution,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/angola-travel-advisory.html,"H, C",Angola,Level 2,0,0,0,0,0,1,0,0,1,AGO
12,Anguilla Travel Advisory,Level 1: Exercise Normal Precautions,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/anguilla-travel-advisory.html,,Anguilla,Level 1,0,0,0,0,0,0,0,0,0,AIA
13,Antarctica Travel Advisory,Level 2: Exercise Increased Caution,"January 19, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/antarctica-travel-advisory.html,O,Antarctica,Level 2,1,0,0,0,0,0,0,0,0,ATA
14,Antigua and Barbuda Travel Advisory,Level 1: Exercise Normal Precautions,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/antigua-and-barbuda-travel-advisory.html,,Antigua and Barbuda,Level 1,0,0,0,0,0,0,0,0,0,ATG
172,Argentina Travel Advisory,Level 1: Exercise Normal Precautions,"August 18, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/argentina-travel-advisory.html,,Argentina,Level 1,0,0,0,0,0,0,0,0,0,ARG
173,Armenia Travel Advisory,Level 2: Exercise Increased Caution,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/armenia-travel-advisory.html,O,Armenia,Level 2,1,0,0,0,0,0,0,0,0,ARM


In [11]:
# Advisories whose country name found no code in the lookup.
# (Select the 'country' column and call .tolist() to get just the names.)
unmatched = df['iso3'].isna()
df.loc[unmatched]

Unnamed: 0,advisory,level,date_updated,url,threats,country,level_clean,threat_O,threat_D,threat_K,threat_E,threat_N,threat_H,threat_U,threat_T,threat_C,iso3
0,"Israel, the West Bank and Gaza Travel Advisory",Other,"October 14, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/israel-west-bank-and-gaza-travel-advisory.html,"O, U, T","Israel, the West Bank and Gaza",Other,1,0,0,0,0,0,1,1,0,
5,Burma (Myanmar) Travel Advisory,Level 4: Do Not Travel,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/burma-travel-advisory.html,"O, D, H, U",Burma (Myanmar),Level 4,1,1,0,0,0,1,1,0,0,
6,Worldwide Caution,Caution,"October 29, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/worldwide-caution.html,,Worldwide Caution,Caution,0,0,0,0,0,0,0,0,0,
25,Brunei Travel Advisory,Level 1: Exercise Normal Precautions,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/brunei-travel-advisory.html,,Brunei,Level 1,0,0,0,0,0,0,0,0,0,
31,Macau Travel Advisory,Level 3: Reconsider Travel,"June 30, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/macau-travel-advisory.html,"O, D",Macau,Level 3,1,1,0,0,0,0,0,0,0,
43,Czech Republic Travel Advisory,Level 1: Exercise Normal Precautions,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/czech-republic-travel-advisory.html,,Czech Republic,Level 1,0,0,0,0,0,0,0,0,0,
69,United Kingdom Travel Advisory,Level 2: Exercise Increased Caution,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/united-kingdom-travel-advisory.html,T,United Kingdom,Level 2,0,0,0,0,0,0,0,1,0,
78,Curacao Travel Advisory,Level 1: Exercise Normal Precautions,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/curacao-travel-advisory.html,,Curacao,Level 1,0,0,0,0,0,0,0,0,0,
79,French West Indies Travel Advisory,Level 1: Exercise Normal Precautions,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/french-west-indies-travel-advisory.html,,French West Indies,Level 1,0,0,0,0,0,0,0,0,0,
83,Sint Maarten Travel Advisory,Level 1: Exercise Normal Precautions,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/sint-maarten-travel-advisory.html,,Sint Maarten,Level 1,0,0,0,0,0,0,0,0,0,


In [18]:
# Work on a copy so the frame `df` itself is not modified by the cleanup below.
df_clean = df.copy()

# Hand-assigned iso3 codes for names the automatic lookup missed
# (codes taken from https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).
countries_iso3_manual = {
 'Burma (Myanmar)': 'MMR',  # BUR is the old code
 'Brunei': 'BRN',
 'Macau': 'MAC',
 'Czech Republic': 'CZE',
 'United Kingdom': 'GBR',
 'Curacao': 'CUW',
 'French West Indies': 'GLP',  # assigned to the largest by population
 'Sint Maarten': 'SXM',
 'Iran': 'IRN',
 "North Korea (Democratic People's Republic of Korea)": 'PRK',
 'South Korea': 'KOR',
 'The Kyrgyz Republic': 'KGZ',
 'Laos': 'LAO',
 'Micronesia': 'FSM',
 'Moldova': 'MDA',
 'Russia': 'RUS',
 'Solomon Island': 'SLB',
 'Syria': 'SYR',
 'Taiwan': 'TWN',
 'Tanzania': 'TZA',
 'Turkey': 'TUR',
 'Venezuela': 'VEN',
 'Vietnam': 'VNM',
 'British Virgin Islands': 'VGB',
 'The Bahamas': 'BHS',
 'Bolivia': 'BOL',
 "Cote d'Ivoire": 'CIV',
 'The Gambia': 'GMB',
 'Democratic Republic of the Congo': 'COD',
 'Republic of the Congo': 'COG',
 'Bonaire': 'BES',
 'Sint Eustatius': 'BES',  # row is dropped below; only Bonaire is kept
 'Saba': 'BES'  # row is dropped below; only Bonaire is kept
}

# Rows that already have a code keep it; rows without one take the manual
# code for their country (still NaN when the name is not in the manual map).
manual_codes = df_clean['country'].map(countries_iso3_manual)
has_code = df_clean['iso3'].notna()
df_clean['iso3'] = df_clean['iso3'].where(has_code, manual_codes)
print(df_clean.shape)

# Bonaire, Sint Eustatius and Saba share the code BES: the Bonaire row is kept
# under the combined name, the other two rows are removed, and so is the
# 'Worldwide Caution' entry.
df_clean['country'] = df_clean['country'].replace('Bonaire', 'Bonaire, Sint Eustatius and Saba')
excluded_names = ['Sint Eustatius', 'Saba', 'Worldwide Caution']
excluded_labels = df_clean.index[df_clean['country'].isin(excluded_names)]
df_clean = df_clean.drop(index=excluded_labels).reset_index(drop=True)

# Entries that are not countries (e.g. 'Israel, the West Bank and Gaza') still have no code.
print(df_clean.shape)
df_clean.sort_values('country')

(212, 17)
(209, 17)


Unnamed: 0,advisory,level,date_updated,url,threats,country,level_clean,threat_O,threat_D,threat_K,threat_E,threat_N,threat_H,threat_U,threat_T,threat_C,iso3
6,Afghanistan Travel Advisory,Level 4: Do Not Travel,"June 23, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/afghanistan-advisory.html,"O, K, U, T, C",Afghanistan,Level 4,1,0,1,0,0,0,1,1,1,AFG
7,Albania Travel Advisory,Level 2: Exercise Increased Caution,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/albania-travel-advisory.html,C,Albania,Level 2,0,0,0,0,0,0,0,0,1,ALB
8,Algeria Travel Advisory,Level 2: Exercise Increased Caution,"July 13, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/algeria-travel-advisory.html,"K, T",Algeria,Level 2,0,0,1,0,0,0,0,1,0,DZA
9,Andorra Travel Advisory,Level 1: Exercise Normal Precautions,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/andorra-travel-advisory.html,,Andorra,Level 1,0,0,0,0,0,0,0,0,0,AND
10,Angola Travel Advisory,Level 2: Exercise Increased Caution,"July 24, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/angola-travel-advisory.html,"H, C",Angola,Level 2,0,0,0,0,0,1,0,0,1,AGO
11,Anguilla Travel Advisory,Level 1: Exercise Normal Precautions,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/anguilla-travel-advisory.html,,Anguilla,Level 1,0,0,0,0,0,0,0,0,0,AIA
12,Antarctica Travel Advisory,Level 2: Exercise Increased Caution,"January 19, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/antarctica-travel-advisory.html,O,Antarctica,Level 2,1,0,0,0,0,0,0,0,0,ATA
13,Antigua and Barbuda Travel Advisory,Level 1: Exercise Normal Precautions,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/antigua-and-barbuda-travel-advisory.html,,Antigua and Barbuda,Level 1,0,0,0,0,0,0,0,0,0,ATG
171,Argentina Travel Advisory,Level 1: Exercise Normal Precautions,"August 18, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/argentina-travel-advisory.html,,Argentina,Level 1,0,0,0,0,0,0,0,0,0,ARG
172,Armenia Travel Advisory,Level 2: Exercise Increased Caution,"July 17, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/armenia-travel-advisory.html,O,Armenia,Level 2,1,0,0,0,0,0,0,0,0,ARM


## Summary Stats 

In [13]:
# (rows, columns) of `df`, the frame before the df_clean cleanup.
# NOTE(review): `df` still holds the rows removed from `df_clean` — confirm `df` is the intended frame here.
df.shape

(212, 17)

In [14]:
# Summary of every column (include='all' covers the text columns as well as the numeric flags).
# NOTE(review): this summarises `df`, not `df_clean` — confirm that is intended.
df.describe(include='all')

Unnamed: 0,advisory,level,date_updated,url,threats,country,level_clean,threat_O,threat_D,threat_K,threat_E,threat_N,threat_H,threat_U,threat_T,threat_C,iso3
count,212,212,212,212,212.0,212,212,212.0,212.0,212.0,212.0,212.0,212.0,212.0,212.0,212.0,177
unique,212,6,41,212,46.0,212,6,,,,,,,,,,177
top,"Israel, the West Bank and Gaza Travel Advisory",Level 1: Exercise Normal Precautions,"July 26, 2023",https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/israel-west-bank-and-gaza-travel-advisory.html,,"Israel, the West Bank and Gaza",Level 1,,,,,,,,,,LIE
freq,1,93,42,1,81.0,1,93,,,,,,,,,,1
mean,,,,,,,,0.273585,0.04717,0.169811,0.014151,0.009434,0.080189,0.245283,0.287736,0.353774,
std,,,,,,,,0.446854,0.212504,0.376356,0.118393,0.096898,0.272228,0.431273,0.453779,0.479272,
min,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
25%,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
50%,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
75%,,,,,,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,


In [15]:
# Number of advisories per level label.
# NOTE(review): counted on `df`, so the 'Worldwide Caution' row dropped from `df_clean` is included — confirm that is intended.
df['level_clean'].value_counts()

level_clean
Level 1    93
Level 2    73
Level 4    21
Level 3    21
Other       3
Caution     1
Name: count, dtype: int64

In [16]:
#for t in ['O', 'D', 'K', 'E', 'N', 'H', 'U', 'T', 'C']:
#    col = 'threat_' + t
#    print(col)
#    print(df[col].value_counts())

### Plot Countries on Map

In [31]:
import plotly.express as px

# Fixed colour for every advisory level. An explicit mapping keeps each level's
# colour stable even when a level is missing from the data or the rows are
# ordered differently; a positional colour sequence would shift in that case.
LEVEL_COLORS = {
    'Level 1': 'lightgreen',
    'Level 2': 'lightblue',
    'Level 3': 'lightgoldenrodyellow',
    'Level 4': 'lightcoral',
    'Other': 'lavender',
}

# Choropleth map: one region per row, located by its iso3 code and coloured by level.
fig = px.choropleth(
    df_clean.sort_values('level_clean'),
    locations='iso3',
    color='level_clean',
    labels={'level_clean': 'Level'},
    hover_name='country',
    # Hover text: show the full level, threats and update date; hide the raw keys.
    hover_data={'level_clean': False, 'iso3': False, 'level': True, 'threats': True, 'date_updated': True},
    category_orders={'level_clean': list(LEVEL_COLORS)},  # legend order = dict order
    color_discrete_map=LEVEL_COLORS,
)

fig.update_layout(
    title_text='Country by Travel Advisory Level',
    title_x=0.5,  # centre the title
    hoverlabel=dict(
        bgcolor="white",
        font_size=10,
        font_family="Rockwell",
    ),
)

fig.show()