# Capstone Part03
*Exploring Visualisation Techniques for Aviation Accident Analysis*
> NOTE: The maps from folium and plotly are not very jupyternotebook-github friendly. The maps have been saved in the project folder to view in Github.
- ./world_map_accidents.html
- ./world_heatmap_accidents.png
PS: its best when viewed on jupyter notebook as it is interactive and the text labels are better when hovered.

## Submitted by Roshan Lulu

# Introduction

### 1. Plot Crash Locations on the world map

In [1]:
import folium
import pandas as pd

In [2]:
# Write save it to a csv file
data = pd.read_csv('./dataset/crashes_geolocation.csv')

In [3]:
data.head()

Unnamed: 0,SubLocation,Country,latlon,place,sub_latlon,sub_place,FlighNumber
0,unknown,Peru,"(-9.189967, -75.015152)",Peru,"(-9.189967, -75.015152)",Peru,0
1,at an unknown location,Germany,"(51.165691, 10.451526)",Germany,"(51.165691, 10.451526)",Germany,1
2,"Boca Chica, Tamaulipas",Mexico,"(23.634501, -102.552784)",Mexico,"(25.9516829, -97.3341488)","Boca Chica Blvd, Brownsville, TX, USA",2
3,Akyab Main,Myanmar,"(21.916221, 95.955974)",Myanmar (Burma),"(20.132722, 92.872956)","Sittwe Airport (AKY), Ye Nwe Su Quarter, Sittw...",3
4,near Río Putumayo,Brazil,"(-14.235004, -51.92528)",Brazil,"(-2.6584061, -69.7378901)",Putumayo River,4


In [4]:
print(data['Country'].isnull().sum())
data[data['Country'].isnull()]
data.dropna(subset=['Country'], inplace=True)

14


In [5]:
data['Country'].isnull().sum()

0

In [6]:
data['latlon'].isnull().sum()
data[data['latlon'].isnull()]

Unnamed: 0,SubLocation,Country,latlon,place,sub_latlon,sub_place,FlighNumber


In [7]:
# Split Country lat and long to different columns
data['latlon'] = [latlong.replace('(','').replace(')','') for latlong in data['latlon']]

In [8]:
data['latitude'] = [latlong.split(',')[0] for latlong in data['latlon']]
data['longitude'] = [latlong.split(',')[1] for latlong in data['latlon']]

## I have to manipulate the Dataframe to Plot.. 
*There are around 8712 datapoints. If I try to plot each point, my system is not able to handle it. Since I am plotting Country wise, for now I can manage to plot countries just once. When it comes to actual location plotting I will have to either divide them up into different locaions or make it possible somehow. Only time will tell... *

In [9]:
data_con = data[['latlon','Country']]

In [10]:
data_tem = pd.DataFrame(data_con['Country'].value_counts())
data_tem.reset_index(inplace = True)
# data_tem['index'].value_counts()

In [11]:
data_con = pd.DataFrame(data_con['latlon'].value_counts())

In [12]:
data_con.reset_index(inplace = True)

In [13]:
data_con['Country'] = data_tem['index']

In [14]:
data_con.columns = ['latlon', 'count','Country']
data_con.head()

Unnamed: 0,latlon,count,Country
0,"37.09024, -95.712891",1581,United States of America
1,"61.52401, 105.318756",431,Russia
2,"55.378051, -3.435973",280,United Kingdom
3,"56.130366, -106.346771",274,Canada
4,"-14.235004, -51.92528",223,Brazil


In [15]:
data_con['latitude'] = [latlong.split(',')[0] for latlong in data_con['latlon']]
data_con['longitude'] = [latlong.split(',')[1] for latlong in data_con['latlon']]

In [16]:
(data_con.head())

Unnamed: 0,latlon,count,Country,latitude,longitude
0,"37.09024, -95.712891",1581,United States of America,37.09024,-95.712891
1,"61.52401, 105.318756",431,Russia,61.52401,105.318756
2,"55.378051, -3.435973",280,United Kingdom,55.378051,-3.435973
3,"56.130366, -106.346771",274,Canada,56.130366,-106.346771
4,"-14.235004, -51.92528",223,Brazil,-14.235004,-51.92528


In [17]:
#create a map
this_map = folium.Map(prefer_canvas=True)

def plotDot(point):
    '''input: series that contains a numeric named latitude and a numeric named longitude
    this function creates a CircleMarker and adds it to your this_map'''
    print(point['latlon'])
    folium.CircleMarker(location=[point.latitude,point.longitude],
                        popup=point['Country'],
                        fill_color='#132b5e',
                        radius=5,
                        weight=0).add_to(this_map)
    
# plotDot(data[0])
# plotDot(data[1])

#use df.apply(,axis=1) to "iterate" through every row in dataframe
data_con.apply(plotDot, axis = 1)
# Set the zoom to the maximum possible
this_map.fit_bounds(this_map.get_bounds())
#Save the map to an HTML file
this_map.save('world_map_accidents.html')
this_map

37.09024, -95.712891
61.52401, 105.318756
55.378051, -3.435973
56.130366, -106.346771
-14.235004, -51.92528
4.570868, -74.297333
20.593684, 78.96288
37.5652016, -77.5346972
46.227638, 2.213749
-0.789275, 113.921327
51.165691, 10.451526
14.058324, 108.277199
36.204824, 138.252924
-25.274398, 133.775136
35.86166, 104.195397
40.46366700000001, -3.74922
23.634501, -102.552784
41.87194, 12.56738
-16.290154, -63.58865299999999
-38.416097, -63.61667199999999
38.9040501, -77.04050860000001
-9.189967, -75.015152
21.521757, -77.781167
12.879721, 121.774017
6.42375, -66.58973
-6.314992999999999, 143.95555
32.427908, 53.688046
-14.5994134, -28.6731465
52.132633, 5.291265999999999
38.963745, 35.243322
-11.202692, 17.873887
21.916221, 95.955974
48.379433, 31.16558
26.820553, 30.802498
60.47202399999999, 8.468945999999999
33.93911, 67.709953
19.85627, 102.495496
-1.831239, -78.18340599999999
51.919438, 19.145136
23.69781, 120.960515
-30.559482, 22.937506
9.081999, 8.675277
-8.783195, -124.508523
30.3

## 2. Plot Countries by Accident Count in terms of heat map

In [18]:
import branca.colormap as cm

In [19]:
blRd = cm.LinearColormap(['blue', 'red'],
                           vmin=min(data_con['count']), vmax=max(data_con['count']))
blRd

In [20]:
import plotly.plotly as py
import pandas as pd
import pycountry

## Updating country names to suit the pycountry library I have used

In [21]:
data_con.loc[data_con['Country'] == 'United States of America', ['Country']] = 'United States'
data_con.loc[data_con['Country'] == 'Russia', ['Country']] = 'Russian Federation'
data_con.loc[data_con['Country'] == 'Vietnam', ['Country']] = 'Viet Nam'
data_con.loc[data_con['Country'] == 'Bolivia', ['Country']] = 'Bolivia, Plurinational State of'
data_con.loc[data_con['Country'] == 'Democratic Republic)', ['Country']] = 'Congo, The Democratic Republic of the'
data_con.loc[data_con['Country'] == 'Venezuela', ['Country']] = 'Venezuela, Bolivarian Republic of'
data_con.loc[data_con['Country'] == 'Iran', ['Country']] = 'Iran, Islamic Republic of'
data_con.loc[data_con['Country'] == 'Laos', ['Country']] = "Lao People's Democratic Republic"
data_con.loc[data_con['Country'] == 'Taiwan', ['Country']] = 'Taiwan, Province of China'
data_con.loc[data_con['Country'] == 'Pacific Ocean', ['Country']] = 'Fiji'
data_con.loc[data_con['Country'] == 'South Korea', ['Country']] = "Korea, Democratic People's Republic of"
data_con.loc[data_con['Country'] == 'Tanzania', ['Country']] = 'Tanzania, United Republic of'
data_con.loc[data_con['Country'] == 'Czech Republic', ['Country']] = 'Czechia'
data_con.loc[data_con['Country'] == 'Syria', ['Country']] = 'Syrian Arab Republic'
data_con.loc[data_con['Country'] == "Cote d'Ivoire", ['Country']] = "Côte d'Ivoire"
data_con.loc[data_con['Country'] == "Falkland Islands", ['Country']] = 'Falkland Islands (Malvinas)'
data_con.loc[data_con['Country'] == "U.S. Virgin Islands", ['Country']] = 'Virgin Islands, U.S.'
data_con.loc[data_con['Country'] == "British Virgin Islands", ['Country']] = 'Virgin Islands, British'
data_con.loc[data_con['Country'] == "North Korea", ['Country']] = 'Korea, Republic of'
data_con.loc[data_con['Country'] == "St. Pierre and Miquelon", ['Country']] = 'Saint Pierre and Miquelon'
data_con.loc[data_con['Country'] == "Micronesia", ['Country']] = 'Micronesia, Federated States of'
data_con.loc[data_con['Country'] == "Reunion", ['Country']] = 'Réunion'
data_con.loc[data_con['Country'] == "St. Kitts and Nevis", ['Country']] = 'Saint Kitts and Nevis'
data_con.loc[data_con['Country'] == "East Timor", ['Country']] = 'Timor-Leste'
data_con.loc[data_con['Country'] == "Keeling) Islands", ['Country']] = 'Cocos (Keeling) Islands'
data_con.loc[data_con['Country'] == "Moldova", ['Country']] = 'Moldova, Republic of'
data_con.loc[data_con['Country'] == "U.S. Minor Outlying Islands", ['Country']] = 'United States Minor Outlying Islands'




data_con.loc[data_con['Country'] == 'Indian Ocean', ['Country']] = 'Brazil'
data_con.loc[data_con['Country'] == 'North Pole', ['Country']] = 'Brazil'
data_con.loc[data_con['Country'] == 'Mediterranean Sea', ['Country']] = 'Brazil'
data_con.loc[data_con['Country'] == 'Atlantic Ocean', ['Country']] = 'Brazil'
data_con.loc[data_con['Country'] == 'Unknown country', ['Country']] = 'Brazil'
data_con.loc[data_con['Country'] == "FYROM)", ['Country']] = 'Brazil'
data_con.head()

Unnamed: 0,latlon,count,Country,latitude,longitude
0,"37.09024, -95.712891",1581,United States,37.09024,-95.712891
1,"61.52401, 105.318756",431,Russian Federation,61.52401,105.318756
2,"55.378051, -3.435973",280,United Kingdom,55.378051,-3.435973
3,"56.130366, -106.346771",274,Canada,56.130366,-106.346771
4,"-14.235004, -51.92528",223,Brazil,-14.235004,-51.92528


In [22]:
input_countries = data_con['Country']

countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_3
countries

{'Afghanistan': 'AFG',
 'Albania': 'ALB',
 'Algeria': 'DZA',
 'American Samoa': 'ASM',
 'Andorra': 'AND',
 'Angola': 'AGO',
 'Anguilla': 'AIA',
 'Antarctica': 'ATA',
 'Antigua and Barbuda': 'ATG',
 'Argentina': 'ARG',
 'Armenia': 'ARM',
 'Aruba': 'ABW',
 'Australia': 'AUS',
 'Austria': 'AUT',
 'Azerbaijan': 'AZE',
 'Bahamas': 'BHS',
 'Bahrain': 'BHR',
 'Bangladesh': 'BGD',
 'Barbados': 'BRB',
 'Belarus': 'BLR',
 'Belgium': 'BEL',
 'Belize': 'BLZ',
 'Benin': 'BEN',
 'Bermuda': 'BMU',
 'Bhutan': 'BTN',
 'Bolivia, Plurinational State of': 'BOL',
 'Bonaire, Sint Eustatius and Saba': 'BES',
 'Bosnia and Herzegovina': 'BIH',
 'Botswana': 'BWA',
 'Bouvet Island': 'BVT',
 'Brazil': 'BRA',
 'British Indian Ocean Territory': 'IOT',
 'Brunei Darussalam': 'BRN',
 'Bulgaria': 'BGR',
 'Burkina Faso': 'BFA',
 'Burundi': 'BDI',
 'Cabo Verde': 'CPV',
 'Cambodia': 'KHM',
 'Cameroon': 'CMR',
 'Canada': 'CAN',
 'Cayman Islands': 'CYM',
 'Central African Republic': 'CAF',
 'Chad': 'TCD',
 'Chile': 'CHL',
 

In [23]:
codes = [pycountry.countries.get(name = country).alpha_3 for country in input_countries]

In [24]:
data_con['Code'] = codes
data_con.head()

Unnamed: 0,latlon,count,Country,latitude,longitude,Code
0,"37.09024, -95.712891",1581,United States,37.09024,-95.712891,USA
1,"61.52401, 105.318756",431,Russian Federation,61.52401,105.318756,RUS
2,"55.378051, -3.435973",280,United Kingdom,55.378051,-3.435973,GBR
3,"56.130366, -106.346771",274,Canada,56.130366,-106.346771,CAN
4,"-14.235004, -51.92528",223,Brazil,-14.235004,-51.92528,BRA


In [26]:
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
scl = [[1.0, 'rgb(84,39,143)'], [0.8, 'rgb(117,107,177)'],[0.6, 'rgb(158,154,200)'],[0.4, 'rgb(188,189,220)'],\
       [0.2, 'rgb(218,218,235)'], [0.0, 'rgb(242,240,247)']]


df = data_con
data = [ dict(
        type = 'choropleth',
        locations = df['Code'],
        z = df['count'],
        text = df['Country'],
        colorscale = 'Viridis',
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'No of Crashes'),
      ) ]

layout = dict(
    title = 'Aviation Accidents by Country',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            type = 'Mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
py.iplot( fig, validate=False, filename='d3-world-map' )