In [2]:
import requests
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import json

In [3]:
df = pd.read_csv("Airplane_Crashes_and_Fatalities_Since_1908.csv")

Inspired by the the analysis of "Salvador Dali":
https://github.com/salvador-dali/kaggle/blob/master/public_notebooks/01_airplane_crashes/investigation.md

## Tags

In [144]:
def investigate(s):
    data = {
        'weather': [
            'fog', ' rain', 'unlighted', 'thunder', 'turbulence', 'air pocket', 'adverse weather', 
            'mist', 'weather conditions', 'storm', 'typhoon', 'icing', 'bad weather', 'poor weather',
            'meteorological conditions', 'head wind', 'lightning', 'weather was poor', 'snow',
            'weather related', ' ice'
        ],
        'lost_control': ['disorientation', 'low altitude', 'loss of control'],
        'crash_landing': ['short of the runway', 'attempting to land', 'on approach','landing', 'final approach'],
        'crash_takingoff': ['taking off', 'takeoff', 'take off','took off'],
        'air_collision': ['mid-air', 'in-flight collision', 'midair', 'planes collided'],
        'shot_down': ['shot down', 'missile', 'rebel', 'fighter'],
        'mechanical_fail': [
            'engine', 'propeller','technical malfunction', 'mechanical failure', 'rotor', 'out of fuel', 'system failure', 'component failure',
            'fatigue'
        ],
        'navigation error': [
            'navigational error', 'disoriented', 'altimiter', 'poor visibility', 'altimeter',
            'compass', 'gyros', 'navigational equipment', 'erroneous navigation'
        ],
        'human_error': [
            'failure of the crew', 'pilot error', 'did not follow', 'crew ignored', 'failure to', 
            'delayed landing', 'overloaded', 'misinterpretation', 'misjudge', 'failed to', 'lost control', 
            'inadequate risk', 'improper use', 'midjudge', 'poor crew'
        ],
        'mountain':['mountain','mountains'],
        'sea':['sea','ocean'],
        'terract': ['bomb', 'hijacker']
    }

    res = []
    for el, words in data.items():
        res += [el for word in words if word in s]
        
    return list(set(res))

df['Summary'].fillna('', inplace=True)
all_values = []
df['tags'] = df.Summary.apply(lambda x:investigate(x.lower()))



#plt.figure(figsize=(20, 16))
#plt.legend(fontsize="10") # using a named size

#print(pd.DataFrame(all_values)[0].value_counts().sum())
#print(df.count())


In [147]:
df.Date.loc[df.tags.apply(len) != 0].count() / (df.Date.count() - df.Date.loc[df.Summary == ""].count())

0.84973349733497339

85% of the flight with a description have a tag now.

### Get the google api key
If you don't have one, create one on https://developers.google.com/maps/documentation/geocoding/get-api-key?hl=fr
and store it in a file called credentials.json

In [64]:
json_data=open('credentials.json').read()
data = json.loads(json_data)
API_KEY = data["google_api_key"]

In [146]:
url = "https://maps.googleapis.com/maps/api/geocode/json?address="
#address = "12 rue deu repos Lyon"
key = "&key=" +  API_KEY

def getLocation(address):
    response = requests.get(url+str(address)+key)
    if response.json()['status']=="OK":
        lat = response.json()['results'][0]['geometry']['location']['lat']
        lng = response.json()['results'][0]['geometry']['location']['lng']
    else:
        lat = np.nan
        lng = np.nan
    return pd.Series({'lat':lat,'lng':lng})

In [147]:
df = df.merge(df.Location.apply(lambda address: getLocation(address)), left_index=True, right_index=True)

In [149]:
df.to_csv('aircrashes1.csv')

In [6]:
import folium

m = folium.Map()

tooltip = 'Click me!'
#df[:10].apply(lambda x: print(x['lat']),axis = 1)
def diplay(x):
    if x['lat'] == x['lat']:
        popup = '<i>'+x.Date+'<i/></br>'
        popup += '<i>'+str(x.Summary)+'<i/>'
        folium.Marker([x['lat'], x['lng']], popup=popup).add_to(m)
    
    
df.apply(lambda x:diplay(x),axis = 1)
#folium.Marker([45.3288, -121.6625], popup='<i>Mt. Hood Meadows</i>').add_to(m)
#folium.Marker([45.3311, -121.7113], popup='<b>Timberline Lodge</b>', tooltip=tooltip).add_to(m)
m

In [4]:
df =pd.read_csv('aircrashes1.csv')

In [5]:
df[df.lat.isnull()]

Unnamed: 0.1,Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,lat,lng
6,6,09/03/1915,15:20,"Off Cuxhaven, Germany",Military - German Navy,,,Zeppelin L-10 (airship),,,19.0,19.0,0.0,"Exploded and burned near Neuwerk Island, when...",,
13,13,03/30/1917,,Off Northern Germany,Military - German Navy,,,Schutte-Lanz S-L-9 (airship),,,23.0,23.0,0.0,Struck by lightning and crashed into the Balti...,,
15,15,06/14/1917,08:45,"Off Vlieland Island, North Sea",Military - German Navy,,,Zeppelin L-43 (airship),,,24.0,24.0,0.0,Shot down by British aircraft.,,
16,16,08/21/1917,07:00,Off western Denmark,Military - German Navy,,,Zeppelin L-23 (airship),,,18.0,18.0,0.0,Shot down by British aircraft.,,
19,19,05/10/1918,,"Off Helgoland Island, Germany",Military - German Navy,,,Zeppelin L-70 (airship),,,22.0,22.0,0.0,Shot down by British aircraft crashing from a ...,,
20,20,08/11/1918,10:00,"Ameland Island, North Sea",Military - German Navy,,,Zeppelin L-53 (airship),,,19.0,19.0,0.0,Shot down by british aircraft.,,
39,39,10/02/1920,,"Off Port Vendres, France",Latecoere Airlines,,,Salmson 2-A-2,F-ALAI,31,2.0,2.0,0.0,,,
102,102,07/03/1926,,"Rossaugpt, Czechoslovakia",Compagnie Internationale de Navigation Aérienne,,Paris - Prague,Caudron C-61,F-AFBT,5307,7.0,7.0,0.0,Crashed while en route.,,
124,124,10/06/1927,,"Thies, Mauritania",Lignes Aeriennes Latecoere,,,Breguet 14,F-AGBN,,2.0,2.0,0.0,,,
127,127,11/16/1927,,Over the Gulf of Finland,Aero O-Y,,,Junkers F-13,K-SALD,798,6.0,6.0,0.0,,,
