# FAA unruly passenger investigations initiated

In [1]:
from io import StringIO

import altair as alt
import numpy as np
import pandas as pd
import requests

### Get url

In [2]:
# Page that is fetched and scraped for the investigations table.
url = "https://www.faa.gov/data_research/passengers_cargo/unruly_passengers/"

### Grab the page and extract the table

In [3]:
# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() surfaces HTTP errors here instead of
# handing an error page to the HTML parser.
response = requests.get(url, timeout=30)
response.raise_for_status()

# read_html returns a list of DataFrames, one per <table> whose attributes
# match. The markup is wrapped in StringIO because passing a literal HTML
# string is deprecated in recent pandas releases.
table = pd.read_html(StringIO(response.text), attrs={'class': 'striped'})

### Convert the table - a single item in a list - into a dataframe

In [4]:
# `table` is the list produced by read_html; use its first entry as the
# working dataframe.
df = pd.DataFrame(table[0])

### Clean up columns, types in dataframe

In [5]:
# Replace the scraped header labels with short names used by every later cell.
# Assumes the scraped table has exactly two columns (year, count) — the
# assignment raises if the column count differs.
df.columns = ['year', 'investigations']

In [6]:
# Store years as strings: the filters further down compare against string
# literals such as '2020' and '2021'.
df['year'] = df['year'].astype(str)

In [7]:
# Show the cleaned dataframe to check the column names and values.
df

Unnamed: 0,year,investigations
0,1995,146
1,1996,184
2,1997,237
3,1998,204
4,1999,226
5,2000,255
6,2001,305
7,2002,279
8,2003,286
9,2004,310


---

### Descriptives

In [8]:
# Mean number of investigations per year across all rows, to two decimals.
round(df['investigations'].mean(), 2)

209.04

In [9]:
# Median yearly count across all rows, for comparison with the mean.
round(df['investigations'].median(), 2)

159.0

In [10]:
# Largest single-year count in the table.
round(df['investigations'].max(), 2)

906

### What's the average for the years before 2020 (1995-2019)?

In [11]:
# Baseline: mean yearly count over rows whose year string sorts before
# '2020', so 2020 itself is excluded.
before_2020 = df['year'] < '2020'
mean_cases = df.loc[before_2020, 'investigations'].mean()

In [12]:
# Show the baseline mean (rows with year < '2020').
mean_cases

182.2

### Versus 2021?

In [13]:
# Count for the row(s) labelled '2021'; .sum() collapses the filtered
# column to a single number.
is_2021 = df['year'] == '2021'
this_year = df.loc[is_2021, 'investigations'].sum()

In [14]:
# Show the 2021 count.
this_year

906

### % change in 2021 relative to the baseline mean

In [15]:
# Percent difference between the 2021 count and the baseline mean,
# rounded to the nearest whole percent.
pct_change = (this_year - mean_cases) / mean_cases * 100
change = pct_change.round()

In [16]:
# Show the rounded percent change.
change

397.0

### How many cases in the five years from 2016 through 2020?

In [23]:
# Keep rows whose year string falls strictly between '2015' and '2021',
# i.e. 2016 through 2020.
after_2015 = df['year'] > '2015'
before_2021 = df['year'] < '2021'
five_years = df[after_2015 & before_2021]

In [26]:
# Total investigations across the selected 2016-2020 rows.
five_years.investigations.sum()

681

### Chart it

In [19]:
# Orange bar chart with one bar per year and the investigation count as
# bar height.
alt.Chart(df, title='FAA unruly passenger investigations').mark_bar(
    color='orange'
).encode(x='year', y='investigations')

In [20]:
# Write the cleaned table to disk; index=False keeps the row index out of
# the file.
output_path = 'data/faa-investigations-initiated.csv'
df.to_csv(output_path, index=False)