In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.style
matplotlib.style.use("seaborn-v0_8")
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import textwrap
import seaborn as sn
# NOTE(review): the palette returned by this call is neither assigned nor passed
# on, so the statement appears to have no lasting effect. If the intent was to
# make "hls" the default palette, sn.set_palette("hls", 17) is the call that
# does that -- confirm before changing, since it would alter every figure.
sn.color_palette("hls", 17)
import scipy.stats as st
import math
from sklearn.metrics import cohen_kappa_score
import pingouin as pg
# Set the matplotlib font family to Times New Roman for figures drawn after this line.
plt.rcParams["font.family"] = "Times New Roman"

In [2]:
import sys
import os
# Append the directory four levels above the working directory to the import
# path; presumably this is the repository root that holds the `mika` package --
# confirm. The path is relative, so it depends on where the kernel was started.
sys.path.append(os.path.join("..", "..", "..", ".."))

# NOTE(review): the star import hides where names come from. The functions
# called in later cells without a visible definition
# (identify_docs_per_fmea_row, calc_severity_per_hazard, get_likelihood_USFS)
# presumably come from this module -- confirm and import them explicitly.
from mika.kd.trend_analysis import *
from mika.utils import Data
from mika.utils.SAFECOM import get_SAFECOM_severity_USFS

In [3]:
# Absolute path of the SAFECOM UAS CSV, resolved from the directory four levels
# above the current working directory.
_four_levels_up = os.path.join(os.getcwd(), *([os.pardir] * 4))
file = os.path.join(os.path.abspath(_four_levels_up), "data/SAFECOM/SAFECOM_UAS_fire_data.csv")

In [4]:
# Column holding the report identifier, and the column(s) handed to the loader
# as text columns.
document_id_col = 'Tracking #'
list_of_attributes = ['Narrative']

# Remaining metadata columns of the SAFECOM export. Defined here for reference;
# this list is not passed to load() below.
extra_cols = [
    'Agency', 'Region', 'Location', 'Date', 'Date Submitted', 'Tracking #',
    'Mission Type', 'Persons Onboard', 'Departure Point', 'Destination',
    'Special Use', 'Damages', 'Injuries', 'Hazardous Materials', 'Other Mission Type',
    'Type', 'Manufacturer', 'Model', 'Hazard', 'Incident Management',
    'UAS', 'Accident', 'Airspace', 'Maintenance', 'Mishap Prevention',
]

# Load the CSV through the project's Data wrapper and keep a handle on the
# resulting DataFrame for the cells that follow.
safecom = Data()
safecom.load(
    file,
    preprocessed=True,
    id_col=document_id_col,
    text_columns=list_of_attributes,
)
preprocessed_df = safecom.data_df

In [5]:
#extract event occurrence year
# Split each 'Date' string on '/' once and reuse the pieces. The field order is
# month/day/year: element 0 feeds 'Month', element 1 'Day', the last 'Year'.
# The vectorised .str accessor does not depend on the frame having index labels
# 0..n-1, and a missing date (or one with too few parts) yields NaN for that
# row instead of aborting the whole cell. All three columns hold strings.
_date_parts = preprocessed_df['Date'].str.split('/')
preprocessed_df['Year'] = _date_parts.str[-1]
preprocessed_df['Day'] = _date_parts.str[1]
preprocessed_df['Month'] = _date_parts.str[0]

In [8]:
# Expand the report table so that every (report, manually assigned hazard) pair
# has its own row, labelled in a new "cluster" column:
#   * a report with no manual label gets one row labelled 'misc';
#   * a report with k labels gets k adjacent copies of its row, one per label,
#     in the order the labels appear in the spreadsheet.
grouping_col = "Mode"
manual_groups = pd.read_excel("SAFECOM_UAS_clusters_v1.xlsx")
id_col = "Tracking #"

# Index the manual labels once (report id -> labels in spreadsheet order)
# instead of scanning the whole spreadsheet for every report.
labels_by_id = {}
for report_id, label in zip(manual_groups[id_col], manual_groups[grouping_col]):
    if pd.isna(report_id):
        continue  # a missing id never equals any report's id
    labels_by_id.setdefault(report_id, []).append(label)

cluster = []        # one label per output row
row_positions = []  # position in preprocessed_df of the report behind each output row
for position, report_id in enumerate(preprocessed_df[id_col]):
    labels = labels_by_id.get(report_id, ['misc'])
    cluster.extend(labels)
    row_positions.extend([position] * len(labels))

# Number of duplicate rows created for reports that carry more than one label.
rows_added = len(row_positions) - len(preprocessed_df)

# Build the expanded frame with a single positional take rather than
# re-concatenating the frame for every extra row (which is quadratic).
new_data_df = preprocessed_df.iloc[row_positions].reset_index(drop=True)
data_df_all_rows = new_data_df
data_df_all_rows["cluster"] = cluster

In [13]:
def calc_severity(df):
    """Add a per-report 'severity' column to ``df``.

    For every row, the values of the 'Hazardous Materials', 'Injuries' and
    'Damages' columns are passed, in that order, to ``safecom_severity``.

    Args:
        df: DataFrame containing the three columns named above.

    Returns:
        The same DataFrame object that was passed in; the 'severity' column is
        written onto it in place.
    """
    # Walk the three columns in lockstep instead of materialising a full row
    # Series three times per row with df.iloc[i][...].
    flag_triples = zip(df['Hazardous Materials'], df['Injuries'], df['Damages'])
    df['severity'] = [
        safecom_severity(hazardous_mat, injury, damage)
        for hazardous_mat, injury, damage in flag_triples
    ]
    return df

def safecom_severity(hazardous_mat, injury, damage):
    """Score one report by counting how many of its three flags are "Yes".

    Args:
        hazardous_mat: value of the 'Hazardous Materials' field.
        injury: value of the 'Injuries' field.
        damage: value of the 'Damages' field.
        Each is expected to be "Yes" or "No"; a missing value (None/NaN) is
        accepted and contributes 0.

    Returns:
        int between 0 and 3: the number of flags equal to "Yes".

    Raises:
        KeyError: if a flag is present but is neither "Yes" nor "No".
    """
    key_dict = {"No": 0, "Yes": 1}
    severity = 0
    for flag in (hazardous_mat, injury, damage):
        # Missing values must be screened *before* the lookup: NaN is not a key
        # of key_dict, so looking it up raises KeyError, and a sum of the
        # dict's ints can never be NaN afterwards.
        if pd.isna(flag):
            continue
        severity += key_dict[flag]
    return severity

In [14]:
# Group the expanded reports by hazard ('cluster') and by 'Year'. Judging by the
# names, this yields per-hazard/per-year counts plus the matching document ids;
# the function is not defined in the visible cells (presumably it comes from
# mika.kd.trend_analysis) -- confirm the return shapes there.
frequency, docs_per_row = identify_docs_per_fmea_row(data_df_all_rows, 'cluster', 'Year', id_col)
# Add the 'severity' column; calc_severity writes it onto the frame it is given
# and returns that same frame.
data_df_all_rows = calc_severity(data_df_all_rows)

In [15]:
# Aggregate the per-report 'severity' values for each hazard with metric='max'.
# calc_severity_per_hazard is not defined in the visible cells (presumably it
# comes from mika.kd.trend_analysis) -- confirm what each returned value holds.
severities, total_severities_hazard = calc_severity_per_hazard(docs_per_row, data_df_all_rows, id_col, metric='max')

In [17]:
# Display the hazard -> severity mapping produced by calc_severity_per_hazard(metric='max').
total_severities_hazard

{'UAS Intrusion': 1,
 'Hobbyist offers services': 0,
 'Employee personal drone usage without certification/official approval': 0,
 'Communications/flight navigation with UAS failed': 0,
 'Battery': 1,
 'Loss of control': 1,
 'High wind pushes UAS off course': 1,
 'Loss of GPS on UAS': 1,
 'Propellor arm disconnect (sheared bolt heads or snaps)': 1,
 'Fight plan error': 1,
 'Motor failure': 1,
 'Pilot Error': 1,
 'Manned Aircraft Intrusion': 0,
 'Engine Failure': 1,
 'Hang Fire': 0,
 'Loss of LOS': 1,
 'UAS airspace separation issue': 0,
 'Casing dislodged': 0,
 'Motor Mount Failure': 2,
 'Pins or needles dislodged': 0,
 'Loss of GCS': 1,
 'Inappropriate Landing Site': 1}

In [None]:
# NOTE(review): this cell has no execution count (In [None]) and `rates` is not
# defined in any cell visible here, so on a fresh kernel the second line would
# raise NameError unless `rates` is created elsewhere -- confirm and define it
# before this cell. get_likelihood_USFS is also not defined in the visible
# cells; presumably it comes from the star import of mika.kd.trend_analysis.
severities_USFS = get_SAFECOM_severity_USFS(severities)
likelihood = get_likelihood_USFS(rates)