# Airline Incidents Safety Data Jupyter Notebook

Data is from https://www.kaggle.com/datasets/tarique7/airline-incidents-safety-data

Last Update: Mar 30 2023

In [39]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

import copy

import autokeras as ak

import tensorflow as tf

## Step 0: Test GPU

In [43]:
print(f'\nTensorflow version = {tf.__version__}\n')
print(f'\n{tf.config.list_physical_devices("GPU")}\n')


Tensorflow version = 2.12.0


[]



In [44]:
import torch

print(f'\nAvailable cuda = {torch.cuda.is_available()}')

print(f'\nGPUs availables = {torch.cuda.device_count()}')

print(f'\nCurrent device = {torch.cuda.current_device()}')

print(f'\nCurrent Device location = {torch.cuda.device(0)}')

print(f'\nName of the device = {torch.cuda.get_device_name(0)}')


Available cuda = True

GPUs availables = 1

Current device = 0

Current Device location = <torch.cuda.device object at 0x7f7d1239b190>

Name of the device = NVIDIA GeForce RTX 3070


## Step 1: Read dataset

In [3]:
data = pd.read_csv('data/Airline Occurences.csv')

## Step 2: Understanding the dataset

In [4]:
data.head()

Unnamed: 0,Report,Part Failure,Occurence Nature condition,Occurence Precautionary Procedures
0,MECHANICAL / LANDING GEAR GND FAIL MSG AFTER T...,RT MLG BRAKE DAMAGED,WARNING INDICATION,OTHER
1,THE NOSE LANDING GEAR DID NOT EXTEND FULLY DUR...,ZONE 700 MALFUNCTIONED,WARNING INDICATION,ABORTED APPROACH
2,THE LEFT SIDE HYDRAULIC SYSTEM FILTER BOWL ASS...,HYD FILTER FAILED,OTHER,ABORTED APPROACH
3,AIRCRAFT WAS ON ROLLOUT DURING A NORMAL LANDIN...,LEFT COLLAPSED,OTHER,OTHER
4,UPON TAKEOFF ROLL BUT PRIOR TO REACHING 80 KNO...,ZONE 600 CRACKED,WARNING INDICATION,ABORTED TAKEOFF


In [7]:
data.shape

(100028, 4)

In [54]:
data.dtypes

Report                                object
Part Failure                          object
Occurence Nature condition            object
Occurence Precautionary Procedures    object
dtype: object

In [55]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100028 entries, 0 to 100027
Data columns (total 4 columns):
 #   Column                              Non-Null Count   Dtype 
---  ------                              --------------   ----- 
 0   Report                              100028 non-null  object
 1   Part Failure                        100028 non-null  object
 2   Occurence Nature condition          100028 non-null  object
 3   Occurence Precautionary Procedures  100028 non-null  object
dtypes: object(4)
memory usage: 3.1+ MB


In [56]:
print(data.isnull().sum())

Report                                0
Part Failure                          0
Occurence Nature condition            0
Occurence Precautionary Procedures    0
dtype: int64


In [5]:
data['Occurence Nature condition'].value_counts()

OTHER                         82172
NO TEST                        3577
SMOKE/FUMES/ODORS/SPARKS       2166
FLUID LOSS                      975
FLT CONT AFFECTED               320
OVER TEMP                       316
VIBRATION/BUFFET                228
INADEQUATE Q C                  223
AFFECT SYSTEMS                  202
F.O.D.                          191
PARTIAL RPM/PWR LOSS            152
MULTIPLE FAILURE                107
FLAME/FIRE                       65
ENGINE STOPPAGE                  56
SIGNIFICANT FAILURE REPORT       53
INFLIGHT SEPARATION              47
ENGINE FLAMEOUT                  38
ELECT. POWER LOSS-50 PC          27
FLT. ATTITUDE INST.              21
Name: Occurence Nature condition, dtype: int64

In [6]:
data['Occurence Precautionary Procedures'].value_counts()

NONE                        79268
OTHER                       14855
UNSCHED LANDING              3627
ABORTED TAKEOFF               719
RETURN TO BLOCK               556
EMER. DESCENT                 422
ENGINE SHUTDOWN               229
ABORTED APPROACH              142
DEACTIVATE SYST/CIRCUITS      135
DUMP FUEL                      25
ACTIVATE FIRE EXT.             22
O2 MASK DEPLOYED               21
AUTOROTATION                    4
CABIN DEPRESSURIZATION          3
Name: Occurence Precautionary Procedures, dtype: int64

## Step 3: Text Preprocessing

In [29]:
def text_preprocessing(data):
    stop_words = set(stopwords.words('english'))
    data_columns_list = list(data.columns)
    for i in range(len(data_columns_list)):
        data[data_columns_list[i]] = data[data_columns_list[i]].str.strip().str.lower()
        for j in range(len(data)):
            text = data[data_columns_list[i]].loc[j]
            text = ' '.join([word for word in text.split() if word not in stop_words])
            data[data_columns_list[i]].loc[j] = text
    return data

In [30]:
list(data.columns)

['Report',
 'Part Failure',
 'Occurence Nature condition',
 'Occurence Precautionary Procedures']

In [35]:
data_processed = copy.deepcopy(data) # Remove reference pointers
data_output = text_preprocessing(data_processed)

In [36]:
data_output.head()

Unnamed: 0,Report,Part Failure,Occurence Nature condition,Occurence Precautionary Procedures
0,mechanical / landing gear gnd fail msg takeoff...,rt mlg brake damaged,warning indication,
1,nose landing gear extend fully approach seen i...,zone 700 malfunctioned,warning indication,aborted approach
2,left side hydraulic system filter bowl assembl...,hyd filter failed,,aborted approach
3,aircraft rollout normal landing. landing gear ...,left collapsed,,
4,upon takeoff roll prior reaching 80 knots pilo...,zone 600 cracked,warning indication,aborted takeoff


## Step 4: Model Building

In [50]:
X = data_output[['Report', 'Part Failure']]
X_new = data_output['Report']
y = data_output['Occurence Nature condition']
X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.25, random_state=42)

In [51]:
X_train_ak = np.array(X_train)
X_test_ak = np.array(X_test)
y_train_ak = np.array(y_train)
y_test_ak = np.array(y_test)

In [53]:
keras = ak.TextRegressor(overwrite=True, max_trials=5)

keras.fit(X_train_ak, y_train_ak, epochs=30, validation_split=0.2)

Trial 2 Complete [00h 00m 02s]

Best val_loss So Far: None
Total elapsed time: 00h 00m 05s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
vanilla           |vanilla           |text_block_1/block_type
5000              |5000              |text_block_1/max_tokens
64                |64                |text_block_1/text_to_int_sequence_1/output_sequence_length
none              |none              |text_block_1/embedding_1/pretraining
128               |128               |text_block_1/embedding_1/embedding_dim
0.25              |0.25              |text_block_1/embedding_1/dropout
3                 |3                 |text_block_1/conv_block_1/kernel_size
False             |False             |text_block_1/conv_block_1/separable
True              |True              |text_block_1/conv_block_1/max_pooling
2                 |2                 |text_block_1/conv_block_1/num_blocks
2                 |2                 |text_block_1/conv_block_1/num_layers
32       

2023-03-30 19:27:28.955657: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_20' with dtype string and shape [75021]
	 [[{{node Placeholder/_20}}]]
2023-03-30 19:27:28.955884: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_5' with dtype string and shape [75021]
	 [[{{node Placeholder/_5}}]]


Epoch 1/30


2023-03-30 19:27:30.161054: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_15' with dtype string and shape [75021]
	 [[{{node Placeholder/_15}}]]
2023-03-30 19:27:30.161273: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [75021]
	 [[{{node Placeholder/_0}}]]
2023-03-30 19:27:31.008289: W tensorflow/core/framework/op_kernel.cc:1807] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to float is not supported
Traceback (most recent call last):
  File "/home/devkelvin/.local/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 266, i

RuntimeError: Number of consecutive failures excceeded the limit of 3.
Traceback (most recent call last):
  File "/home/devkelvin/.local/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 266, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/home/devkelvin/.local/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 231, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "/home/devkelvin/.local/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 287, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/engine/tuner.py", line 101, in _build_and_fit_model
    _, history = utils.fit_with_adaptive_batch_size(
  File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/utils/utils.py", line 88, in fit_with_adaptive_batch_size
    history = run_with_adaptive_batch_size(
  File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/utils/utils.py", line 101, in run_with_adaptive_batch_size
    history = func(x=x, validation_data=validation_data, **fit_kwargs)
  File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/utils/utils.py", line 89, in <lambda>
    batch_size, lambda **kwargs: model.fit(**kwargs), **fit_kwargs
  File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/home/devkelvin/.local/lib/python3.10/site-packages/tensorflow/python/eager/execute.py", line 52, in quick_execute
    tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.UnimplementedError: Graph execution error:

Detected at node 'mean_squared_error/Cast' defined at (most recent call last):
    File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/devkelvin/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/home/devkelvin/.local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start
      self.io_loop.start()
    File "/home/devkelvin/.local/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/usr/lib/python3.10/asyncio/base_events.py", line 600, in run_forever
      self._run_once()
    File "/usr/lib/python3.10/asyncio/base_events.py", line 1896, in _run_once
      handle._run()
    File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/home/devkelvin/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/home/devkelvin/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/home/devkelvin/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/home/devkelvin/.local/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2961, in run_cell
      result = self._run_cell(
    File "/home/devkelvin/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3016, in _run_cell
      result = runner(coro)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3221, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/devkelvin/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3400, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/devkelvin/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_5987/4269979880.py", line 3, in <module>
      keras.fit(X_train_ak, y_train_ak, epochs=30, validation_split=0.2)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/tasks/text.py", line 292, in fit
      history = super().fit(
    File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/auto_model.py", line 292, in fit
      history = self.tuner.search(
    File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/engine/tuner.py", line 193, in search
      super().search(
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 226, in search
      self._try_run_and_update_trial(trial, *fit_args, **fit_kwargs)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 266, in _try_run_and_update_trial
      self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 231, in _run_and_update_trial
      results = self.run_trial(trial, *fit_args, **fit_kwargs)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 287, in run_trial
      obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/engine/tuner.py", line 101, in _build_and_fit_model
      _, history = utils.fit_with_adaptive_batch_size(
    File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/utils/utils.py", line 88, in fit_with_adaptive_batch_size
      history = run_with_adaptive_batch_size(
    File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/utils/utils.py", line 101, in run_with_adaptive_batch_size
      history = func(x=x, validation_data=validation_data, **fit_kwargs)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/autokeras/utils/utils.py", line 89, in <lambda>
      batch_size, lambda **kwargs: model.fit(**kwargs), **fit_kwargs
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1051, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1109, in compute_loss
      return self.compiled_loss(
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/losses.py", line 142, in __call__
      losses = call_fn(y_true, y_pred)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/losses.py", line 268, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/home/devkelvin/.local/lib/python3.10/site-packages/keras/losses.py", line 1469, in mean_squared_error
      y_true = tf.cast(y_true, y_pred.dtype)
Node: 'mean_squared_error/Cast'
Cast string to float is not supported
	 [[{{node mean_squared_error/Cast}}]] [Op:__inference_train_function_47348]


In [None]:
keras_export = keras.export_model()
keras_export.summary()