In [191]:
# Libs
import pandas as pd
import os 
import json
import matplotlib.pyplot as plt
import seaborn as sns

In [192]:
# Resolve data files relative to the notebook's working directory. The paths
# are assembled with os.path.join so they do not depend on the Windows-only
# backslash separator.
pwd = os.getcwd()
data_dir = os.path.join(pwd, "Data")

# Datasets: `df` is read from the aircraft incident file, `dataset` from the
# cleaned Jadaero parts file. read_csv already returns a fresh DataFrame, so no
# extra .copy() is needed before mutating `dataset` in later cells.
df = pd.read_csv(os.path.join(data_dir, "Data - Aircraft_Incident_Dataset.csv"))
dataset = pd.read_csv(os.path.join(data_dir, "Data - Jadaero Parts Cleaned.csv"))
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   PartNumber   500 non-null    object
 1   PartName     500 non-null    object
 2   Description  495 non-null    object
dtypes: object(3)
memory usage: 11.8+ KB


In [193]:
# Distinct values of the incident-cause column, in order of first appearance.
# Author's note: these categories were exported and given to DeepSeek to
# create a dictionary.
IC = df.loc[:, 'Incident_Cause(es)'].unique().tolist()
IC

['Airplane - Engines, Airplane - Engines - Prop/turbine blade separation, Collision - Object, Collision - Object - Bird, Result - Emergency, forced landing - On runway',
 'Airplane - Undercarriage, Airplane - Undercarriage - Landing gear collapse, Result - Runway mishap',
 'Cargo - Fire/smoke, Result - Damaged on the ground',
 'Result - Damaged on the ground',
 'Landing/takeoff - Tailstrike, Result - Runway mishap',
 'Result - Runway excursion',
 'Info-Unavailable',
 'Result - Runway mishap',
 'Result - Loss of control',
 'Result - Loss of control, Result - Loss of control (presumed)',
 'Landing/takeoff - Landing, Landing/takeoff - Landing - Heavy, Result - Runway mishap',
 'Result - Emergency, forced landing - Ditching',
 'Landing/takeoff - Takeoff - Aborted, Result - Runway excursion',
 'Landing/takeoff - Landing, Landing/takeoff - Landing - Wrong runway/taxiway, Result - Runway excursion',
 'Airplane - Engines, Airplane - Engines - All engine powerloss, Result - Emergency, forced la

In [194]:
# Load the aircraft-systems dictionary from JSON.
# The path is built with os.path.join: the former non-raw literal contained the
# unrecognized escape sequence "\D" and hard-coded Windows separators.
aircraft_systems_path = os.path.join(pwd, 'Dics', 'Dic1 - Airsystems.json')
with open(aircraft_systems_path, 'r') as file:
    aircraft_systems = json.load(file)

# Match part names and descriptions to categories
def _searchable_text(value):
    """Lower-case ``value`` for substring matching; missing values become ''.

    ``None`` and float NaN (what pandas yields for empty CSV cells) map to the
    empty string so they are not searched as the literal text "none"/"nan".
    """
    if value is None or (isinstance(value, float) and value != value):
        return ''
    return str(value).lower()


def categorize_part(part_name, description, systems=None):
    """Return the first category whose patterns occur in the part's text.

    Categories are scanned in dictionary order. A category matches when any of
    its ``common_parts`` is a substring of the part name, or any of its
    ``keywords`` is a substring of the description or of the part name. All
    comparisons are case-insensitive.

    Parameters
    ----------
    part_name : object
        Part name; converted to text. Missing values are treated as empty.
    description : object
        Part description; converted to text. Missing values are treated as
        empty.
    systems : dict, optional
        Mapping of category name to a dict that may hold ``common_parts`` and
        ``keywords`` lists. Defaults to the notebook-level ``aircraft_systems``.

    Returns
    -------
    str
        The matching category name, or ``'other'`` when nothing matches.
    """
    if systems is None:
        systems = aircraft_systems

    name_text = _searchable_text(part_name)
    description_text = _searchable_text(description)

    for category, data in systems.items():
        # Empty patterns are skipped: '' is a substring of every string and
        # would otherwise assign every part to this category.
        common_parts = [part.lower() for part in data.get('common_parts', []) if part]
        keywords = [keyword.lower() for keyword in data.get('keywords', []) if keyword]

        # Known part-name patterns
        if any(part in name_text for part in common_parts):
            return category
        # Keywords, looked up in the description and in the part name
        if any(keyword in description_text or keyword in name_text for keyword in keywords):
            return category
    return 'other'

def get_risk_level(part_name, description):
    """Return the risk level stored for the part's category.

    The part is categorized with ``categorize_part``. When the resulting
    category is not a key of ``aircraft_systems`` (for example the 'other'
    fallback), 'Unknown' is returned.
    """
    category = categorize_part(part_name, description)
    is_known_category = category in aircraft_systems
    return aircraft_systems[category]['risk_level'] if is_known_category else 'Unknown'

def get_primary_replacement_interval(part_name, description):
    """Return the replacement interval that best fits a specific part.

    The part is categorized with ``categorize_part`` and the category's
    ``replacement_interval`` mapping (component name -> interval) is consulted:

    1. The first component, in dictionary order, with any whitespace-separated
       word occurring as a substring of the upper-cased part name wins.
    2. Otherwise the first interval in the mapping is returned.
    3. If the category is unknown or has no intervals, 'On-condition' is
       returned.
    """
    default_interval = 'On-condition'

    category = categorize_part(part_name, description)
    if category not in aircraft_systems:
        return default_interval

    intervals = aircraft_systems[category].get('replacement_interval', {})
    name_upper = str(part_name).upper()

    # Component-specific match: any word of the component name inside the part name
    for component, interval in intervals.items():
        for word in component.upper().split():
            if word in name_upper:
                return interval

    # No component matched: fall back to the first interval, if there is one
    return next(iter(intervals.values()), default_interval)

def _category_targets(part_name, description, target_key):
    """Join the ``target_key`` list of the part's category into one string.

    The part is categorized with ``categorize_part``. Returns the list stored
    under ``target_key`` joined with ', ', or '' when the category is not in
    ``aircraft_systems`` or has no such list.
    """
    category = categorize_part(part_name, description)
    if category not in aircraft_systems:
        return ''
    return ', '.join(aircraft_systems[category].get(target_key, []))


def get_boeing_targets(part_name, description):
    """Get the Boeing targets for the part's category as a ', '-joined string."""
    return _category_targets(part_name, description, 'boeing_targets')


def get_bombardier_targets(part_name, description):
    """Get the Bombardier targets for the part's category as a ', '-joined string."""
    return _category_targets(part_name, description, 'bombardier_targets')


def get_mro_targets(part_name, description):
    """Get the MRO targets for the part's category as a ', '-joined string."""
    return _category_targets(part_name, description, 'mro_targets')


def get_geo_targets(part_name, description):
    """Get the geographical targets for the part's category as a ', '-joined string."""
    return _category_targets(part_name, description, 'geo_targets')

# Derive one new column per helper; each helper receives the row's PartName
# and Description. Columns are added in the order listed here.
derived_columns = {
    'DamageCategory': categorize_part,
    'risk_level': get_risk_level,
    'replacement_interval': get_primary_replacement_interval,
    'boeing_targets': get_boeing_targets,
    'bombardier_targets': get_bombardier_targets,
    'mro_targets': get_mro_targets,
    'geo_targets': get_geo_targets,
}
for column_name, derive in derived_columns.items():
    dataset[column_name] = dataset.apply(
        lambda row: derive(row['PartName'], row['Description']), axis=1
    )

# Preview a random sample of the parts that received a category
categorized_parts = dataset[dataset['DamageCategory'] != 'other']
categorized_parts.sample(10)

Unnamed: 0,PartNumber,PartName,Description,DamageCategory,risk_level,replacement_interval,boeing_targets,bombardier_targets,mro_targets,geo_targets
102,60-755100-5,FUEL BOOSTER PUMP,Part Number: 60-755100-5\r\rDescription: MOTOR...,fuel_systems,High,"5,000hrs (flow < 90% spec)","B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
157,3289562-5,CONTROL VALVE PRE-COOLER,Part Number: 3289562-5\r\rDescription: VALVE A...,fuel_systems,High,"5,000hrs (flow < 90% spec)","B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
356,342B040000,THERMOSTAT CONTROL,Part Number: 342B040000\r\rDescription: TEMPER...,air_conditioning,High,6 months (or ΔP > 4 inches H2O),"B737 ECS, B777 ECS, B787 ECS","CRJ ECS, Global ECS","ECS maintenance, Filter replacement, Temperatu...",Hot climates + cargo temperature-sensitive ope...
45,APT-482B-1250-60D,"TRANSDUCER, FUEL FILTER DP",Part Number: APT-482B-1250-60D\r\rDescription:...,fuel_systems,High,"5,000hrs (flow < 90% spec)","B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
148,3810056-108,VALVE THRUST REVERSE,Part Number: 3810056-108\r\rDescription: VALVE...,engines,Extreme,On-condition (ultrasound every 500 cycles),"CFM56, GE90, Trent, PW4000, LEAP","PW100, CF34, BR700","Engine overhaul, Hot section repair, Component...","Harsh environments (desert, coastal) + long-ha..."
184,G4000VSVA01,ACTUATOR STATOR,Part Number: G4000VSVA01\r\rDescription: ACTUA...,flight_controls,Extreme,"5,000hrs (leak > 3 drops/hr)","B737 FCC, B777 PFCS, B787 FBW","CRJ FCS, Global FBW","Flight control rigging, Actuator overhaul, Com...","High-frequency routes (US domestic, Europe-Mid..."
321,GG436-2048-7,RAM AIR VALVE,Part Number: GG436-2048-7\r\rDescription: VALV...,fuel_systems,High,"5,000hrs (flow < 90% spec)","B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
294,473597-15,SMOKE DETECTOR,Part Number: 473597-15\r\rDescription: SMOKE D...,cargo_systems,Moderate,10 years (sensitivity test),"B747F cargo, B777F cargo, B767F cargo",DHC-8 cargo,"Cargo system maintenance, Door adjustment, Fir...",Hot climates + cargo temperature-sensitive ope...
110,9000408-22,TRANSMITTER,Part Number: 9000408-22\r\rDescription: FLAP P...,flight_management,Moderate,10 years (or MTBF),"B737 FMS, B777 FMS, B787 FMS","CRJ FMS, Global FMS","Avionics repair, Navigation calibration, Datab...","High-frequency routes (US domestic, Europe-Mid..."
177,6F4016-12,VALVE,Part Number: 6F4016-12\r\rDescription: DRAIN V...,fuel_systems,High,"5,000hrs (flow < 90% spec)","B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...


In [195]:
# The category column is created under the name 'DamageCategory' when the
# parts are categorized, so the former rename from 'damage_category' never
# matched a column and has been removed.
# Preview a random sample of the parts that received a category.
dataset[dataset['DamageCategory'] != 'other'].sample(10)

Unnamed: 0,PartNumber,PartName,Description,DamageCategory,risk_level,replacement_interval,boeing_targets,bombardier_targets,mro_targets,geo_targets
254,522-2996-011,MARKER BEACON,Part Number: 522-2996-011\r\rDescription: RECE...,surveillance_systems,Moderate,12 years (power output test),"B737 surveillance, B777 surveillance, B787 sur...","CRJ surveillance, Global surveillance","Radar maintenance, Navigation aid test, Antenn...","Harsh environments (desert, coastal) + long-ha..."
152,4298117,"LIGHT,LANDING",Part Number: 4298117\r\rDescription: LIGHT-TAK...,cabin_systems,Low,On-condition,"B737 cabin, B777 cabin, B787 cabin","CRJ cabin, Global cabin","Cabin refurbishment, IFE upgrade, Seat repair",High-passenger-turnover routes + wear-intensiv...
357,65-73762-21,NLG ASSY,Part Number: 65-73762-21\r\rDescription: NOSE ...,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."
10,3289614-3,VALVE ANTI-ICE,Part Number: 3289614-3\r\rDescription: VALVE A...,fuel_systems,High,"5,000hrs (flow < 90% spec)","B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
162,7510300-901,ADF ANTENNA,"Part Number: 7510300-901\r\rDescription: ASSY,...",flight_management,Moderate,10 years (or MTBF),"B737 FMS, B777 FMS, B787 FMS","CRJ FMS, Global FMS","Avionics repair, Navigation calibration, Datab...","High-frequency routes (US domestic, Europe-Mid..."
443,AE712890-1,HYDRO HOSE,"Part Number: AE712890-1\r\rDescription: HOSE, ...",hydraulic_systems,High,"5,000hrs (efficiency < 80%)","B737 hydraulics, B777 hydraulics, B787 hydraulics","CRJ hydraulics, Global hydraulics","Hydraulic component overhaul, System flushing,...","High-frequency routes (US domestic, Europe-Mid..."
233,R815505-6,PROP BLADE,Part Number: R815505-6\r\rDescription: BLADE -...,engines,Extreme,On-condition (ultrasound every 500 cycles),"CFM56, GE90, Trent, PW4000, LEAP","PW100, CF34, BR700","Engine overhaul, Hot section repair, Component...","Harsh environments (desert, coastal) + long-ha..."
75,170-71180-409,NLG RETRACTION ACTUATOR,Part Number: 170-71180-409\r\rDescription: RET...,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."
457,70720860-1,"ELECTRONIC CONTROL UNIT, APU",Part Number: 70720860-1\r\rDescription: ELECTR...,ground_support,Low,"On-condition (8,000hrs inspection)","All Boeing aircraft APU, Ground service equipment","All Bombardier aircraft APU, Ground service eq...","APU maintenance, Ground equipment service, Lin...","High-frequency routes (US domestic, Europe-Mid..."
189,029-658-0,NOSE TIRE,Part Number: 029-658-0\r\rDescription: TIRE 26...,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."


In [196]:
# Spot-check a single part by its exact name
dataset.loc[dataset['PartName'].eq('ENGINE NEW 675 HP')]


Unnamed: 0,PartNumber,PartName,Description,DamageCategory,risk_level,replacement_interval,boeing_targets,bombardier_targets,mro_targets,geo_targets
499,PT6A-114A,ENGINE NEW 675 HP,,other,Unknown,On-condition,,,,


In [197]:
# Count how many parts fall into each DamageCategory value.
dataset['DamageCategory'].value_counts()

# Author's notes on the DamageCategory column:
#   - first attempt: 27 rows out of 500 rows
#   - after cleaning and searching for other parts: 253 rows out of 500 rows
# NOTE(review): presumably these are counts of categorized (non-'other') rows
# recorded on earlier runs -- confirm and refresh against the current output.

DamageCategory
other                         274
fuel_systems                   56
engines                        32
flight_controls                21
landing_gear                   20
cabin_systems                  18
flight_management              18
hydraulic_systems              15
electrical_systems             10
cargo_systems                   8
air_conditioning                7
surveillance_systems            6
pneumatic_systems               6
regional_aircraft_specific      3
communication_systems           3
ground_support                  3
Name: count, dtype: int64

In [198]:
# Keep only the text that follows the first 'Description:' marker. Rows whose
# value is missing or does not contain the marker end up null.
# NOTE(review): this overwrites the column in place, so the cell is meant to
# run once per load of the data.
description_parts = dataset['Description'].str.split('Description:', n=1, expand=True)
dataset['Description'] = description_parts[1]
dataset.sample(2)

Unnamed: 0,PartNumber,PartName,Description,DamageCategory,risk_level,replacement_interval,boeing_targets,bombardier_targets,mro_targets,geo_targets
219,3107-89,ANTENNA,ANTENNA MARKER,flight_management,Moderate,10 years (or MTBF),"B737 FMS, B777 FMS, B787 FMS","CRJ FMS, Global FMS","Avionics repair, Navigation calibration, Datab...","High-frequency routes (US domestic, Europe-Mid..."
493,472247-1,PRESSURE SWITCH,"PRESSURE SWITCH, TEMPERATURE",other,Unknown,On-condition,,,,


In [199]:
# Inspect the current column names of the parts table.
dataset.columns

Index(['PartNumber', 'PartName', 'Description', 'DamageCategory', 'risk_level',
       'replacement_interval', 'boeing_targets', 'bombardier_targets',
       'mro_targets', 'geo_targets'],
      dtype='object')

In [200]:
# Replace the internal column names with human-readable labels.
column_labels = {
    'PartNumber': 'Part Number',
    'PartName': 'Part Name',
    'Description': 'Description',
    'DamageCategory': 'Air System Category',
    'risk_level': 'Risk Level',
    'replacement_interval': 'Replacement Interval',
    'boeing_targets': 'Boeing Targets',
    'bombardier_targets': 'Bombardier Targets',
    'mro_targets': 'MRO Targets',
    'geo_targets': 'Geographical Targets',
}
dataset = dataset.rename(columns=column_labels)

In [201]:
# Show one random row to check the renamed columns
dataset.sample(n=1)

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets
256,277A6110-3,HUBCAP,HUBCAP,other,Unknown,On-condition,,,,


In [202]:
# Rows that have no description text
has_no_description = dataset['Description'].isna()
description_null = dataset.loc[has_no_description]
description_null

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply",,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."
1,M20101-01,TIRE MAIN,,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."
12,1159SCL404-5,BRAKE ASSEMBLY,,other,Unknown,On-condition,,,,
464,3876046-4,SOLENOID VALVE,,fuel_systems,High,"5,000hrs (flow < 90% spec)","B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
479,472420-2,FIRE EXTINGUISHER,,other,Unknown,On-condition,,,,
499,PT6A-114A,ENGINE NEW 675 HP,,other,Unknown,On-condition,,,,


In [203]:
# Where the description is missing, fall back to the part name of the same row
description_fallback = dataset['Part Name']
dataset['Description'] = dataset['Description'].fillna(description_fallback)

# Spot-check a part that previously had no description
dataset.loc[dataset['Part Name'].eq('TIRE MAIN')]

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets
1,M20101-01,TIRE MAIN,TIRE MAIN,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."


In [204]:
# Save the categorized DataFrame to a new CSV file, without the index column.
# The path is built with os.path.join so it does not depend on the
# Windows-only backslash separator.
dataset.to_csv(os.path.join(pwd, "Data", "Data - Jadaero Parts Edited.csv"), index=False)