In [83]:
# Libs
import pandas as pd
import os 
import json


In [84]:
pwd = os.getcwd()
# Dataset
df = pd.read_csv(pwd + r"\Data\Data - Aircraft_Incident_Dataset.csv")
dataset = pd.read_csv(pwd + r"\Data\Data - Jadaero Parts Cleaned.csv")
client_dataset = pd.read_csv(pwd + r"\Data\Data - Clients.csv")
dataset = dataset.copy()
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   PartNumber   500 non-null    object
 1   PartName     500 non-null    object
 2   Description  495 non-null    object
dtypes: object(3)
memory usage: 11.8+ KB


In [85]:
IC = df['Incident_Cause(es)'].unique().tolist()
IC

# IMPORTING CATEGORIES AND GIVE IT TO DEEPSEEK TO CREATE A DICTIONARY

['Airplane - Engines, Airplane - Engines - Prop/turbine blade separation, Collision - Object, Collision - Object - Bird, Result - Emergency, forced landing - On runway',
 'Airplane - Undercarriage, Airplane - Undercarriage - Landing gear collapse, Result - Runway mishap',
 'Cargo - Fire/smoke, Result - Damaged on the ground',
 'Result - Damaged on the ground',
 'Landing/takeoff - Tailstrike, Result - Runway mishap',
 'Result - Runway excursion',
 'Info-Unavailable',
 'Result - Runway mishap',
 'Result - Loss of control',
 'Result - Loss of control, Result - Loss of control (presumed)',
 'Landing/takeoff - Landing, Landing/takeoff - Landing - Heavy, Result - Runway mishap',
 'Result - Emergency, forced landing - Ditching',
 'Landing/takeoff - Takeoff - Aborted, Result - Runway excursion',
 'Landing/takeoff - Landing, Landing/takeoff - Landing - Wrong runway/taxiway, Result - Runway excursion',
 'Airplane - Engines, Airplane - Engines - All engine powerloss, Result - Emergency, forced la

In [86]:
# Load JSON file
with open(pwd + '\Dics\Dic1 - Airsystems.json', 'r') as file:
    aircraft_systems = json.load(file)

# Match part numbers and descriptions to categories
def categorize_part(part_name, description):
    description = str(description).lower()
    part_name = str(part_name).lower()
    
    for category, data in aircraft_systems.items():
        # Check if part name matches known patterns
        if any(common_part.lower() in part_name for common_part in data['common_parts']):
            return category
        # Check description keywords
        if any(keyword.lower() in description for keyword in data['keywords']):
            return category
        # Check part name keywords too
        if any(keyword.lower() in part_name for keyword in data['keywords']):
            return category
    return 'other'

def get_risk_level(part_name, description):
    # Get risk level for aircraft parts based on categorization"""
    category = categorize_part(part_name, description)
    
    if category in aircraft_systems:
        return aircraft_systems[category]['risk_level']
    else:
        return 'Unknown'

def get_primary_replacement_interval(part_name, description):
    # Get the most relevant replacement interval for a specific part"""
    category = categorize_part(part_name, description)
    part_name_upper = str(part_name).upper()
    
    if category in aircraft_systems:
        replacement_dict = aircraft_systems[category].get('replacement_interval', {})
        
        # Try to find the most specific match first
        for component, interval in replacement_dict.items():
            if any(word in part_name_upper for word in component.upper().split()):
                return interval
        
        # If no specific match, return the first available interval
        if replacement_dict:
            return list(replacement_dict.values())[0]
    
    return 'On-condition'

def get_replacement_name_by_interval(part_name, description):
    category = categorize_part(part_name, description)
    interval = get_primary_replacement_interval(part_name, description)
    
    if category in aircraft_systems:
        replacement_dict = aircraft_systems[category].get('replacement_interval', {})
        
        # Reverse lookup: find component whose interval matches exactly the found interval
        for component, rep_interval in replacement_dict.items():
            if rep_interval == interval:
                return component
    
    return 'Unknown'


def get_boeing_targets(part_name, description):
    # Get Boeing targets for the part category"""
    category = categorize_part(part_name, description)
    if category in aircraft_systems:
        return ', '.join(aircraft_systems[category].get('boeing_targets', []))
    return ''

def get_bombardier_targets(part_name, description):
    # Get Bombardier targets for the part category"""
    category = categorize_part(part_name, description)
    if category in aircraft_systems:
        return ', '.join(aircraft_systems[category].get('bombardier_targets', []))
    return ''

def get_mro_targets(part_name, description):
    # Get MRO targets for the part category"""
    category = categorize_part(part_name, description)
    if category in aircraft_systems:
        return ', '.join(aircraft_systems[category].get('mro_targets', []))
    return ''

def get_geo_targets(part_name, description):
    # Get geographical targets for the part category
    category = categorize_part(part_name, description)
    if category in aircraft_systems:
        return ', '.join(aircraft_systems[category].get('geo_targets', []))
    return ''

# Apply to your DataFrame
dataset['DamageCategory'] = dataset.apply(lambda x: categorize_part(x['PartName'], x['Description']), axis=1)
dataset['risk_level'] = dataset.apply(lambda x: get_risk_level(x['PartName'], x['Description']), axis=1)
dataset['replacement_interval'] = dataset.apply(lambda x: get_primary_replacement_interval(x['PartName'], x['Description']), axis=1)
dataset['replacement_name'] = dataset.apply(lambda x: get_replacement_name_by_interval(x['PartName'], x['Description']),axis=1)
dataset['boeing_targets'] = dataset.apply(lambda x: get_boeing_targets(x['PartName'], x['Description']), axis=1)
dataset['bombardier_targets'] = dataset.apply(lambda x: get_bombardier_targets(x['PartName'], x['Description']), axis=1)
dataset['mro_targets'] = dataset.apply(lambda x: get_mro_targets(x['PartName'], x['Description']), axis=1)
dataset['geo_targets'] = dataset.apply(lambda x: get_geo_targets(x['PartName'], x['Description']), axis=1)

# View results
dataset[dataset['DamageCategory'] != 'other'].sample(10)

Unnamed: 0,PartNumber,PartName,Description,DamageCategory,risk_level,replacement_interval,replacement_name,boeing_targets,bombardier_targets,mro_targets,geo_targets
233,R815505-6,PROP BLADE,Part Number: R815505-6\r\rDescription: BLADE -...,engines,Extreme,On-condition (ultrasound every 500 cycles),Fan Blades,"CFM56, GE90, Trent, PW4000, LEAP","PW100, CF34, BR700","Engine overhaul, Hot section repair, Component...","Harsh environments (desert, coastal) + long-ha..."
307,066-50008-0407,RADAR TRANSCEIVER,Part Number: 066-50008-0407\r\rDescription: TR...,surveillance_systems,Moderate,12 years (power output test),Weather Radar,"B737 surveillance, B777 surveillance, B787 sur...","CRJ surveillance, Global surveillance","Radar maintenance, Navigation aid test, Antenn...","Harsh environments (desert, coastal) + long-ha..."
309,802170-11,SHUTOFF VALVE,Part Number: 802170-11\r\rDescription: VALVE-H...,fuel_systems,High,"5,000hrs (flow < 90% spec)",Fuel Pumps,"B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
188,P99C38-605,PUMP FUEL MAIN,Part Number: P99C38-605\r\rDescription: FUEL M...,fuel_systems,High,"5,000hrs (flow < 90% spec)",Fuel Pumps,"B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
4,89794077,CYLINDER & VALVE ASSY,Part Number: 89794077\r\rDescription: PORTABLE...,fuel_systems,High,"5,000hrs (flow < 90% spec)",Fuel Pumps,"B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
60,2293B020000,CHECK VALVE,Part Number: 2293B020000\r\rDescription: CHECK...,fuel_systems,High,"5,000hrs (flow < 90% spec)",Fuel Pumps,"B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
62,442653,HMU-HYDROMECHANICAL UNIT,Part Number: 442653\r\rDescription: HYDRO MECH...,engines,Extreme,On-condition (ultrasound every 500 cycles),Fan Blades,"CFM56, GE90, Trent, PW4000, LEAP","PW100, CF34, BR700","Engine overhaul, Hot section repair, Component...","Harsh environments (desert, coastal) + long-ha..."
11,930-2000-001,TRANSCEIVER WEATHER RADAR,Part Number: 930-2000-001\r\rDescription: RECE...,surveillance_systems,Moderate,12 years (power output test),Weather Radar,"B737 surveillance, B777 surveillance, B787 sur...","CRJ surveillance, Global surveillance","Radar maintenance, Navigation aid test, Antenn...","Harsh environments (desert, coastal) + long-ha..."
434,8ES360000-00,BATTERY PACK,Part Number: 8ES360000-00\r\rDescription: BATT...,electrical_systems,High,2 years (or capacity < 80%),Batteries,"B737 electrical, B777 electrical, B787 electrical","CRJ electrical, Global electrical","Electrical component repair, Wire harness repl...",Older fleets needing upgrades + EM interferenc...
41,PR1826B1/4,SEALANT SEMKIT,Part Number: PR1826B1/4\r\rDescription: SEALAN...,engines,Extreme,On-condition (ultrasound every 500 cycles),Fan Blades,"CFM56, GE90, Trent, PW4000, LEAP","PW100, CF34, BR700","Engine overhaul, Hot section repair, Component...","Harsh environments (desert, coastal) + long-ha..."


In [87]:
dataset[dataset['PartName'] == 'ENGINE NEW 675 HP']


Unnamed: 0,PartNumber,PartName,Description,DamageCategory,risk_level,replacement_interval,replacement_name,boeing_targets,bombardier_targets,mro_targets,geo_targets
499,PT6A-114A,ENGINE NEW 675 HP,,other,Unknown,On-condition,Unknown,,,,


In [88]:
dataset['DamageCategory'].value_counts()

# SHOW THE UNIQUE VALUES IN THE DamageCategory COLUMN
#  27 ROWS FROM 500 ROWS
# AFTER CLEANING AND SERCHING FOR OTHER PARTS 
#  253 ROWS FROM 500 ROWS

DamageCategory
other                         274
fuel_systems                   56
engines                        32
flight_controls                21
landing_gear                   20
cabin_systems                  18
flight_management              18
hydraulic_systems              15
electrical_systems             10
cargo_systems                   8
air_conditioning                7
surveillance_systems            6
pneumatic_systems               6
regional_aircraft_specific      3
communication_systems           3
ground_support                  3
Name: count, dtype: int64

In [89]:
dataset['Description'] = dataset['Description'].str.split('Description:', n=1,expand=True)[1]  # split the Description column by '\r\r' and keep the second part
dataset.sample(2)

Unnamed: 0,PartNumber,PartName,Description,DamageCategory,risk_level,replacement_interval,replacement_name,boeing_targets,bombardier_targets,mro_targets,geo_targets
460,3883297-7,FLOW DIVIDER ASSY,FLOW DIVIDER ASSY,other,Unknown,On-condition,Unknown,,,,
303,EVR750-03-0100A,TRANSCEIVER VHF,ENHANCED VHF TRANSCEIVER,other,Unknown,On-condition,Unknown,,,,


In [90]:
dataset.columns

Index(['PartNumber', 'PartName', 'Description', 'DamageCategory', 'risk_level',
       'replacement_interval', 'replacement_name', 'boeing_targets',
       'bombardier_targets', 'mro_targets', 'geo_targets'],
      dtype='object')

In [91]:
dataset = dataset.rename(columns={'PartNumber':'Part Number','PartName': 'Part Name', 'Description': 'Description', 'DamageCategory': 'Air System Category', 'risk_level': 'Risk Level',
                                  'replacement_interval': 'Replacement Interval', 'replacement_name': 'Replacement Name', 'boeing_targets': 'Boeing Targets', 'bombardier_targets': 'Bombardier Targets',
                                  'mro_targets': 'MRO Targets', 'geo_targets': 'Geographical Targets'})

In [92]:
dataset.sample(1)

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Replacement Name,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets
231,930-2000-010,XMTR RCVR-WEATHER RADAR,TRANSCEIVER-WEATHER RADAR (RDR-4000),surveillance_systems,Moderate,12 years (power output test),Weather Radar,"B737 surveillance, B777 surveillance, B787 sur...","CRJ surveillance, Global surveillance","Radar maintenance, Navigation aid test, Antenn...","Harsh environments (desert, coastal) + long-ha..."


In [93]:
description_null = dataset[dataset['Description'].isnull()]
description_null

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Replacement Name,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply",,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."
1,M20101-01,TIRE MAIN,,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."
12,1159SCL404-5,BRAKE ASSEMBLY,,other,Unknown,On-condition,Unknown,,,,
464,3876046-4,SOLENOID VALVE,,fuel_systems,High,"5,000hrs (flow < 90% spec)",Fuel Pumps,"B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...
479,472420-2,FIRE EXTINGUISHER,,other,Unknown,On-condition,Unknown,,,,
499,PT6A-114A,ENGINE NEW 675 HP,,other,Unknown,On-condition,Unknown,,,,


In [94]:
dataset['Description'] = dataset['Description'].fillna(dataset['Part Name'])
dataset[dataset['Part Name'] == 'TIRE MAIN']

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Replacement Name,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets
1,M20101-01,TIRE MAIN,TIRE MAIN,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid..."


In [95]:
geo_keywords = []

for geo_target in dataset['Geographical Targets']:
    geo_target = str(geo_target).lower()
    locations = set()  # Use a set to avoid duplicates
    
    if 'us domestic' in geo_target:
        locations.add('United States')
    if 'europe' in geo_target:
        locations.add('Europe')
    if 'middle east' in geo_target:
        locations.add('Middle East')
    if 'desert' in geo_target:
        # 'Sahara (Africa)', 'Sonoran (USA/Mexico)', 'Gobi (China)', 'Arabian Desert (Middle East)'
        locations.add('Desert Locations')
    if 'coastal' in geo_target:
        # 'Casablanca (Morocco)', 'Cape Town (South Africa)', 'Miami (USA)', 'Sydney (Australia)'
        locations.add('Coastal Cities')
    if 'long-haul' in geo_target:
        # 'Long-haul: Europe to Asia, Morocco to US'
        locations.add('Long-haul')
    if not locations:
        locations.add('Unknown')

   # Convert set to comma-separated string (no brackets)
    geo_keywords.append(list(locations))
    
dataset['Geographical Location'] = geo_keywords


In [96]:
dataset.sample(10)

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Replacement Name,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets,Geographical Location
246,315A2800-1,ACTUATOR,ACTUATOR - THRUST REVERSER NON-LOCKING,engines,Extreme,On-condition (ultrasound every 500 cycles),Fan Blades,"CFM56, GE90, Trent, PW4000, LEAP","PW100, CF34, BR700","Engine overhaul, Hot section repair, Component...","Harsh environments (desert, coastal) + long-ha...","[Desert Locations, Coastal Cities, Long-haul]"
406,AE0607F00,FAN,FAN ARINC,other,Unknown,On-condition,Unknown,,,,,[Unknown]
138,7514081-903,TCAS DIRECTIONAL ANTENNA,"NTENNAS,TCAS-RADIO/NAVIGATION SYS",flight_management,Moderate,10 years (or MTBF),FMC,"B737 FMS, B777 FMS, B787 FMS","CRJ FMS, Global FMS","Avionics repair, Navigation calibration, Datab...","High-frequency routes (US domestic, Europe-Mid...","[Europe, United States, Middle East, Long-haul]"
197,45-150-340,AOA SENSOR,"SENSOR, ANGLE OF ATTACK",other,Unknown,On-condition,Unknown,,,,,[Unknown]
412,1153222-2,PRESSURE SENSOR,"SENSOR - PRESSURE, BLEED",cabin_systems,Low,On-condition,Seat Mechanisms,"B737 cabin, B777 cabin, B787 cabin","CRJ cabin, Global cabin","Cabin refurbishment, IFE upgrade, Seat repair",High-passenger-turnover routes + wear-intensiv...,"[Europe, United States, Middle East, Long-haul]"
474,S5307300001400,TAIL CONE ASSY,TAIL CONE ASSY,other,Unknown,On-condition,Unknown,,,,,[Unknown]
367,GM127-1562-3,SPACER,SPACER,other,Unknown,On-condition,Unknown,,,,,[Unknown]
71,35518-48-400,SENSING ELEMENT,SENSING ELEMENT,other,Unknown,On-condition,Unknown,,,,,[Unknown]
224,ARG7041-1,NUT,"NUT, SIDE BRACE MLG",other,Unknown,On-condition,Unknown,,,,,[Unknown]
283,473597-2,SMOKE DETECTOR,"SMOKE DETECTOR, MODEL",cargo_systems,Moderate,10 years (sensitivity test),Smoke Detectors,"B747F cargo, B777F cargo, B767F cargo",DHC-8 cargo,"Cargo system maintenance, Door adjustment, Fir...",Hot climates + cargo temperature-sensitive ope...,"[Desert Locations, Coastal Cities, Long-haul]"


In [97]:


# Client Type 1: Regional Airlines
# Expanded geo pattern for Regional Airlines
geo1_pattern = r"(?i)high-frequency routes|regional routes|high-cycle operations"

clt1_mask = (
    dataset['Replacement Interval'].isin(['10 years', '15 years', '7 years']) |
    dataset['Geographical Targets'].str.contains('High-frequency routes') |
    dataset['Bombardier Targets'].str.contains('Regional aircraft') |
    dataset['Boeing Targets'].str.contains('Regional aircraft')
)



# Client Type 2: Cargo Airlines
Geo = 'Harsh environments|(desert, coastal)|long-haul operations|Hot climates|cargo temperature-sensitive operations'
clt2_mask = (
        dataset['Replacement Interval'].isin(['5,000hrs']) |
        dataset['Geographical Targets'].str.contains(Geo) |
        dataset['MRO Targets'].str.contains('Heavy maintenance')
)



# Client Type 3: MRO Service Providers
Geo2 = 'Safety-critical universal demand|harsh environments'
clt3_mask = (
        dataset['Replacement Interval'].isin(['6 months', '2,000hrs', '500 cycles']) |
        dataset['Geographical Targets'].str.contains(Geo2) |
        dataset['MRO Targets'].str.contains('Primary focus')
)


# Client Type 4: Legacy Fleet Operators
Geo3 = 'Older fleets needing upgrades|EM interference environments'
clt4_mask = (
        dataset['Geographical Targets'].str.contains(Geo3) |
        dataset['Boeing Targets'].str.contains('Older Boeing models')
)

# Step 1: Create a new column with default value (e.g. 0 for "Unclassified")
dataset['Client Number'] = 0
dataset.loc[clt1_mask, 'Client Number'] = 1
dataset.loc[clt2_mask, 'Client Number'] = 2
dataset.loc[clt3_mask, 'Client Number'] = 3
dataset.loc[clt4_mask, 'Client Number'] = 4

#Use (?i) for inline case-insensitive regex.


  dataset['Geographical Targets'].str.contains(Geo) |


In [98]:
dataset

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Replacement Name,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets,Geographical Location,Client Number
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply","TIRE 11.00-12 10PLY 160 mph, 10 Ply",landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...","[United States, Coastal Cities, Middle East, D...",3
1,M20101-01,TIRE MAIN,TIRE MAIN,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...","[United States, Coastal Cities, Middle East, D...",3
2,M01103-02,TIRE – MAIN,46x17.0R20 30PR 225mph Radial TL Michelin Air…,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...","[United States, Coastal Cities, Middle East, D...",3
3,114079019,"VALVE, LANDING GEAR SELECTOR",ELECTRO-HIDRAULIC SELECTORS,landing_gear,Extreme,"15,000 cycles (or structural cracks)",Gear Doors,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...","[United States, Coastal Cities, Middle East, D...",3
4,89794077,CYLINDER & VALVE ASSY,PORTABLE OXYGEN CYLINDER AND VALVE ASSEMBLY,fuel_systems,High,"5,000hrs (flow < 90% spec)",Fuel Pumps,"B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...,[Long-haul],2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,424634,CARTRIDGE 20PC,CARTRIDGE 20PC PACKAGE KIT 1.4C AS 1.4S,other,Unknown,On-condition,Unknown,,,,,[Unknown],0
496,G825100-4,FIREX,"FIREX, DISPOSABLE (POTTY)",other,Unknown,On-condition,Unknown,,,,,[Unknown],0
497,475089-1,FIREX 224 CU,FIREX 224 CU IN DUAL OUTLET,other,Unknown,On-condition,Unknown,,,,,[Unknown],0
498,474158-3,FIREX,FIREX DUAL OUTLET 1400 CU IN,other,Unknown,On-condition,Unknown,,,,,[Unknown],0


In [99]:
dataset_merged = dataset.merge(client_dataset, on='Client Number', how='left')
dataset_merged

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Replacement Name,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets,Geographical Location,Client Number,Client Role
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply","TIRE 11.00-12 10PLY 160 mph, 10 Ply",landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...","[United States, Coastal Cities, Middle East, D...",3,MRO Service Providers
1,M20101-01,TIRE MAIN,TIRE MAIN,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...","[United States, Coastal Cities, Middle East, D...",3,MRO Service Providers
2,M01103-02,TIRE – MAIN,46x17.0R20 30PR 225mph Radial TL Michelin Air…,landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...","[United States, Coastal Cities, Middle East, D...",3,MRO Service Providers
3,114079019,"VALVE, LANDING GEAR SELECTOR",ELECTRO-HIDRAULIC SELECTORS,landing_gear,Extreme,"15,000 cycles (or structural cracks)",Gear Doors,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...","[United States, Coastal Cities, Middle East, D...",3,MRO Service Providers
4,89794077,CYLINDER & VALVE ASSY,PORTABLE OXYGEN CYLINDER AND VALVE ASSEMBLY,fuel_systems,High,"5,000hrs (flow < 90% spec)",Fuel Pumps,"B737 fuel system, B777 fuel management, B787 fuel","CRJ fuel, Global fuel","Fuel system maintenance, Tank entry, Pump over...",Hot climates + cargo temperature-sensitive ope...,[Long-haul],2,Cargo Airlines
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,424634,CARTRIDGE 20PC,CARTRIDGE 20PC PACKAGE KIT 1.4C AS 1.4S,other,Unknown,On-condition,Unknown,,,,,[Unknown],0,Unclassified
496,G825100-4,FIREX,"FIREX, DISPOSABLE (POTTY)",other,Unknown,On-condition,Unknown,,,,,[Unknown],0,Unclassified
497,475089-1,FIREX 224 CU,FIREX 224 CU IN DUAL OUTLET,other,Unknown,On-condition,Unknown,,,,,[Unknown],0,Unclassified
498,474158-3,FIREX,FIREX DUAL OUTLET 1400 CU IN,other,Unknown,On-condition,Unknown,,,,,[Unknown],0,Unclassified


In [100]:
dataset_merged = dataset_merged.explode('Geographical Location')
dataset_merged


Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Replacement Name,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets,Geographical Location,Client Number,Client Role
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply","TIRE 11.00-12 10PLY 160 mph, 10 Ply",landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...",United States,3,MRO Service Providers
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply","TIRE 11.00-12 10PLY 160 mph, 10 Ply",landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...",Coastal Cities,3,MRO Service Providers
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply","TIRE 11.00-12 10PLY 160 mph, 10 Ply",landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...",Middle East,3,MRO Service Providers
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply","TIRE 11.00-12 10PLY 160 mph, 10 Ply",landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...",Desert Locations,3,MRO Service Providers
0,021-355-0,"TIRE 11.00-12 10PLY 160 mph, 10 Ply","TIRE 11.00-12 10PLY 160 mph, 10 Ply",landing_gear,Extreme,200-300 landings (or tread depth < 2mm),Tires,"B737, B777, B787, B747, B767","CRJ, DHC-8, Global","Heavy maintenance, Line maintenance, Component...","High-frequency routes (US domestic, Europe-Mid...",Europe,3,MRO Service Providers
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,424634,CARTRIDGE 20PC,CARTRIDGE 20PC PACKAGE KIT 1.4C AS 1.4S,other,Unknown,On-condition,Unknown,,,,,Unknown,0,Unclassified
496,G825100-4,FIREX,"FIREX, DISPOSABLE (POTTY)",other,Unknown,On-condition,Unknown,,,,,Unknown,0,Unclassified
497,475089-1,FIREX 224 CU,FIREX 224 CU IN DUAL OUTLET,other,Unknown,On-condition,Unknown,,,,,Unknown,0,Unclassified
498,474158-3,FIREX,FIREX DUAL OUTLET 1400 CU IN,other,Unknown,On-condition,Unknown,,,,,Unknown,0,Unclassified


In [101]:
dataset['Geographical Targets'].unique()

array(['High-frequency routes (US domestic, Europe-Middle East), Harsh environments (desert, coastal) + long-haul operations, High-cycle operations + harsh environments',
       'Hot climates + cargo temperature-sensitive operations, High-altitude routes + extreme temperature operations, Long-haul operations',
       '',
       'High-passenger-turnover routes + wear-intensive operations, Long-haul operations + passenger comfort, High-frequency routes (US domestic, Europe-Middle East)',
       'Harsh environments (desert, coastal) + long-haul operations, High-altitude routes + extreme temperature operations, Weather-intensive routes',
       'Harsh environments (desert, coastal) + long-haul operations, High-altitude routes + extreme temperature operations, Safety-critical universal demand + harsh environments',
       'High-frequency routes (US domestic, Europe-Middle East), High-cycle operations + temperature extremes, Harsh environments (desert, coastal) + long-haul operations',
     

In [102]:
dataset_merged['Client Role'].value_counts()

Client Role
MRO Service Providers     288
Unclassified              274
Cargo Airlines            231
Legacy Fleet Operators     94
Regional Airlines          72
Name: count, dtype: int64

In [103]:
dataset_merged[dataset_merged['Replacement Interval'] == '12 years (rubber degradation)']

Unnamed: 0,Part Number,Part Name,Description,Air System Category,Risk Level,Replacement Interval,Replacement Name,Boeing Targets,Bombardier Targets,MRO Targets,Geographical Targets,Geographical Location,Client Number,Client Role
395,MF10-08-01,OXYGEN MASK,"OXYGEN MASK, CREW",cabin_systems,Low,12 years (rubber degradation),Oxygen Masks,"B737 cabin, B777 cabin, B787 cabin","CRJ cabin, Global cabin","Cabin refurbishment, IFE upgrade, Seat repair",High-passenger-turnover routes + wear-intensiv...,Europe,1,Regional Airlines
395,MF10-08-01,OXYGEN MASK,"OXYGEN MASK, CREW",cabin_systems,Low,12 years (rubber degradation),Oxygen Masks,"B737 cabin, B777 cabin, B787 cabin","CRJ cabin, Global cabin","Cabin refurbishment, IFE upgrade, Seat repair",High-passenger-turnover routes + wear-intensiv...,United States,1,Regional Airlines
395,MF10-08-01,OXYGEN MASK,"OXYGEN MASK, CREW",cabin_systems,Low,12 years (rubber degradation),Oxygen Masks,"B737 cabin, B777 cabin, B787 cabin","CRJ cabin, Global cabin","Cabin refurbishment, IFE upgrade, Seat repair",High-passenger-turnover routes + wear-intensiv...,Middle East,1,Regional Airlines
395,MF10-08-01,OXYGEN MASK,"OXYGEN MASK, CREW",cabin_systems,Low,12 years (rubber degradation),Oxygen Masks,"B737 cabin, B777 cabin, B787 cabin","CRJ cabin, Global cabin","Cabin refurbishment, IFE upgrade, Seat repair",High-passenger-turnover routes + wear-intensiv...,Long-haul,1,Regional Airlines


In [104]:
dataset_merged.drop(columns='Client Number', inplace=True)

In [105]:
# print(dataset_merged['Geographical Location'].unique()) #Unknown
# print(dataset_merged['Risk Level'].unique()) #Unknown
# print(dataset_merged['Air System Category'].unique()) #other
# print(dataset_merged['Replacement Interval'].unique()) # On-condition
# print(dataset_merged['Client Role'].unique())  #'Unclassified

dataset_filtred = dataset_merged[
    (dataset_merged['Geographical Location'] != 'Unknown') &
    (dataset_merged['Risk Level'] != 'Unknown') & 
    (dataset_merged['Air System Category'] != 'other') &
    (dataset_merged['Replacement Interval'] != 'On-condition') &
        (dataset_merged['Replacement Name'] != 'Unknown') &
    (dataset_merged['Client Role'] != 'Unclassified')
]

In [106]:
dataset_filtred.to_csv(pwd + r"\Data\Data - Jadaero Parts Edited.csv", index=False)
# Save the categorized DataFrame to a new CSV file

In [107]:
dataset_filtred.info()

<class 'pandas.core.frame.DataFrame'>
Index: 607 entries, 0 to 481
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Part Number            607 non-null    object
 1   Part Name              607 non-null    object
 2   Description            607 non-null    object
 3   Air System Category    607 non-null    object
 4   Risk Level             607 non-null    object
 5   Replacement Interval   607 non-null    object
 6   Replacement Name       607 non-null    object
 7   Boeing Targets         607 non-null    object
 8   Bombardier Targets     607 non-null    object
 9   MRO Targets            607 non-null    object
 10  Geographical Targets   607 non-null    object
 11  Geographical Location  607 non-null    object
 12  Client Role            607 non-null    object
dtypes: object(13)
memory usage: 66.4+ KB


In [108]:
dataset_filtred['Replacement Name'].unique()

array(['Tires', 'Gear Doors', 'Fuel Pumps', 'Weather Radar', 'Fan Blades',
       'Pumps', 'Hydraulic Actuators', 'FMC', 'ACM Turbines',
       'Tank Seals', 'HEPA Filters', 'Trim Motors', 'Recirculation Fans',
       'Pressure Controllers', 'Fuel Nozzles', 'Cargo Doors',
       'Propellers', 'Ice Detectors', 'Smoke Detectors', 'Transponder',
       'GPS Receiver', 'Oxygen Masks', 'APU'], dtype=object)