In [1]:
import pandas as pd
import os
import json

In [2]:
# All inputs are resolved relative to the directory the notebook is run from.
pwd = os.getcwd()
data_dir = os.path.join(pwd, 'Data')

# Input locations: two CSV extracts plus the aircraft systems JSON.
filepath = os.path.join(data_dir, 'Data - Airbus Airlines.csv')
filepath2 = os.path.join(data_dir, 'Data - Airbus_parts.csv')
filepathjson = os.path.join(pwd, 'Json', 'Json - Aircraft Systems.json')

# The airlines CSV is decoded as latin1; the parts CSV uses the pandas default.
dataset_airbus_airlines = pd.read_csv(filepath, encoding='latin1')
dataset_airbus_parts = pd.read_csv(filepath2)

# Tabular view of the aircraft systems JSON.
dataset_replacement_interval = pd.read_json(filepathjson)


In [3]:
# Summarise the parts data: columns, dtypes and non-null counts per column.
dataset_airbus_parts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2185 entries, 0 to 2184
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Part Number     2185 non-null   object
 1   Description     2025 non-null   object
 2   Aircraft Model  2025 non-null   object
dtypes: object(3)
memory usage: 51.3+ KB


Airbus data manipulation

In [7]:
# Peek at two random rows of the parts data (the rows shown change on every run).
dataset_airbus_parts.sample(2)

Unnamed: 0,Part Number,Description,Aircraft Model
246,D5323513600100,angle cabinet,"angle, assy"
1195,A25271392046,,


Adding the aircraft system to the Airbus part number database

In [8]:

# Load the aircraft systems JSON file into a plain dict keyed by category.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default text encoding (which differs between operating systems).
with open(filepathjson, 'r', encoding='utf-8') as file:
    aircraft_systems = json.load(file)

def categorize_part(description):
    """Map a part description to the first matching aircraft-system category.

    Categories are scanned in the order they appear in ``aircraft_systems``.
    Within a category the ``common_parts`` entries are tried before the
    ``keywords`` entries; a match is a case-insensitive substring test
    against the description.

    Args:
        description: Free-text part description. Non-string values are
            converted with ``str()``. Missing values (``None`` or a float
            NaN) are never matched.

    Returns:
        The key of the first matching category, or ``'other'`` when nothing
        matches or the description is missing.
    """
    # A missing description would otherwise be stringified to 'none' / 'nan'
    # and substring-matched against the keywords, which can mis-categorise it.
    # `x != x` is only true for NaN.
    if description is None or (isinstance(description, float) and description != description):
        return 'other'

    text = str(description).lower()

    for category, data in aircraft_systems.items():
        # 'common_parts' first, then 'keywords' - either kind of hit selects
        # the category.
        if any(
            term.lower() in text
            for field in ('common_parts', 'keywords')
            for term in data.get(field, [])
        ):
            return category
    return 'other'


def get_risk_level(description):
    """Return the ``'risk_level'`` entry of the description's category.

    The category comes from ``categorize_part``. ``'Unknown'`` is returned
    when the category is not in ``aircraft_systems`` or has no
    ``'risk_level'`` entry.
    """
    system = aircraft_systems.get(categorize_part(description), {})
    return system.get('risk_level', 'Unknown')

def get_priority(description):
    """Return the priority configured for the description's category.

    The category comes from ``categorize_part``. The priority is read from
    the ``'priority '`` key (with a trailing space) when present, otherwise
    from ``'priority'``.

    Returns:
        The configured priority, or ``'Unknown'`` when the category is not
        in ``aircraft_systems`` or defines neither key.
    """
    system = aircraft_systems.get(categorize_part(description), {})
    # The lookup historically used the key with a trailing space, so that
    # spelling is tried first to keep existing results unchanged. Falling
    # back to the clean spelling keeps the column populated if the JSON key
    # is ever tidied up.
    for key in ('priority ', 'priority'):
        if key in system:
            return system[key]
    return 'Unknown'


def get_primary_replacement_interval(description):
    """Return the first replacement interval listed for the description's category.

    Reads the ``'replacement_interval'`` mapping of the category chosen by
    ``categorize_part`` and returns its first value. ``'On-condition'`` is
    returned when the category is unknown or the mapping is missing or empty.
    """
    system = aircraft_systems.get(categorize_part(description), {})
    intervals = system.get('replacement_interval', {})
    if not intervals:
        return 'On-condition'
    # Dicts keep insertion order, so this is the first entry of the mapping.
    return next(iter(intervals.values()))

def get_replacement_name_by_interval(description):
    """Return the component name paired with the primary replacement interval.

    Looks through the ``'replacement_interval'`` mapping of the description's
    category and returns the first component whose interval equals the value
    from ``get_primary_replacement_interval``. Returns ``'Unknown'`` when no
    component matches (for example when the mapping is missing or empty).
    """
    system = aircraft_systems.get(categorize_part(description), {})
    primary_interval = get_primary_replacement_interval(description)
    intervals = system.get('replacement_interval', {})
    matching_components = (
        component
        for component, rep_interval in intervals.items()
        if rep_interval == primary_interval
    )
    return next(matching_components, 'Unknown')

def get_geo_targets(description):
    """Return the ``'geo_targets'`` entry of the description's category.

    The category comes from ``categorize_part``. An empty list is returned
    when the category is unknown or has no ``'geo_targets'`` entry.
    """
    system = aircraft_systems.get(categorize_part(description), {})
    return system.get('geo_targets', [])

def get_countries(description):
    """Return the ``'countries'`` entry of the description's category.

    The category comes from ``categorize_part``. An empty list is returned
    when the category is unknown or has no ``'countries'`` entry.
    """
    system = aircraft_systems.get(categorize_part(description), {})
    return system.get('countries', [])

# Derive one new column per helper from the 'Description' column.
# The dict order is the order in which the columns are appended to the frame.
derived_columns = {
    'Airsystem Category': categorize_part,
    'Risk Level': get_risk_level,
    'Priority': get_priority,
    'Replacement Interval': get_primary_replacement_interval,
    'Replacement Name': get_replacement_name_by_interval,
    'Geo Location': get_geo_targets,   # <-- returns a list
    'Countries': get_countries,        # <-- returns a list
}
for column_name, derive in derived_columns.items():
    dataset_airbus_parts[column_name] = dataset_airbus_parts['Description'].apply(derive)


In [9]:
# Inspect ten random rows, including the newly derived columns.
dataset_airbus_parts.sample(10)

Unnamed: 0,Part Number,Description,Aircraft Model,Airsystem Category,Risk Level,Priority,Replacement Interval,Replacement Name,Geo Location,Countries
289,F5757410501800,fairing assembly,"fairing, assy",other,Unknown,Unknown,On-condition,Unknown,[],[]
101,F5451149020800,seal kit,seal assy,hydraulic_system,High,Critical,Every 2 years or on leak detection,Seals,"[Coastal, Desert, Tropical, Mountain]","[Morocco, India, Brazil, United States, Russia]"
1932,F2818007100000,pipe assembly,pipe assembly,other,Unknown,Unknown,On-condition,Unknown,[],[]
638,A0041009100700,rod assembly,rod assembly,other,Unknown,Unknown,On-condition,Unknown,[],[]
1541,A5724294620200,seal kit,seal assy,hydraulic_system,High,Critical,Every 2 years or on leak detection,Seals,"[Coastal, Desert, Tropical, Mountain]","[Morocco, India, Brazil, United States, Russia]"
2137,A2527285120000,angle cabinet,"angle, assy",other,Unknown,Unknown,On-condition,Unknown,[],[]
1896,A3251214820000,washer assembly,washer,other,Unknown,Unknown,On-condition,Unknown,[],[]
1461,A5211048222001,retainer lens,retainer,other,Unknown,Unknown,On-condition,Unknown,[],[]
393,A5461090150400,actuator kit,strip,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]","[Morocco, United States, Egypt, Canada, Germany]"
1604,D5744359601100,actuator kit,landing,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]","[Morocco, United States, Egypt, Canada, Germany]"


In [10]:
# Turn each entry of the 'Countries' list into its own row.
exploded_parts = dataset_airbus_parts.explode('Countries')

# Keep only the parts that were matched to an aircraft system.
matched_system = exploded_parts['Airsystem Category'].ne('other')
dataset_airbus_parts = exploded_parts[matched_system]

dataset_airbus_parts.sample(10)

Unnamed: 0,Part Number,Description,Aircraft Model,Airsystem Category,Risk Level,Priority,Replacement Interval,Replacement Name,Geo Location,Countries
1328,A2161010300000,duct assembly,duct assembly,environmental_control_system,Moderate,Moderate,Every 1000 flight hours,Filters,"[Tropical, Desert, Coastal, Urban]",Brazil
1905,A271102G220400,actuator kit,strip,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Egypt
476,D5335500100000,filter kit,filter air,hydraulic_system,High,Critical,Every 2 years or on leak detection,Seals,"[Coastal, Desert, Tropical, Mountain]",Morocco
536,A2527642800000,actuator kit,diffuser,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Egypt
58,F0003053800000,hose alt cooling,"hose, assembly",hydraulic_system,High,Critical,Every 2 years or on leak detection,Seals,"[Coastal, Desert, Tropical, Mountain]",Russia
309,D5755018420295,actuator kit,cleat,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Morocco
1510,D5347552921490,actuator kit,section,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Egypt
674,D2904003800200,half clamp,clamp assembly,electrical_system,Moderate,Critical,Every 2 years or when capacity drops below 80%,Batteries,"[Urban, Island, Coastal, Desert, Mountain]",Germany
1331,A2537132623400,seal kit,seal assy,hydraulic_system,High,Critical,Every 2 years or on leak detection,Seals,"[Coastal, Desert, Tropical, Mountain]",Brazil
1238,A2781009400000,hose alt cooling,"hose, assembly",hydraulic_system,High,Critical,Every 2 years or on leak detection,Seals,"[Coastal, Desert, Tropical, Mountain]",Morocco


In [11]:
# Display the airlines data (the rendered output is truncated for long frames).
dataset_airbus_airlines

Unnamed: 0,Country,Airlines,Model Series,Region
0,France,Air France,A318,Europe
1,United Kingdom,British Airways,A318,Europe
2,United States,Frontier Airlines,A318,North America
3,Chile,Latam Airlines Group,A318,Latin America & Caribbean
4,United States,Nas Aviation Services,A318,North America
...,...,...,...,...
1650,Mexico,Volaris,A321,Latin America & Caribbean
1651,Spain,Vueling,A321,Europe
1652,Hungary,Wizz Air,A321,Europe
1653,United Arab Emirates,Wizz Air Abu Dhabi,A321,Middle East


Modify the country names (see notes/country modified)

In [12]:
# Country spellings in the airlines data -> the name to use instead.
# NOTE(review): presumably these align 'Country' with the names in the
# 'Countries' column it is later merged against - confirm.
countries = {
    'Brunei Darussalam': 'Brunei',
    'Congo, the Democratic Republic': 'Democratic Republic of the Congo',
    'Iran, Islamic Republic of': 'Iran',
    'Slovakia (Slovak Republic)': 'Slovakia',
    'State of Libya': 'Libya',
    'Tanzania, United Republic of': 'Tanzania',
    'Unidentified': 'Unknown Country',
    'USA': 'United States',
}

# Only the 'Country' column is rewritten; values not in the mapping stay as they are.
dataset_airbus_airlines['Country'] = dataset_airbus_airlines['Country'].replace(countries)

Joining Airbus parts with Airbus airlines and replacement interval

In [13]:
# Left join: every part row is kept and paired with each airlines row whose
# 'Country' equals the part's 'Countries' value.
dataset_airbus__merged1 = pd.merge(
    dataset_airbus_parts,
    dataset_airbus_airlines,
    how='left',
    left_on='Countries',
    right_on='Country',
)
dataset_airbus__merged1.sample(10)



Unnamed: 0,Part Number,Description,Aircraft Model,Airsystem Category,Risk Level,Priority,Replacement Interval,Replacement Name,Geo Location,Countries,Country,Airlines,Model Series,Region
104528,D2557177322800,actuator kit,strip,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Germany,Germany,Lufthansa,A321neo,Europe
13868,A2527125301300,actuator kit,mask,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Germany,Germany,Condor,A320,Europe
43400,D9259003402200,actuator kit,psu ay,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",United States,United States,Global Crossing Airlines,A321,North America
61475,D521107D520000,half clamp,clamp assembly,electrical_system,Moderate,Critical,Every 2 years or when capacity drops below 80%,Batteries,"[Urban, Island, Coastal, Desert, Mountain]",United States,United States,Us Airways,A319ceo,North America
114589,F5777003520200,actuator kit,shim,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Germany,Germany,Germanwings,A320,Europe
109788,S1565-64309,bearing sleeve,"bearing, sleeve",engine_system,Critical,Critical,Every 1000 flight hours or on leak detection,Oil Seals,"[Desert, Coastal, Tropical, Mountain]",Morocco,Morocco,Royal Air Maroc,A321,Africa
92307,A2577120720300,actuator kit,section,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",United States,United States,Macquarie Aircraft Leasing Ltd,A320,North America
22275,F5332118321400,actuator kit,strip,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Germany,Germany,Sundair,A320,Europe
100483,99A53003001104,actuator kit,calibrat,landing_gear_system,High,Critical,Every 300-500 landings or on damage,Tires,"[Desert, Coastal, Urban, Mountain]",Germany,Germany,Thomas Cook Ag,A320,Europe
93943,F5218224220000,pushbutton switches,pusher,electrical_system,Moderate,Critical,Every 2 years or when capacity drops below 80%,Batteries,"[Urban, Island, Coastal, Desert, Mountain]",Germany,Germany,Germania,A321,Europe


Select the columns & export the data

In [14]:
# List the merged frame's columns before choosing which ones to export.
dataset_airbus__merged1.columns

Index(['Part Number', 'Description', 'Aircraft Model', 'Airsystem Category',
       'Risk Level', 'Priority', 'Replacement Interval', 'Replacement Name',
       'Geo Location', 'Countries', 'Country', 'Airlines', 'Model Series',
       'Region'],
      dtype='object')

In [15]:
# Keep only the columns needed in the export, in their final order.
# This drops 'Aircraft Model', 'Countries', 'Country' and 'Region'.
dataset_airbus__merged1 = dataset_airbus__merged1[['Part Number', 'Description', 'Airlines', 'Model Series', 'Airsystem Category',
       'Risk Level', 'Priority', 'Replacement Interval', 'Replacement Name',
       'Geo Location']]
resultpath = os.path.join(pwd, 'Output', 'Data - Airbus.csv')
# to_csv does not create missing folders, so make sure the target folder
# exists before writing (a no-op when it is already there).
os.makedirs(os.path.dirname(resultpath), exist_ok=True)
dataset_airbus__merged1.to_csv(resultpath, index=False)

In [16]:
# Summarise the exported frame: row count, dtypes and non-null counts per column.
dataset_airbus__merged1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128186 entries, 0 to 128185
Data columns (total 10 columns):
 #   Column                Non-Null Count   Dtype 
---  ------                --------------   ----- 
 0   Part Number           128186 non-null  object
 1   Description           128186 non-null  object
 2   Airlines              128046 non-null  object
 3   Model Series          128046 non-null  object
 4   Airsystem Category    128186 non-null  object
 5   Risk Level            128186 non-null  object
 6   Priority              128186 non-null  object
 7   Replacement Interval  128186 non-null  object
 8   Replacement Name      128186 non-null  object
 9   Geo Location          128186 non-null  object
dtypes: object(10)
memory usage: 9.8+ MB


melting the data

In [None]:
# ds = dataset.melt(id_vars=['airlines', 'Country', 'Region'],var_name='Model Type', value_name='Count') 
# ds[ds["Count"].isnull()] # Delete Opr Ord nan Del 
# # ds[ds["Model Type"].isnull()] # Replace .1 .2 .

# ds.to_csv(os.path.join(pwd, 'Data', 'Data - Airbus Airlines - Melted.csv'), index=False)
