In [1]:
import pandas as pd 
pd.set_option('display.max_columns', None)
import numpy as np
import os

import plotly.express as px
import plotly.graph_objects as go




In [2]:
filenames = ["data/msn_02_fuel_leak_signals_preprocessed.csv", "data/msn_10_fuel_leak_signals_preprocessed.csv", "data/msn_11_fuel_leak_signals_preprocessed.csv", "data/msn_12_fuel_leak_signals_preprocessed.csv", "data/msn_14_fuel_leak_signals_preprocessed.csv", "data/msn_29_fuel_leak_signals_preprocessed.csv", "data/msn_37_fuel_leak_signals_preprocessed.csv", "data/msn_53_fuel_leak_signals_preprocessed.csv"]
valid_cols = ['UTC_TIME', 'FUEL_USED_2', 'FUEL_USED_3', 'FUEL_USED_4',
       'FW_GEO_ALTITUDE', 'VALUE_FOB', 'VALUE_FUEL_QTY_CT',
       'VALUE_FUEL_QTY_FT1', 'VALUE_FUEL_QTY_FT2', 'VALUE_FUEL_QTY_FT3',
       'VALUE_FUEL_QTY_FT4', 'VALUE_FUEL_QTY_LXT', 'VALUE_FUEL_QTY_RXT',
       'FLIGHT_PHASE_COUNT', 'FUEL_USED_1', 'Flight', 'MSN']
datasets = []

for filename in filenames:
    datasets.append(pd.read_csv(filename, sep=";"))


In [2]:
data = pd.read_csv("data/msn_02_fuel_leak_signals_preprocessed.csv", sep=";")
data.shape

(623580, 111)

In [3]:
def Show_me_the_basics(df):
    resdf = pd.DataFrame()
    resdf['Column'] = df.columns
    resdf['Type'] = df.dtypes.values
    resdf['Rows'] = df.shape[0]
    resdf['# NaNs'] = df.isna().sum().values
    resdf['% NaNs'] = resdf['# NaNs'] / resdf['Rows'] *100
    resdf['# Uniques'] = df.nunique().values
    resdf['Uniques'] = [list(df[col].unique()) if df[col].nunique() < 13 else 'Too many to list' for col in df.columns]
    return resdf

In [4]:
data.UTC_TIME = pd.to_datetime(data.UTC_TIME)

In [5]:
nansdata = Show_me_the_basics(data)
#pd.set_option('display.max_rows', None)
nansdata

Unnamed: 0,Column,Type,Rows,# NaNs,% NaNs,# Uniques,Uniques
0,UTC_TIME,datetime64[ns],623580,0,0.0,610106,Too many to list
1,MSN,object,623580,0,0.0,1,[A400M-0002]
2,Flight,object,623580,0,0.0,42,Too many to list
3,ENGINE_RUNNING_1,bool,623580,0,0.0,2,"[True, False]"
4,ENGINE_RUNNING_2,bool,623580,0,0.0,2,"[True, False]"
5,ENGINE_RUNNING_3,bool,623580,0,0.0,2,"[True, False]"
6,ENGINE_RUNNING_4,bool,623580,0,0.0,2,"[True, False]"
7,FLIGHT_PHASE_COUNT,float64,623580,62922,10.090445,12,"[nan, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ..."
8,FUEL_FLOW_1,float64,623580,108643,17.422464,176435,Too many to list
9,FUEL_FLOW_2,float64,623580,106986,17.15674,177250,Too many to list


### Lets add some features

### ANALYZING FLIGHTS

In [7]:
def flight_basics(df):
    flightnumls = list(df.Flight.unique())
    resdfls = []
    for flightnum in flightnumls:
        df_flight = df[df.Flight == flightnum]
        df_flight_basics = Show_me_the_basics(df_flight)
        resdfls.append((flightnum, df_flight_basics))

    return resdfls



In [16]:
flight_basic_data = flight_basics(data)
flight_basic_data[0]

('V0136',
                                    Column            Type   Rows  # NaNs  \
 0                                UTC_TIME  datetime64[ns]  16934       0   
 1                                     MSN          object  16934       0   
 2                                  Flight          object  16934       0   
 3                        ENGINE_RUNNING_1            bool  16934       0   
 4                        ENGINE_RUNNING_2            bool  16934       0   
 5                        ENGINE_RUNNING_3            bool  16934       0   
 6                        ENGINE_RUNNING_4            bool  16934       0   
 7                      FLIGHT_PHASE_COUNT         float64  16934       1   
 8                             FUEL_FLOW_1         float64  16934       1   
 9                             FUEL_FLOW_2         float64  16934       1   
 10                            FUEL_FLOW_3         float64  16934       1   
 11                            FUEL_FLOW_4         float64  16934 

In [6]:
def make_flights_dfs_ls(df):
    flightnumls = list(df.Flight.unique())
    resdfls = []
    for flightnum in flightnumls:
        df_flight = df[df.Flight == flightnum]
        resdfls.append((flightnum,df_flight))
    return resdfls

In [8]:
dataperflight = make_flights_dfs_ls(data)

In [19]:

flightAltitudeescriptives = pd.DataFrame(columns=['Flight', 'count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max', ])

for flight, planedata in dataperflight:
    describeSeries = planedata.FW_GEO_ALTITUDE.describe()
    flightAltitudeescriptives = flightAltitudeescriptives.append({'Flight': flight, 
                                      'count': describeSeries['count'], 
                                      'mean': describeSeries['mean'], 'std': describeSeries['std'], 
                                      'min': describeSeries['min'], '25%': describeSeries['25%'], 
                                      '50%': describeSeries['50%'], '75%': describeSeries['75%'], 
                                      'max': describeSeries['max'],
                                        
                                        }, 
                                          ignore_index=True)
    
flightAltitudeescriptives

    

Unnamed: 0,Flight,count,mean,std,min,25%,50%,75%,max
0,V0136,16933.0,13694.095037,10695.967887,247.625,3223.125,15238.88,20429.25,34395.0
1,V0133,14322.0,12373.932404,11093.17161,248.875,3011.6875,14177.94,21048.095,33084.25
2,V0926,9154.0,3290.233113,3322.321754,534.6802,541.0,2228.3055,5044.69675,14324.73
3,V0837,18890.0,6606.764068,5067.151006,515.2942,542.106275,6295.355,9960.34475,15349.2
4,V0626,15178.0,750.230055,506.548976,493.0479,503.713125,507.17895,517.761625,2209.664
5,V0929,19292.0,13603.932474,8734.108658,83.71345,7615.4965,19455.71,20604.12,25810.08
6,V0095,19429.0,8426.912709,5613.799609,259.0,4275.125,9511.25,10794.63,26437.0
7,V0623,14871.0,17552.542622,12447.037082,85.87821,3059.273,25069.66,26874.445,31212.19
8,V0965,20826.0,8976.836524,9037.525004,86.0533,3708.68975,4848.6465,10296.8925,29080.0
9,V0344,11488.0,14140.174656,8920.751591,507.1674,1889.88775,20632.95,20659.65,20712.4


In [22]:

for flight, planedata in dataperflight:
    planedata.UTC_TIME = pd.to_datetime(planedata.UTC_TIME)
    flightduration = pd.Timedelta(planedata.UTC_TIME.max() - planedata.UTC_TIME.min())
    flightAltitudeescriptives.loc[flightAltitudeescriptives.Flight == flight, 'FlightDuration'] = flightduration

flightAltitudeescriptives['FlightSeconds'] = flightAltitudeescriptives['FlightDuration'].dt.total_seconds()
flightAltitudeescriptives['RowsPerSecond'] = flightAltitudeescriptives['count'] / flightAltitudeescriptives['FlightSeconds']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [70]:
def make_IQR_outlier_bound(descriptivesdf):
    descriptivesdf['IQR'] = descriptivesdf['75%'] - descriptivesdf['25%']
    descriptivesdf['UpperBound'] = descriptivesdf['75%'] + 1.5 * descriptivesdf['IQR']
    descriptivesdf['LowerBound'] = descriptivesdf['25%'] - 1.5 * descriptivesdf['IQR']
    return descriptivesdf

In [71]:
flightAltitudeescriptives = make_IQR_outlier_bound(flightAltitudeescriptives)
flightAltitudeescriptives.sort_values(by='FlightDuration', ascending=False)

Unnamed: 0,Flight,count,mean,std,min,25%,50%,75%,max,FlightDuration,FlightSeconds,RowsPerSecond,IQR,UpperBound,LowerBound
35,V0890,7054.0,154.799198,215.863005,90.58122,101.000325,104.06145,105.944325,1250.526,0 days 13:05:36,47136.0,0.149652,4.944,113.360325,93.584325
41,V0137,19906.0,12398.017367,7161.135012,244.875,8318.625,14800.565,17860.0975,24893.88,0 days 07:46:15,27975.0,0.711564,9541.4725,32172.30625,-5993.58375
20,V0135,20552.0,12232.790196,7512.963578,244.5,8203.0,14090.565,20272.5625,21313.25,0 days 07:38:58,27538.0,0.746314,12069.5625,38376.90625,-9901.34375
5,V0929,19292.0,13603.932474,8734.108658,83.71345,7615.4965,19455.71,20604.12,25810.08,0 days 07:27:31,26851.0,0.718483,12988.6235,40087.05525,-11867.43875
13,V0097,22165.0,15240.236372,13599.54781,0.0,273.25,15684.38,27423.63,35481.38,0 days 06:09:25,22165.0,1.0,27150.38,68149.2,-40452.32
8,V0965,20826.0,8976.836524,9037.525004,86.0533,3708.68975,4848.6465,10296.8925,29080.0,0 days 05:47:06,20826.0,1.0,6588.20275,20179.196625,-6173.614375
6,V0095,19429.0,8426.912709,5613.799609,259.0,4275.125,9511.25,10794.63,26437.0,0 days 05:23:49,19429.0,1.0,6519.505,20573.8875,-5504.1325
25,V0963,19290.0,18455.149344,12113.210588,-1097.592,5242.17875,22763.95,25922.325,34996.07,0 days 05:21:30,19290.0,1.0,20680.14625,56942.544375,-25778.040625
3,V0837,18890.0,6606.764068,5067.151006,515.2942,542.106275,6295.355,9960.34475,15349.2,0 days 05:14:50,18890.0,1.0,9418.238475,24087.702463,-13585.251438
27,V0966,17860.0,8377.91287,8256.743535,82.37575,2315.96425,4691.272,11625.4325,27692.29,0 days 04:57:40,17860.0,1.0,9309.46825,25589.634875,-11648.238125


#### Pick a specific flight

In [26]:
def pick_specific_flight(df, flightnum):
    singleflightdata = df[df.Flight == flightnum]
    print(f'\n YOU ARE LOOKING AT FLIGHT: {flightnum} \n')
    return singleflightdata

Flights I Checked-out trying to understand flights and when they are anomalous. 

- V0889: The flight has 2 times the rows than it has seconds, so i had a closer look. We just have duplicate timestamps We can just drop duplicates --> UTC TIME has duplicates. 


- V0890: This flight only has rows for 14% of its duration. here we need to dropna() theres a whole bunch of hours where we have time readings but no data. The data could then be visualized, however we see that altitude changes irregularly. we need to chek a little more to understand whats going on. We see the value_fob deceasing and an empty central tank. the LX and RX tank show irregular behavior that combined with the behvaior of the altitude makes me think that the plane has either in the landing (which is still strange given the 1.30h period in which altitude was 0) or the plane's fuel tanks are being emptied. For this i will have a closer look at engine data. Research describes the steps to the process of emptying fuel tanks as: Fuel Transfer, Fuel Drainage, Fuel Consumption, Fuel Venting. These steps are conducted generally during maintenance and fuel tank inspection. For this the engines are therefore turned on but the plane is on the ground. The way LXT and RXT dump fuel in the graph indicate to me the system is not pressurized. Looking at the engineered features said it all. we can see the rate at which value FOB is dissapearing is per second geater than the rate at which the engines are burning fuel, and this delta is increasing in size. 

So what characteized this maintenace check: Altitude behvaing irregularly, mainly staying below 1250 (which is its outlier, Upper IR bound 113). The DELTA_VFOBM_VS_SFUSED is increasing in size, meaning the rate at which fuel is going missing from Value_Fobs first instance is steadily greate than the sum of fuel used by the engines.

- V0929: This flight has rows only for 71% of its duration. Again we see data only after a long period of nan values, we therefore dropped nan values We see drastic changes in altitude in the duration of the flight, the plane dives to low altitude then increases its altitude again


- V0137: FIGHT GOING UP AND DOWN, NORMAL LOOKING TANK BEHVAIOR (TEST FLIGHT)

- V0135:

- V0624:

- V0835:

- V0097
- V0095
- V0138
- V0622
- V0925
- V0344 - NORMAL LOOKING ALTITUDE
- V0346 - SPIKE TOUCH AND GOES
- V0140 - Flight goes up doown then up again.
- V0925 - SPIKE Normal looking altitude
- V0926 - SPIKE BIG UP AND DOWN IN ALTITUDE 
- V0927 - 

MOST HAVE THIS STRANGE SPIKE IN FUEL USED WHEN TURNING ON THE ENGINE (WE SHOULD CHECK THE FLIGHT PHASE OF THIS). looks like its an effect of phase 1 and just spinning up the engines. Not all have this issue

In [459]:
flightIwant = "V0927"
singleflightdata = pick_specific_flight(data, flightIwant)


 YOU ARE LOOKING AT FLIGHT: V0926 



In [460]:
colsOfIntrest = ['UTC_TIME','FUEL_USED_1', 'FUEL_USED_2', 'FUEL_USED_3', 'FUEL_USED_4',
         'FW_GEO_ALTITUDE', 'VALUE_FOB', 'VALUE_FUEL_QTY_CT', 'VALUE_FUEL_QTY_FT1', 
            'VALUE_FUEL_QTY_FT2', 'VALUE_FUEL_QTY_FT3', 'VALUE_FUEL_QTY_FT4', 'VALUE_FUEL_QTY_LXT',
            'VALUE_FUEL_QTY_RXT', 'FLIGHT_PHASE_COUNT', 'Flight', 'MSN']

singleflightdata = singleflightdata[colsOfIntrest]

In [461]:
singleflightdata.shape

(9155, 17)

In [462]:
singleflightdata.dropna(inplace=True)

In [463]:
singleflightdata.FLIGHT_PHASE_COUNT = singleflightdata.FLIGHT_PHASE_COUNT.floordiv(1)

In [464]:
singleflightdata.shape

(6734, 17)

In [465]:
# SMOOTH

singleflightdata['UTC_TIME'] = pd.to_datetime(singleflightdata['UTC_TIME'])
singleflightdata.set_index('UTC_TIME', inplace=True)
singleflightdata = singleflightdata.resample('5S').mean()
singleglightdata = singleflightdata.interpolate(method='linear', limit_direction='both')
singleflightdata.reset_index(inplace=True)


In [466]:
singleflightdata.shape

(1348, 15)

In [467]:
# FEATURE ENGINEERING

singleflightdata['SUM_FUEL_USED'] = singleflightdata['FUEL_USED_1'] + singleflightdata['FUEL_USED_2'] + singleflightdata['FUEL_USED_3'] + singleflightdata['FUEL_USED_4']
singleflightdata['SUM_FUEL_QTY'] = singleflightdata['VALUE_FUEL_QTY_CT'] + singleflightdata['VALUE_FUEL_QTY_FT1'] + singleflightdata['VALUE_FUEL_QTY_FT2'] + singleflightdata['VALUE_FUEL_QTY_FT3'] + singleflightdata['VALUE_FUEL_QTY_FT4'] + singleflightdata['VALUE_FUEL_QTY_LXT'] + singleflightdata['VALUE_FUEL_QTY_RXT']
singleflightdata['VALUE_FOB_MISSING'] = singleflightdata['VALUE_FOB'].iloc[0] - singleflightdata['VALUE_FOB']
singleflightdata['DELTA_VFOB_VS_SQTY'] = singleflightdata['VALUE_FOB'] - singleflightdata['SUM_FUEL_QTY']
singleflightdata['DELTA_VFOBM_VS_SFUSED'] = singleflightdata['VALUE_FOB_MISSING'] - singleflightdata['SUM_FUEL_USED']

singleflightdata['ALTITUDE_DIFF'] = singleflightdata['FW_GEO_ALTITUDE'].diff()


In [468]:
phase_dict = {
    1: 'Pre-flight',
    2: 'Engine Run',
    3: 'Take-Off 1',
    4: 'Take-Off 2',
    5: 'Take-Off 3',
    6: 'Climbing 1',
    7: 'Climbing 2',
    8: 'Cruise',
    9: 'Descent',
    10: 'Approach',
    11: 'Landing',
    12: 'Post-flight'
}

phaselabels = [phase_dict.get(phase, 'Unknown') for phase in singleflightdata['FLIGHT_PHASE_COUNT']]

In [469]:
fig = go.Figure()

fig.add_trace(go.Pie(labels=phaselabels, values=singleflightdata['FLIGHT_PHASE_COUNT']))
fig.update_layout(title='FLIGHT_PHASE_COUNT', width=800, height=500, template='plotly_dark')

In [470]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.SUM_FUEL_USED, mode='lines', name='Fuel Used' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.SUM_FUEL_QTY, mode='lines', name='Fuel Quantity' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FOB_MISSING, mode='lines', name='Value FOB Missing' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FOB, mode='lines', name='Value FOB' ))

fig.update_layout(title=f'Flight {flightIwant} Engineered Features', template='plotly_dark')


In [471]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.ALTITUDE_DIFF, mode='lines', name='ALTITUDE DIFF' ))

fig.update_layout(title=f'Flight {flightIwant} Engineered Features', template='plotly_dark')


In [472]:
#phase_labels = [f'Phase {int(i)}' for i in singleflightdata.FLIGHT_PHASE_COUNT]

#fig = go.Figure()

#fig.add_trace(go.Scatter(
#   x=singleflightdata.UTC_TIME,
#    y=singleflightdata.ALTITUDE_DIFF,
#    mode='lines',
#    name='ALTITUDE DIFF',
#    line=dict(color=phase_labels, colorscale='Viridis', showscale=True)
#))

#fig.update_layout(
#    title=f'Flight {flightIwant} Engineered Features',
#    template='plotly_dark'
#)


In [473]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.DELTA_VFOB_VS_SQTY, mode='lines', name='Delta VFOB vs SQTY' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.DELTA_VFOBM_VS_SFUSED, mode='lines', name='Delta VFOBM vs SFUSED' ))

fig.update_layout(title=f'Flight {flightIwant} Engineered Features', template='plotly_dark')


In [474]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.FW_GEO_ALTITUDE, mode='lines', name='Altitude'))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FOB, mode='lines', name='Value FOB' ))

fig.update_layout(title=f'Flight {flightIwant} Altitude and Value FOB', template='plotly_dark')

In [475]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FUEL_QTY_CT, mode='lines', name='Central Tank'))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FUEL_QTY_LXT, mode='lines', name='Left Tank' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FUEL_QTY_RXT, mode='lines', name='Right Tank' ))

fig.update_layout(title=f'Flight {flightIwant} Fuel Quantities', template='plotly_dark')

In [476]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FUEL_QTY_FT1, mode='lines', name='Fuel Tank 1'))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FUEL_QTY_FT2, mode='lines', name='Fuel Tank 2' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FUEL_QTY_FT3, mode='lines', name='Fuel Tank 3' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.VALUE_FUEL_QTY_FT4, mode='lines', name='Fuel Tank 4' ))

fig.update_layout(title=f'Flight {flightIwant} Fuel Quantities', template='plotly_dark')

In [477]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.FUEL_USED_1, mode='lines', name='Fuel Used 1'))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.FUEL_USED_2, mode='lines', name='Fuel Used 2' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.FUEL_USED_3, mode='lines', name='Fuel Used 3' ))
fig.add_trace(go.Scatter(x=singleflightdata.UTC_TIME, y=singleflightdata.FUEL_USED_4, mode='lines', name='Fuel Used 4' ))

fig.update_layout(title=f'Flight {flightIwant} Fuel Used', template='plotly_dark')

In [478]:
singleflightdata.head(25)[['FLIGHT_PHASE_COUNT', 'UTC_TIME', 'ALTITUDE_DIFF', 'VALUE_FOB', 'FUEL_USED_1', 'FUEL_USED_2', 'FUEL_USED_3' , 'FUEL_USED_4']]

Unnamed: 0,FLIGHT_PHASE_COUNT,UTC_TIME,ALTITUDE_DIFF,VALUE_FOB,FUEL_USED_1,FUEL_USED_2,FUEL_USED_3,FUEL_USED_4
0,1.0,2016-10-07 07:36:15,,17930.333333,3827.901,3811.325,3795.018,3845.375
1,1.0,2016-10-07 07:36:20,-0.127107,17929.0,3827.901,3811.325,3795.018,3845.375
2,1.0,2016-10-07 07:36:25,-0.42544,17929.4,3827.901,3811.325,3795.018,3845.375
3,1.0,2016-10-07 07:36:30,-0.52666,17932.6,0.0,3811.325,3795.018,3845.375
4,1.0,2016-10-07 07:36:35,0.19764,17931.2,0.0,3811.325,3795.018,3845.375
5,1.0,2016-10-07 07:36:40,0.05254,17931.0,0.000525,3811.325,3795.018,3845.375
6,1.0,2016-10-07 07:36:45,-0.24856,17929.2,0.001283,3811.325,3795.018,3845.375
7,1.0,2016-10-07 07:36:50,-0.09276,17927.0,0.001372,3811.325,3795.018,3845.375
8,1.0,2016-10-07 07:36:55,0.05268,17926.2,0.001817,3811.325,3795.018,3845.375
9,1.0,2016-10-07 07:37:00,-0.0095,17927.8,0.002452,3811.325,3795.018,3845.375


#### Pick a random flight

In [9]:
import random
def pick_rand_flight(df):
    flightnum = random.choice(df.Flight.unique().tolist())
    flightdata = df[df.Flight == flightnum]
    print(f'\n YOU ARE LOOKING AT FLIGHT: {flightnum} \n')
    return flightdata


In [19]:
# BEWARE OF ThiS
singleflightdf = pick_rand_flight(data)


 YOU ARE LOOKING AT FLIGHT: V0926 



In [24]:
singleflightdf.FW_GEO_ALTITUDE.describe()

count     9154.000000
mean      3290.233113
std       3322.321754
min        534.680200
25%        541.000000
50%       2228.305500
75%       5044.696750
max      14324.730000
Name: FW_GEO_ALTITUDE, dtype: float64

In [13]:
flightdf.head()

Unnamed: 0,UTC_TIME,MSN,Flight,ENGINE_RUNNING_1,ENGINE_RUNNING_2,ENGINE_RUNNING_3,ENGINE_RUNNING_4,FLIGHT_PHASE_COUNT,FUEL_FLOW_1,FUEL_FLOW_2,FUEL_FLOW_3,FUEL_FLOW_4,FUEL_PITCH,FUEL_ROLL,FUEL_TRANSFER_MODE_VALUE,FUEL_USED_1,FUEL_USED_2,FUEL_USED_3,FUEL_USED_4,FW_GEO_ALTITUDE,LEAK_DETECTION_LEAK_FLOW,LSTU1_A_VOLUME,LSTU1_F_VOLUME,PITCH_ANGLE,ROLL_ANGLE,RSTU1_A_VOLUME,RSTU1_F_VOLUME,SELECTED_GADIR_ALTITUDE_VALUE,STATE_PMP_MAIN_FT1_ABNRM_ON,STATE_PMP_MAIN_FT1_IMMERSED,STATE_PMP_MAIN_FT1_ON,STATE_PMP_MAIN_FT2_ABNRM_ON,STATE_PMP_MAIN_FT2_IMMERSED,STATE_PMP_MAIN_FT2_ON,STATE_PMP_MAIN_FT3_ABNRM_ON,STATE_PMP_MAIN_FT3_IMMERSED,STATE_PMP_MAIN_FT3_ON,STATE_PMP_MAIN_FT4_ABNRM_ON,STATE_PMP_MAIN_FT4_IMMERSED,STATE_PMP_MAIN_FT4_ON,STATE_PMP_STBY_FT1_ABNRM_ON,STATE_PMP_STBY_FT1_IMMERSED,STATE_PMP_STBY_FT1_ON,STATE_PMP_STBY_FT2_ABNRM_ON,STATE_PMP_STBY_FT2_IMMERSED,STATE_PMP_STBY_FT2_ON,STATE_PMP_STBY_FT3_ABNRM_ON,STATE_PMP_STBY_FT3_IMMERSED,STATE_PMP_STBY_FT3_ON,STATE_PMP_STBY_FT4_ABNRM_ON,STATE_PMP_STBY_FT4_IMMERSED,STATE_PMP_STBY_FT4_ON,STATE_PMP_XFR_1_L_ABNRM_ON,STATE_PMP_XFR_1_L_IMMERSED,STATE_PMP_XFR_1_L_ON,STATE_PMP_XFR_2_L_ABNRM_ON,STATE_PMP_XFR_2_L_IMMERSED,STATE_PMP_XFR_2_L_ON,STATE_PMP_XFR_3_R_ABNRM_ON,STATE_PMP_XFR_3_R_IMMERSED,STATE_PMP_XFR_3_R_ON,STATE_PMP_XFR_4_R_ABNRM_ON,STATE_PMP_XFR_4_R_IMMERSED,STATE_PMP_XFR_4_R_LP,STATE_PMP_XFR_4_R_ON,STATUS_FUEL_LEAK_DETECTED_VALID,TRANSFER_MODE,VALUE_FOB,VALUE_FUEL_QTY_CC1,VALUE_FUEL_QTY_CC2,VALUE_FUEL_QTY_CC3,VALUE_FUEL_QTY_CC4,VALUE_FUEL_QTY_CT,VALUE_FUEL_QTY_FT1,VALUE_FUEL_QTY_FT2,VALUE_FUEL_QTY_FT3,VALUE_FUEL_QTY_FT4,VALUE_FUEL_QTY_LXT,VALUE_FUEL_QTY_RXT,day,month,time,year,APU_FUEL_FLOW_REQUEST_SIGNAL_1,EF1_Density,EF4_Density,RESOLVED_STATE_V_D,RESOLVED_STATE_V_LP1,RESOLVED_STATE_V_LP2,RESOLVED_STATE_V_LP3,RESOLVED_STATE_V_LP4,RESOLVED_STATE_V_RM,RESOLVED_STATE_V_RP,RESOLVED_STATE_V_T1,RESOLVED_STATE_V_T2,RESOLVED_STATE_V_T3,RESOLVED_STATE_V_T4,RESOLVED_STATE_V_X1,RESOLVED_STATE_V_X2,RESOLVED_STATE_V_X3,RESOLVED_STATE_V_X4,STATE_FUEL_QTY_ACCURACY_LST_DEGRADED,STATE_FUEL_QTY_ACCURACY_RST_DEGRADED,STATUS_FUEL_QTY_PART_UNUSABLE_LST,STATUS_FUEL_QTY_PART_UNUSABLE_RST,STATUS_FUEL_QTY_UNUSABLE_LST,STATUS_FUEL_QTY_UNUSABLE_RST,STATUS_OVERFLOW_LST,STATUS_OVERFLOW_RST,VALUE_FUEL_VOL_LST,VALUE_FUEL_VOL_RST
194861,2010-10-28 07:14:46,A400M-0002,V0097,True,True,True,True,,,,,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,,,,,,,,,,,,,,,28,10,07:14:46,2010,,,,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,,,,,,,,
194862,2010-10-28 07:14:47,A400M-0002,V0097,False,False,False,False,1.0,0.0,0.0,0.0,0.0,1.599987,0.580852,0.0,0.0,0.0,0.0,0.0,265.875,0.0,40.80074,28.61984,0.571289,0.527344,28.50451,39.44913,-181.0,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,1.0,,28611.0,290.0,239.0,243.0,294.0,9936.0,1704.0,1868.0,1678.0,1663.0,5946.0,5819.0,28,10,07:14:47,2010,,0.801546,0.809995,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.0,0.0,0.0,0.0,0.0,0.0,40.72209,0.0
194863,2010-10-28 07:14:48,A400M-0002,V0097,False,False,False,False,1.0,0.0,0.0,0.0,0.0,1.599987,0.580852,0.0,0.0,0.0,0.0,0.0,265.875,0.0,40.70916,28.56454,0.571289,0.527344,28.49164,39.33009,-181.0,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,1.0,,28611.0,290.0,239.0,243.0,294.0,9936.0,1704.0,1868.0,1678.0,1663.0,5946.0,5819.0,28,10,07:14:48,2010,,0.801544,0.809995,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.0,0.0,0.0,0.0,0.0,0.0,40.71533,0.0
194864,2010-10-28 07:14:49,A400M-0002,V0097,False,False,False,False,1.0,0.0,0.0,0.0,0.0,1.599987,0.580852,0.0,0.0,0.0,0.0,0.0,265.75,0.0,40.70884,28.31597,0.571289,0.527344,28.50263,39.37023,-181.0,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,1.0,,28611.0,290.0,239.0,243.0,294.0,9936.0,1704.0,1868.0,1678.0,1663.0,5946.0,5819.0,28,10,07:14:49,2010,,0.801546,0.809995,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.0,0.0,0.0,0.0,0.0,0.0,40.71258,0.0
194865,2010-10-28 07:14:50,A400M-0002,V0097,False,False,False,False,1.0,0.0,0.0,0.0,0.0,1.599987,0.580852,0.0,0.0,0.0,0.0,0.0,265.875,0.0,40.69891,28.05474,0.571289,0.527344,28.48396,39.40307,-181.0,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,1.0,,28611.0,290.0,239.0,243.0,294.0,9936.0,1704.0,1868.0,1678.0,1663.0,5946.0,5819.0,28,10,07:14:50,2010,,0.801541,0.809995,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.0,0.0,0.0,0.0,0.0,0.0,40.71855,0.0


In [14]:
// Make a dataframe showing the min, mean and max values for the geo alttude column for each flight_basic_data










SyntaxError: invalid syntax (<ipython-input-14-7cd13f2b7084>, line 1)

#### Standardized data