In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import json
from urllib.request import urlopen

In [62]:
def get_denorm_data(from_param, to_param):
    """Download denormalized-flights records and keep one row per flight.

    Parameters
    ----------
    from_param, to_param : str
        Ready-made query fragments such as ``'from=2022-04-01T00:00:00.000Z'``
        and ``'to=2022-05-31T23:59:59.000Z'``; they are inserted into the URL
        unchanged.

    Returns
    -------
    pandas.DataFrame
        The parsed JSON response, keeping the first row for each combination
        of flight_date, flight_number, departure_aerodrome_icao_code and
        aircraft_registration.
    """
    base_url = 'http://sfuelepcn1.thaiairways.co.th:3001/denormalized-flights?'
    # NOTE(review): skip=0 / limit=0 presumably asks the service for every
    # record in the window - confirm against the API.
    query = '&'.join([from_param, to_param, 'skip=0', 'limit=0'])
    flights = pd.read_json(base_url + query)

    flight_key = [
        'flight_date',
        'flight_number',
        'departure_aerodrome_icao_code',
        'aircraft_registration',
    ]
    return flights.drop_duplicates(subset=flight_key)

def get_oefp_user_input(skip = '0', limit = '500'):
    """Fetch one page of eOFP user-input records as a DataFrame.

    Parameters
    ----------
    skip : str or int, default '0'
        Number of records to skip; converted with ``str`` before use.
    limit : str or int, default '500'
        Page size requested; converted with ``str`` before use.
        NOTE(review): the original comment mentions a cap of 1000 and a
        service-side default of 50 - neither is verifiable from this file.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_json`` parses from the userinputs endpoint.
    """
    query = "skip={!s}&limit={!s}".format(skip, limit)
    return pd.read_json("https://tgeofp.rtsp.us/api/v1/userinputs?" + query)

def get_ofp():
    """Fetch OFP records from the ofp endpoint as a DataFrame.

    The request always asks for ``limit=1000``.
    NOTE(review): the original comment says the service default is 150;
    that is not verifiable from this file.
    """
    return pd.read_json("https://tgeofp.rtsp.us/api/v1/ofp?limit=1000")

def get_ofp_by_flightplan(flightplan) :
    """Return the OFP JSON document for one flight-plan id, or None.

    Parameters
    ----------
    flightplan : str
        Flight-plan id appended to the ofp endpoint URL.

    Returns
    -------
    dict or list or None
        The decoded JSON body. ``None`` is returned for any failure
        (non-string id, network/HTTP error, invalid JSON); callers use that
        to detect flight plans that cannot be matched.
    """
    try:
        url = "https://tgeofp.rtsp.us/api/v1/ofp/" + flightplan
        # The context manager closes the connection even when reading or
        # decoding fails; this function is typically called once per row.
        with urlopen(url) as response:
            return json.loads(response.read())
    except Exception:
        # Deliberate best-effort lookup: any failure means "no OFP available".
        return None

def create_fuelreport_df(denorm_df):
    """Expand the ``fuelreport`` column of ``denorm_df`` into its own DataFrame.

    Rows whose ``fuelreport`` is missing are skipped. Each remaining entry is
    turned into one row via ``pd.Series``, and only the first row is kept for
    each combination of dep, flight_number, flight_date and
    aircraft_registration. The index labels of ``denorm_df`` are preserved.
    """
    has_report = denorm_df['fuelreport'].notna()
    expanded = denorm_df.loc[has_report, 'fuelreport'].apply(pd.Series)

    report_key = ['dep', 'flight_number', 'flight_date', 'aircraft_registration']
    return expanded.drop_duplicates(subset=report_key)

def count_fuel_data(fuel_report_df):
    """Return the number of fuel-report rows per (year, month).

    ``flight_date`` is parsed value by value with ``pd.to_datetime``; the
    result is a Series indexed by ``year`` and ``month`` holding the count of
    non-null ``flight_number`` values in each month.
    """
    summary_columns = ['flight_date', 'flight_number', 'aircraft_registration', 'dep']
    ordered = fuel_report_df.sort_values(['flight_date', 'flight_number']).reset_index()
    fuel_summary = ordered[summary_columns]

    # Parse each date individually, then split it into calendar parts.
    stamps = [pd.to_datetime(raw_date) for raw_date in fuel_summary['flight_date']]
    fuel_summary = fuel_summary.assign(
        month=[stamp.month for stamp in stamps],
        year=[stamp.year for stamp in stamps],
    )
    return fuel_summary.groupby(['year', 'month'])['flight_number'].count()

def _ofp_flight_key(ofp_json):
    """Build the flight-identifying columns from one OFP JSON document."""
    flight_key = ofp_json["flight_key"]
    ac_reg = ofp_json["aircraft"]["aircraft_registration"]
    flt_date = datetime.strptime(flight_key["flight_date"], "%Y-%m-%dZ")
    return {
        "departure_aerodrome_icao_code": flight_key["departure_aerodrome"]["value"],
        "arrival_aerodrome_icao_code": flight_key["arrival_aerodrome"]["value"],
        "flight_number": "THA" + flight_key["flight_number"],
        # Re-rendered as a midnight timestamp string; presumably this matches
        # the flight_date format of the denormalized-flights data - confirm.
        "flight_date": flt_date.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
        # A hyphen is inserted after the first two characters of the registration.
        "aircraft_registration": ac_reg[:2] + "-" + ac_reg[2:],
        "ofp_imported_time": datetime.strptime(
            ofp_json["imported_time"], "%Y-%m-%dT%H:%M:%S.%fZ"
        ),
    }


def merge_flightPlan_eofp(eofp) :
    """Enrich eOFP rows with flight details looked up by flight-plan id.

    For every row, ``get_ofp_by_flightplan(row.flightPlan)`` is called. Rows
    whose flight plan cannot be fetched (lookup returns ``None``) are dropped;
    the remaining rows get the join columns used to match the
    denormalized-flights data.

    Parameters
    ----------
    eofp : pandas.DataFrame
        Must contain ``flightPlan``, ``userInput``, ``plannedCheckPoint``,
        ``createdAt`` and ``updatedAt``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The matched rows (original index labels kept) restricted to the eOFP
        columns above plus ``ofp_imported_time``,
        ``departure_aerodrome_icao_code``, ``arrival_aerodrome_icao_code``,
        ``aircraft_registration``, ``flight_date`` and ``flight_number``.
        Empty (but with all columns) when no flight plan matches.

    Raises
    ------
    KeyError
        If a required eOFP column is missing or an OFP document lacks an
        expected field.
    """
    # Walk rows by position so any index (not only 0..n-1) works.
    matched_positions = []
    flight_keys = []
    for position, flightplan in enumerate(eofp["flightPlan"]):
        ofp_json = get_ofp_by_flightplan(flightplan)

        # Skip flight-plan ids that have no OFP document.
        if ofp_json is None:
            continue

        matched_positions.append(position)
        flight_keys.append(_ofp_flight_key(ofp_json))

    # Copy so the caller's frame keeps only the raw eOFP data.
    df = eofp.iloc[matched_positions].copy()

    derived_columns = [
        "departure_aerodrome_icao_code",
        "arrival_aerodrome_icao_code",
        "flight_number",
        "flight_date",
        "aircraft_registration",
        "ofp_imported_time",
    ]
    # One assignment per column; the columns exist even when nothing matched.
    for column in derived_columns:
        df[column] = [flight_key[column] for flight_key in flight_keys]

    ## Trim to the eOFP data plus the newly inserted columns
    data_list = [
        "flightPlan","userInput","plannedCheckPoint","createdAt","updatedAt","ofp_imported_time",
        "departure_aerodrome_icao_code", "arrival_aerodrome_icao_code",
        "aircraft_registration", "flight_date", "flight_number",
        ]
    return df[data_list]

In [63]:
# Reporting window, written as ready-made query fragments for get_denorm_data.
# Edit the two timestamps to change the period.
from_param, to_param = (
    'from=2022-04-01T00:00:00.000Z',
    'to=2022-05-31T23:59:59.000Z',
)

# Create DataFrame
## Fuel Report
-   **denorm_df** : denormalized-flights data for the period between `from_param` and `to_param`
-   **fuel_report_df** : created by expanding the `fuelreport` column of denorm_df into separate columns

In [64]:
# Download the denormalized flights for the window set by from_param/to_param
# (network call; duplicates per flight are already removed by the helper).
denorm_df = get_denorm_data(from_param, to_param)
# Expand the non-null `fuelreport` entries of denorm_df into their own frame.
fuel_report_df = create_fuelreport_df(denorm_df)

## OP data
-   op_data_df : data read from the `OpMovement` sheet of OpMovement.xlsx
-   total_flights : total number of flights in each month

In [39]:
# Load columns A:H of the 'OpMovement' sheet and parse the dates value by value
op_data_df = pd.read_excel('./data/OpMovement.xlsx', sheet_name='OpMovement', usecols='A:H')
op_data_df['Date'] = op_data_df['Date'].apply(pd.to_datetime)

# Calendar parts of every movement date, used as grouping keys below
years = [stamp.year for stamp in op_data_df['Date']]
months = [stamp.month for stamp in op_data_df['Date']]

# Add year/month and prefix every registration with 'HS-'
# (presumably to match the 'HS-xxx' registrations of the other sources - confirm)
op_data_df = op_data_df.assign(
    year=years,
    month=months,
    Registration='HS-' + op_data_df['Registration'],
)

# Number of flights (non-null 'Flt No') per year and month
total_flights = op_data_df.groupby(['year', 'month'])['Flt No'].count()

## eOFP data
-   eofp_df : DataFrame containing only the eOFP data
-   eofp_df_merged : eofp_df merged with data from the flight-plan database

In [40]:
# Rows requested per call to the eOFP user-input API
PAGE_SIZE = 500

# The number of fuel reports is used as the estimate of how many eOFP
# records to download: one first page plus `total_loop` further pages.
total_data = len(fuel_report_df)
total_loop = total_data // PAGE_SIZE

# Collect the pages in a list and concatenate once at the end; concatenating
# inside the loop re-copies all previously loaded rows on every iteration.
eofp_chunks = [get_oefp_user_input(skip=0, limit=PAGE_SIZE)]

for i in range(total_loop):
    skip = PAGE_SIZE * (i + 1)
    chunk = get_oefp_user_input(skip=skip, limit=PAGE_SIZE)

    # An empty page means the API has no more records; later pages would be
    # empty too, so stop requesting.
    if chunk.empty:
        break

    eofp_chunks.append(chunk)

eofp_df = pd.concat(eofp_chunks, ignore_index=True)

# Look up each flight plan and attach the flight-identifying columns
eofp_df_merged = merge_flightPlan_eofp(eofp_df)

## TGFR data
-   tgfr_df: data from Survey.csv. This file is no longer updated.

In [7]:
# Load the TGFR survey export; fields are separated by ';'
tgfr_df = pd.read_csv('./data/Survey.csv', sep=';')

# Parse flightDate one value at a time
date_time = [pd.to_datetime(raw_date) for raw_date in tgfr_df['flightDate']]
tgfr_df['flightDate'] = date_time

# Split flightSector on '-' into its departure and arrival parts
sector_parts = tgfr_df['flightSector'].str.split('-', expand=True)
tgfr_df[['departure', 'arrival']] = sector_parts

# Columns kept for the analysis, in display order
tgfr_columns = [
    'flightDate', 'flightNumber', 'acRegistration', 'departure', 'arrival',
    'plannedZFW', 'updatedZFW', 'ZFW',
    'plannedFlightTime',
    'plannedTaxiFuel', 'plannedRampFuel', 'plannedTripFuel',
    'rampFuel', 'landingFuel', 'parkingFuel',
    'offBlock', 'airborne', 'onGround', 'onBlock',
    'takeOffFuel', 'planningType',
    'pfId', 'pmId', 'apuForParking',
]
tgfr_df = tgfr_df[tgfr_columns]

## QAR data
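- qar_df : QAR columns expanded from the `qar` field of denorm_df, plus the flight identifiers (flight date, flight number, departure aerodrome, aircraft registration, aircraft type)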

In [107]:
# Expand every `qar` entry of denorm_df into columns (row labels are kept)
qar_expanded = denorm_df['qar'].apply(pd.Series)

# QAR measurements kept for the reports, in output order
qar_cols = [
    'actual_flight_time',
    'apu_used_during_taxi_in',
    'average_cruise_altitude',
    'climb_duration',
    'climb_fuel_burn',
    'cruise_duration',
    'cruise_fuel_burn',
    'cruise_distance',
    'descent_fuel_burn',
    'gross_weight',
    'landing_config_height',
    'landing_flaps',
    'one_engine_taxi_in',
    'overall_fuel_used',
    'pack_off_takeoff',
    'ramp_fuel',
    'reduced_flaps_landing',
    'stabilized_approach_height',
    'takeoff_flaps',
    'taxi_in_duration',
    'taxi_out_fuel',
    'thrust_reduction_height',
    'top_of_climb_altitude',
    'top_of_descent_altitude',
    'zero_fuel_weight',
    'auto_land_performed',
    'idle_reverse_landing_performed',
    'landing_sector_fuel_burn',
    'acceleration_height',
]

# Flight identifiers copied from denorm_df: source column -> column in qar_df.
# They are appended after the QAR columns in exactly this order.
flight_id_cols = {
    'flight_date': 'flight_date',
    'flight_number': 'flight_number',
    'departure_aerodrome_icao_code': 'departure_aerodrome_icao_code',
    'aircraft_registration': 'aircraft_registration',
    'aircraft_aircraft_config_fleet_name': 'aircraft_type',
}
qar_df = qar_expanded[qar_cols].assign(
    **{target: denorm_df[source] for source, target in flight_id_cols.items()}
)

# aircraft_registration keeps its hyphen (e.g. 'HS-TKW'); it is not stripped here.

# Fuel initiative reports

In [82]:
# Show the column names of qar_df in their final order.
qar_df.columns

Index(['actual_flight_time', 'apu_used_during_taxi_in',
       'average_cruise_altitude', 'climb_duration', 'climb_fuel_burn',
       'cruise_duration', 'cruise_fuel_burn', 'cruise_distance',
       'descent_fuel_burn', 'gross_weight', 'landing_config_height',
       'landing_flaps', 'one_engine_taxi_in', 'overall_fuel_used',
       'pack_off_takeoff', 'ramp_fuel', 'reduced_flaps_landing',
       'stabilized_approach_height', 'takeoff_flaps', 'taxi_in_duration',
       'taxi_out_fuel', 'thrust_reduction_height', 'top_of_climb_altitude',
       'top_of_descent_altitude', 'zero_fuel_weight', 'auto_land_performed',
       'idle_reverse_landing_performed', 'landing_sector_fuel_burn',
       'acceleration_height', 'flight_date', 'flight_number',
       'departure_aerodrome_icao_code', 'aircraft_registration',
       'aircraft_type'],
      dtype='object')

In [108]:
# fuel initiatives report
import qardata as qar  # project-local helper module used by this cell

# Work on a copy so qar_df itself is left unchanged.
fuel_initiative_report = qar_df.copy()

# Re-derive aircraft_type for every row from its registration and current type.
# NOTE(review): qar.b787_type presumably refines the B787 variant (the sample
# output shows 'B788') - confirm in qardata.
# Columns are read by name rather than by position (32/33), so the result does
# not depend on column order, and values are assigned row by row in order.
fuel_initiative_report['aircraft_type'] = [
    qar.b787_type(registration, aircraft_type)
    for registration, aircraft_type in zip(
        fuel_initiative_report['aircraft_registration'],
        fuel_initiative_report['aircraft_type'],
    )
]

In [106]:
# Spot check: recompute the aircraft type for the row labelled 2145.
# NOTE(review): ad-hoc debugging; fails if label 2145 is absent from the data.
qar.b787_type(fuel_initiative_report.aircraft_registration.loc[2145], fuel_initiative_report.aircraft_type.loc[2145])

# Characters at positions 3-4 of that row's registration.
# NOTE(review): for a hyphenated value such as 'HS-TKW' this slice is 'TK'; the
# recorded output 'WA' looks like it came from an earlier run - confirm.
fuel_initiative_report.aircraft_registration.loc[2145][3:5]

'WA'

In [109]:
# Spot-check 20 random registration/type pairs; random_state pins the draw so
# a re-run of the notebook shows the same rows.
fuel_initiative_report[['aircraft_registration', 'aircraft_type']].sample(20, random_state=42)

Unnamed: 0,aircraft_registration,aircraft_type
3039,HS-TKW,B777
348,HS-TKZ,B777
2674,HS-TKO,B777
1049,HS-THM,A350
1555,HS-TKL,B777
541,HS-THL,A350
2309,HS-TKK,B777
634,HS-TQD,B788
676,HS-TJW,B777
164,HS-THL,A350
