In [1]:
import xgboost as xgb 
import numpy as np
import category_encoders as ce
import pickle
import joblib
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Restore the trained XGBoost booster from its JSON dump.
model = xgb.Booster()
model.load_model("XGBoost.json")

# NOTE(security): pickle.load and joblib.load can execute arbitrary code while
# deserialising. Only load artifacts that were produced by a trusted pipeline.
#
# The names below are deliberately NOT pre-initialised to None: if a file is
# missing, the cell fails here and later cells raise a clear NameError instead
# of an obscure error on a None object.

# Despite the ".txt" extension these files are binary pickles (opened in "rb").
# Presumably fitted category_encoders objects -- TODO confirm against training code.
with open('encoder_binary.txt', "rb") as f:
    encoder_binary = pickle.load(f)

with open('encoder_ordinal.txt', "rb") as f:
    encoder_ordinal = pickle.load(f)

# Fitted scaler; get_price applies it to the ['price', 'flytime_hours'] column pair.
with open('scaler.joblib', "rb") as f:
    scaler = joblib.load(f)

In [3]:
def is_transit(brand):
    """Return 1 when ``brand`` contains a comma, otherwise 0.

    In this notebook a comma-separated brand string (e.g.
    'VietJet Air, Vietnam Airlines') lists more than one airline, which is
    used as the indicator for a transit itinerary.
    """
    return int(',' in brand)

In [4]:
# Cities offered in the departure / landing drop-downs.
cities = [
    'Hà Nội',
    'Đà Nẵng',
    'Hồ Chí Minh',
    'Phú Quốc',
    'Vinh',
    'Huế',
    'Côn Đảo',
    'Đà Lạt',
    'Đồng Hới',
    'Pleiku',
    'Hải Phòng',
    'Nha Trang',
    'Chu Lai',
    'Buôn Ma Thuột',
    'Cần Thơ',
    'Điện Biên Phủ',
    'Cà Mau',
    'Rạch Giá',
]

# Airline labels offered in the brand drop-down. Entries containing a comma
# name two carriers; is_transit() keys on that comma.
brands = [
    'Vietravel Airlines',
    'VietJet Air',
    'Vietnam Airlines',
    'A.P.G. Distribution System',
    'VietJet Air, Vietnam Airlines',
    'Vietnam Airlines, VietJet Air',
    'VietJet Air, Vietravel Airlines',
    'Vietnam Airlines, Vietravel Airlines',
    'Vietravel Airlines, VietJet Air',
    'Vietravel Airlines, Vietnam Airlines',
    'VietJet Air, Pacific Airlines',
    'Pacific Airlines, Vietravel Airlines',
    'Pacific Airlines',
    'Vietravel Airlines, Pacific Airlines',
    'Vietnam Airlines, Pacific Airlines',
    'Pacific Airlines, Vietnam Airlines',
    'Pacific Airlines, VietJet Air',
    'A.P.G. Distribution System, Vietnam Airlines',
]

# Cabin classes offered in the class drop-down.
class_types = [
    'economy',
    'business',
]

# Time-of-day buckets used for both the departing and landing time fields.
time_categories = [
    'Early Morning',
    'Morning',
    'Noon',
    'Afternoon',
    'Evening',
    'Late Night',
]

In [5]:
def get_price(brand, date, dcity, acity, class_type, flytime_hours, dtime_category, atime_category):
    """Predict the price of a single flight described by the arguments.

    A one-row DataFrame is assembled from the inputs, enriched with a
    ``Transit`` flag (via ``is_transit``) and calendar features derived from
    ``date``, passed through ``encoder_binary`` and ``encoder_ordinal``,
    scaled with ``scaler`` and fed to ``model``. The prediction is mapped back
    through ``scaler.inverse_transform`` before being returned.

    Parameters:
        brand: airline label; a comma in it marks the row as transit.
        date: departure date, anything ``pd.to_datetime`` accepts.
        dcity, acity: departure and arrival city labels.
        class_type: cabin class label.
        flytime_hours: flight duration; cast to float64 after encoding.
        dtime_category, atime_category: departure / arrival time buckets.

    Returns:
        The first (only) element of the inverse-scaled ``price`` column.
    """
    # ``scaler`` is applied to this column pair throughout the function.
    scaled_cols = ['price', 'flytime_hours']

    # Column order is kept identical to what the encoders and the booster
    # receive in the original pipeline: raw inputs, Transit, time buckets,
    # then the calendar features appended below.
    features = pd.DataFrame({
        'brand': [brand],
        'dcity': [dcity],
        'acity': [acity],
        'class_type': [class_type],
        'flytime_hours': [flytime_hours],
        'ddate': [date],
        'Transit': [is_transit(brand)],
        'dtime_category': [dtime_category],
        'atime_category': [atime_category],
    })

    # Calendar features derived from the departure date.
    departure = pd.to_datetime(features['ddate'])
    features['ddate'] = departure
    features['day_of_week'] = departure.dt.dayofweek
    features['month'] = departure.dt.month
    features['day_of_year'] = departure.dt.dayofyear

    # Categorical encoding, binary encoder first, then ordinal.
    for encoder in (encoder_binary, encoder_ordinal):
        features = encoder.transform(features)

    # The scaler works on the (price, flytime_hours) pair, so a throw-away
    # 'price' column is filled with the fly time just to scale flytime_hours.
    features['flytime_hours'] = features['flytime_hours'].astype(np.float64)
    features['price'] = features['flytime_hours']
    features[scaled_cols] = scaler.transform(features[scaled_cols])
    features = features.drop(['price', 'ddate'], axis=1)

    # Predict in scaled space, then undo the scaling on the same column pair.
    prediction = model.predict(xgb.DMatrix(features))
    features['price'] = prediction
    features['price'] = features['price'].astype(np.float64)
    features[scaled_cols] = scaler.inverse_transform(features[scaled_cols])
    return features['price'].to_numpy()[0]
    



In [8]:
import PySimpleGUI as sg

# One row per input field; the element keys match the get_price() arguments.
layout = [
    [sg.Text('Please fill out the following fields:')],
    [sg.Text('Brand', size=(20, 1)),
     sg.Combo(key='brand', values=brands, default_value='Vietravel Airlines')],
    [sg.Text('Date', size=(20, 1)),
     sg.InputText(key='date'),
     sg.CalendarButton("Select Date", close_when_date_chosen=True, target="date",
                       format='%Y-%m-%d', size=(10, 1))],
    [sg.Text('Departing city', size=(20, 1)),
     sg.Combo(key='dcity', values=cities, default_value='Hà Nội')],
    [sg.Text('Landing city', size=(20, 1)),
     sg.Combo(key='acity', values=cities, default_value='Hồ Chí Minh')],
    [sg.Text('Class', size=(20, 1)),
     sg.Combo(key='class_type', values=class_types, default_value='economy')],
    [sg.Text('Fly time (hours)', size=(20, 1)),
     sg.InputText(key='flytime_hours', default_text='0', enable_events=True)],
    [sg.Text('Departing time category', size=(20, 1)),
     sg.Combo(key='dtime_category', values=time_categories, default_value='Early Morning')],
    [sg.Text('Landing time category', size=(20, 1)),
     sg.Combo(key='atime_category', values=time_categories, default_value='Early Morning')],
    [sg.Button("Submit"), sg.Exit("Exit")],
    [sg.Text(key='output')],
]

# Create the window
window = sg.Window("Demo", layout)

# Event loop. The try/finally guarantees the window is closed even if an
# unexpected error escapes the loop.
try:
    while True:
        event, values = window.read()

        # Leave the loop when the user closes the window or presses Exit.
        # Checked first so `values` is never indexed after the window is gone.
        if event == "Exit" or event == sg.WIN_CLOSED:
            break

        # Live filter on the fly-time field: drop the last typed character
        # unless it is a digit or a dot.
        if event == 'flytime_hours':
            typed = values['flytime_hours']
            if typed and typed[-1] not in '0123456789.':
                window['flytime_hours'].update(typed[:-1])
            continue

        if event != "Submit":
            continue

        # The character filter still lets through text such as '' or '1.2.3',
        # so confirm the value parses and tell the user when it does not.
        try:
            float(values['flytime_hours'])
        except ValueError:
            window['output'].update('Fly time must be a number, e.g. 2.5')
            continue

        # An empty date parses to NaT, which would silently feed missing
        # calendar features to the model; an unparsable string raises.
        try:
            date_is_valid = not pd.isna(pd.to_datetime(values['date']))
        except (ValueError, TypeError):
            date_is_valid = False
        if not date_is_valid:
            window['output'].update('Please enter a valid date (YYYY-MM-DD)')
            continue

        # Keep the window alive if the prediction fails, but surface the
        # error to the user instead of hiding it.
        try:
            price = get_price(values['brand'], values['date'], values['dcity'],
                              values['acity'], values['class_type'],
                              values['flytime_hours'], values['dtime_category'],
                              values['atime_category'])
            window['output'].update(f'{int(price):,}'+' đ')
        except Exception as exc:
            window['output'].update(f'Could not estimate price: {exc}')
finally:
    window.close()

0    0.0
Name: flytime_hours, dtype: float64
0   -0.021138
Name: flytime_hours, dtype: float64
0    0.005843
Name: price, dtype: float64
0    840887.133467
Name: price, dtype: float64
