#### Library import

In [None]:
import uuid

from functions import A_preprocessing as preprocessing
from functions import B_create_feature_store as feature_store
from functions import C_task1_model_train as create_model
from functions import D_task2 as task2
from functions import deploy_model as deploy

## 1. Pre-processing

In [None]:
# Location of the raw flight-delay CSV, relative to the notebook's working directory.
RAW_DATASET_PATH = "data/raw_dataset/full_data_flightdelay.csv"

# Load the raw dataset through the project's preprocessing helper.
df = preprocessing.read_dataset(RAW_DATASET_PATH)

In [None]:
# Preview the first rows of the loaded data as a quick sanity check.
df.head()

In [None]:
# Run the project's null-value cleanup on the dataset.
# NOTE(review): the return value is not assigned, so `df` is only cleaned if the helper
# modifies it in place — confirm against functions/A_preprocessing.
preprocessing.remove_null_values(df)

In [None]:
# Optional step, currently disabled; uncomment to run it.
#preprocessing.delayed_flights(df)

In [None]:
# Presumably adds the PART_OF_DAY column that the feature selection later reads from `df`.
# NOTE(review): the result is not assigned, so this relies on the helper adding the
# column in place — confirm against functions/A_preprocessing.
preprocessing.create_part_of_day_column(df)
# Optional visualisation, currently disabled.
#preprocessing.show_heatmap_part_of_day(df)

In [None]:
# Give every row a random UUID (stored as a string) to act as the flight identifier
# shared by the three feature-group frames built below.
df['ID'] = [str(uuid.uuid4()) for _ in range(len(df))]

# --- Weather features -------------------------------------------------------
weather_columns = [
    'ID',
    'PRCP',
    'SNOW',
    'SNWD',
    'TMAX',
    'AWND',
]
weather_df = df[weather_columns]

weather_feature_descriptions = [
    {"name": feature_name, "description": feature_description}
    for feature_name, feature_description in (
        ("id", "flight id"),
        ("prcp", "inches of precipitation for day"),
        ("snow", "inches of snowfall for day"),
        ("snwd", "inches of snow on ground for day"),
        ("tmax", "max temperature for day"),
        ("awnd", "max wind speed for day"),
    )
]

# --- Flight and airport features --------------------------------------------
flight_airport_columns = [
    'ID',
    'MONTH',
    'DAY_OF_WEEK',
    'DISTANCE_GROUP',
    'PART_OF_DAY',
    'SEGMENT_NUMBER',
    'CONCURRENT_FLIGHTS',
    'NUMBER_OF_SEATS',
    'CARRIER_NAME',
    'AIRPORT_FLIGHTS_MONTH',
    'AIRLINE_FLIGHTS_MONTH',
    'AIRLINE_AIRPORT_FLIGHTS_MONTH',
    'AVG_MONTHLY_PASS_AIRPORT',
    'AVG_MONTHLY_PASS_AIRLINE',
    'FLT_ATTENDANTS_PER_PASS',
    'GROUND_SERV_PER_PASS',
    'PLANE_AGE',
    'DEPARTING_AIRPORT',
    'LATITUDE',
    'LONGITUDE',
    'PREVIOUS_AIRPORT',
]
flight_airport_df = df[flight_airport_columns]

flight_airport_feature_descriptions = [
    {"name": feature_name, "description": feature_description}
    for feature_name, feature_description in (
        ("id", "flight id"),
        ("month", "month"),
        ("day_of_week", "day of week"),
        ("distance_group", "distance group to be flown by departing aircraft"),
        ("part_of_day", "part of the day that the flight is scheduled for departure"),
        ("segment_number", "the segment that this tail number is on for the day"),
        ("concurrent_flights", "concurrent flights leaving from the airport in the same departure block"),
        ("number_of_seats", "number of seats on the aircraft"),
        ("carrier_name", "carrier"),
        ("airport_flights_month", "avg airport flights per month"),
        ("airline_flights_month", "avg airline flights per month"),
        ("airline_airport_flights_month", "avg flights per month for airline and airport"),
        ("avg_monthly_pass_airport", "avg passengers for the departing airport for the month"),
        ("avg_monthly_pass_airline", "avg passengers for airline for month"),
        ("flt_attendants_per_pass", "flight attendants per passenger for airline"),
        ("ground_serv_per_pass", "ground service employees (service desk) per passenger for airline"),
        ("plane_age", "age of departing aircraft"),
        ("departing_airport", "departing airport"),
        ("latitude", "latitude of departing airport"),
        ("longitude", "longitude of departing airport"),
        ("previous_airport", "previous airport that aircraft departed from"),
    )
]

# --- Target -----------------------------------------------------------------
target_column = [
    'ID',
    'DEP_DEL15',
]
target_df = df[target_column]

target_feature_descriptions = [
    {"name": feature_name, "description": feature_description}
    for feature_name, feature_description in (
        ("id", "flight id"),
        ("dep_del15", "binary of a departure delay over 15 minutes (1 is yes)"),
    )
]

## 2. Hopsworks connection

In [None]:
# Open the Hopsworks connection. `project` is passed to the deployment step and `fs`
# to the feature-group / Task 2 dataset helpers later in the notebook.
project, fs = feature_store.connect_to_hopsworks()

In [None]:
# Feature-group creation is currently disabled. Each call passes a group name, the
# number 1, a description, ["id"], True, the `fs` handle, the matching dataframe and
# its feature descriptions. Uncomment to run it.
# NOTE(review): presumably 1 is the version and ["id"] the primary key — confirm
# against functions/B_create_feature_store.
# feature_store.create_feature_group("flight_airport", 1, "Flight and Airport Information", ["id"], True, fs, flight_airport_df, flight_airport_feature_descriptions)
# feature_store.create_feature_group("weather", 1, "Weather Information", ["id"], True, fs, weather_df, weather_feature_descriptions)
# feature_store.create_feature_group("target", 1, "Target Information", ["id"], True, fs, target_df, target_feature_descriptions)

## 3. Task 1

In [None]:

# # Neural Network model (disabled alternative, kept for reference)
# X_train, X_test, y_train, y_test = preprocessing.prepare_data_for_ML_model(df, True)

# nn_model = create_model.define_NN_model(X_train)
# history = create_model.train_NN_model(nn_model, X_train, X_test, y_train, y_test)
# create_model.show_NN_performance(history)

# MLPClassifier model
# 'ID' is a random per-row UUID, so it is excluded from the training data.
# The drop builds a new frame instead of mutating `df` in place: the cell can be
# re-run without a KeyError on the already-removed column, and `df` keeps its ID
# column for the other cells. errors='ignore' also lets the cell run when the ID
# column was never created.
model_df = df.drop(columns=['ID'], errors='ignore')
X_train, X_test, y_train, y_test = preprocessing.prepare_data_for_ML_model(model_df)

# Build and train the classifier; the helper returns the loss and accuracy histories
# for both splits, which are then plotted.
mlp_model = create_model.define_MLP_classifier()
train_losses, test_losses, train_accuracies, test_accuracies = create_model.train_MLP_classifier(mlp_model, X_train, X_test, y_train, y_test)
create_model.show_MLP_performance(train_losses, test_losses, train_accuracies, test_accuracies)


In [None]:
# Persist the trained MLP classifier through the project helper.
# NOTE(review): "modello" is presumably the output name/path — confirm against
# functions/C_task1_model_train.
create_model.save_model(mlp_model, "modello")

### Deploy the model

In [None]:
# Deploy the model using the Hopsworks `project` handle obtained when connecting.
# NOTE(review): presumably deploys the model saved in the previous cell — confirm
# against functions/deploy_model.
deploy.deploy_model(project)

## 4. Task 2

In [None]:
# Alternative data source (disabled): read the Task 2 data from a local CSV instead.
# df = preprocessing.read_dataset("data/raw_dataset/train_val.csv")
# df.drop(columns=['MONTH', 'DAY_OF_WEEK'], inplace=True)

# Fetch the Task 2 dataset via the feature store handle, then run the same
# part-of-day step used during pre-processing.
df = task2.getDataset(fs)
preprocessing.create_part_of_day_column(df)

# Task 2 uses PRCP as the prediction column; the listed label columns are handed to
# the preparation helper for cleaning.
task2_pred_col = "PRCP"
task2_labels_to_clean = ['DEPARTING_AIRPORT', 'PREVIOUS_AIRPORT', "PART_OF_DAY"]

X_train, X_test, y_train, y_test = preprocessing.prepare_data_for_ML_model(
    df,
    predCol=task2_pred_col,
    labelToClean=task2_labels_to_clean,
)
task2.svm(X_train, X_test, y_train, y_test)