# Leafy Air ML Model Training
**This notebook showcases how you can train, save, and deploy a predictive model for flight costs on Vertex AI**

Import required libraries

In [4]:
import pandas as pd
import random
import datetime
import numpy as np
from bson import ObjectId
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow import keras
from google.colab import auth
from google.cloud import storage
from google.cloud import aiplatform

Generate synthetic data to train the model on

In [5]:
# In our case, we used the model to listen to real-time data from pub/sub. Therefore, at first we generated a message and then transformed it into the training data
# You can skip this part and generate the data directly as a pandas dataframe, use your custom data, or take the data from open-source datasets

def generate_random_path():
    return [[random.uniform(-90, 90), random.uniform(-180, 180)] for _ in range(3)]

def generate_random_message(flight_id):
    ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
    path = generate_random_path()
    disrupted = random.choice([True, False])
    extra_length = random.uniform(10, 20)
    distance_to_arrival = random.uniform(300, 1500)
    location = {
        'lat': random.uniform(-90, 90),
        'long': random.uniform(-180, 180)
    }
    velocity = {
        'speed': random.uniform(200, 300),
        'heading': 'tbd'  # 'tbd' is a placeholder
    }

    message = {
        'flight_id': flight_id,
        'ts': ts,
        'path': path,
        'disrupted': disrupted,
        'extra_length': extra_length,
        'distance_to_arrival': distance_to_arrival,
        'location': location,
        'velocity': velocity
    }

    return message

def convert_message_to_df_row(message):
    ts = message['ts']
    distance_to_destination = message['distance_to_arrival']
    estimated_time_left = distance_to_destination / message['velocity']['speed']
    delay_time = message['extra_length'] / message['velocity']['speed']
    delay_cost = delay_time * 105 * 60
    fuel_cost_per_hour = 1500
    total_cost_per_hour = fuel_cost_per_hour + delay_cost
    lat = message['location']['lat']
    long = message['location']['long']
    speed = message['velocity']['speed'] * 3.6
    extra_length = message['extra_length']
    total_cost = (fuel_cost_per_hour * estimated_time_left) + delay_cost

    row = {
        'Timestamp': ts,
        'Distance_to_Destination': distance_to_destination,
        'Estimated_Time_Left': estimated_time_left,
        'Delay_Time': delay_time,
        'Delay_Cost': delay_cost,
        'Fuel_Cost_per_Hour': fuel_cost_per_hour,
        'Total_Cost_per_Hour': total_cost_per_hour,
        'Latitude': lat,
        'Longitude': long,
        'Speed' : speed,
        'Extra_Length' : extra_length,
        'Total_Cost': total_cost,
        '_id': ObjectId()
    }

    return row

# Number of synthetic messages to generate
num_messages = 100

# List to hold generated data
data = []

# Generating synthetic data
flight_id = ObjectId()  # Unique flight ID
for _ in range(num_messages):
    message = generate_random_message(flight_id)
    df_row = convert_message_to_df_row(message)
    data.append(df_row)

# Converting list of dictionaries to DataFrame
df = pd.DataFrame(data)

# Displaying the DataFrame
df.head()

Unnamed: 0,Timestamp,Distance_to_Destination,Estimated_Time_Left,Delay_Time,Delay_Cost,Fuel_Cost_per_Hour,Total_Cost_per_Hour,Latitude,Longitude,Speed,Extra_Length,Total_Cost,_id
0,2024-08-19 10:53:25.267913+00:00,1201.205588,5.459381,0.067084,422.630195,1500,1922.630195,-42.3065,-130.161444,792.093465,14.760256,8611.701893,66c324258e468500683797b1
1,2024-08-19 10:53:25.267952+00:00,804.291974,2.818423,0.069158,435.694072,1500,1935.694072,1.248617,-84.634812,1027.330363,19.735527,4663.327964,66c324258e468500683797b2
2,2024-08-19 10:53:25.267973+00:00,1080.417385,3.853921,0.066383,418.214568,1500,1918.214568,-53.866992,116.172625,1009.232683,18.610045,6199.095406,66c324258e468500683797b3
3,2024-08-19 10:53:25.267991+00:00,1326.461007,5.476853,0.049516,311.953554,1500,1811.953554,11.278643,80.720979,871.898439,11.992585,8527.233127,66c324258e468500683797b4
4,2024-08-19 10:53:25.268007+00:00,410.509086,1.546813,0.070208,442.310863,1500,1942.310863,-82.647224,-121.051507,955.404852,18.632537,2762.530462,66c324258e468500683797b5


Build and train a tensorflow model

In [None]:
# Features and target
X = df[['Speed', 'Distance_to_Destination', 'Extra_Length']].values
y = df['Total_Cost'].values

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
model2 = Sequential()
model2.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model2.add(Dense(32, activation='relu'))
model2.add(Dense(1))  # Output layer for regression

# Compile the model
model2.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model2.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.2)

# Predict on the test set
y_pred = model2.predict(X_test).flatten()

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Calculate R-squared
r2 = r2_score(y_test, y_pred)
print(f"R-squared: {r2}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Mean Squared Error: 888952.3132850232
R-squared: 0.8637520860621728


Make a prediction on one data point

In [None]:
# Simulating the JSON-like data
json_data = {
  "instances": [
    [250, 500, 15]
  ]
}

# Extract the data from JSON
new_data = np.array(json_data["instances"])

# Make predictions
predictions = model2.predict(new_data).flatten()

print(predictions)  # This will output the predicted total cost for the new data point


[3036.3599]


Log into Google Cloud, configure the project, and save the model to the corresponding bucket

In [None]:
auth.authenticate_user()

project_id = 'connected-aircraft-ist'
!gcloud config set project {project_id}

# Define the bucket and folder
BUCKET_NAME = 'connected-aircraft-ist-eu-notebooks'
FOLDER_NAME = 'leafyair_model3'

# Save the model directly to GCS
GS_PATH = f'gs://{BUCKET_NAME}/{FOLDER_NAME}/model.keras'
tf.saved_model.save(model2, GS_PATH)

print(f'Model saved to {GS_PATH}')


Model saved to gs://connected-aircraft-ist-eu-notebooks/leafyair_model3/model.keras


Upload the model to the Model Registry

In [None]:
aiplatform.init(project='connected-aircraft-ist', location='europe-west1')

model_display_name = 'leafyair_model_keras3'
model_description = 'keras model2 for airlines'
bucket_name = 'connected-aircraft-ist-eu-notebooks'
model_path = 'leafyair_model3/model.keras'

model = aiplatform.Model.upload(
    display_name=model_display_name,
    artifact_uri=f'gs://{bucket_name}/{model_path}/',
    serving_container_image_uri='us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-6:latest',
    description=model_description,
)

INFO:google.cloud.aiplatform.models:Creating Model
INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/502454695591/locations/europe-west1/models/9193504902997868544/operations/1809849190755336192
INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/502454695591/locations/europe-west1/models/9193504902997868544@1
INFO:google.cloud.aiplatform.models:To use this Model in another session:
INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/502454695591/locations/europe-west1/models/9193504902997868544@1')


Deploy the model! You can follow [this guide](https://cloud.google.com/vertex-ai/docs/general/deployment) to deploy the model to a live endpoint.
