In [None]:
# Colab-specific helper module (google.colab), so this cell only runs inside Colab.
from google.colab import files
# Ask for a file upload and keep whatever files.upload() returns in `uploaded`.
# NOTE(review): presumably this is how 'transportation.csv', which later cells
# read from the working directory, reaches the runtime - confirm.
uploaded = files.upload()

### Random Forest Classifier Model

In [None]:
# !pip install joblib
# !pip install skl2onnx



In [None]:
import io

import h5py
import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Train a random-forest pipeline that maps (departure_location, price) to a
# transportation 'id', then persist the fitted classifier and the fitted
# preprocessing step.

# One seed for both the split and the forest so a re-run rebuilds the same model.
RANDOM_STATE = 42

# Load the dataset
data = pd.read_csv('transportation.csv')

# Input features (user budget and starting location) and the target 'id'.
# The id is cast to str so it is treated as a class label, not a number.
X = data[['departure_location', 'price']]
y = data['id'].astype(str)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Preprocessing: one-hot encode 'departure_location'; 'price' is passed through
# unchanged. Categories unseen during fit are encoded as all zeros.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['departure_location'])
    ],
    remainder='passthrough'
)

# Model Training - RandomForestClassifier (seeded; it was previously unseeded,
# so every run produced a different forest)
classifier = RandomForestClassifier(random_state=RANDOM_STATE)
model = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', classifier)
])

model.fit(X_train, y_train)

# Save the trained classifier. Pipeline.fit trains the step objects it was
# given, so `classifier` is the fitted estimator at this point.
classifier_filename = 'transportation_classifier.joblib'
joblib.dump(classifier, classifier_filename)

# Save the fitted preprocessing steps inside one HDF5 file.
# Previously joblib.dump(transformer, transformer_key) wrote a separate,
# extensionless file (e.g. 'cat_transformer') into the working directory and the
# HDF5 file only received metadata. Each fitted transformer is now serialised
# to bytes and stored as a uint8 dataset in its own group.
# To read one back:
#     joblib.load(io.BytesIO(hf[name][f'{name}_transformer'][()].tobytes()))
# NOTE: joblib payloads are pickles - only load files from a trusted source.
preprocessor_filename = 'transportation_preprocessor.h5'
with h5py.File(preprocessor_filename, 'w') as hf:
    hf.attrs['description'] = 'Transportation Prediction Model Preprocessor'
    preprocessor_transformers = model.named_steps['preprocessor'].transformers_
    for name, transformer, columns in preprocessor_transformers:
        # The 'remainder' entry is the passthrough placeholder, not a fitted object.
        if name == 'remainder':
            continue
        grp = hf.create_group(name)
        # h5py cannot store NumPy unicode ('U') arrays, which is what a plain
        # list of str becomes; write variable-length UTF-8 strings instead.
        grp.attrs.create(
            'columns',
            np.array(list(columns), dtype=object),
            dtype=h5py.string_dtype(),
        )
        transformer_key = f'{name}_transformer'
        buffer = io.BytesIO()
        joblib.dump(transformer, buffer)
        grp.create_dataset(
            transformer_key,
            data=np.frombuffer(buffer.getvalue(), dtype=np.uint8),
        )

### Deep Learning Model - In Development (Unfinished)


In [None]:
# !pip install protobuf==3.20.2


In [None]:
import pandas as pd

In [None]:
# Read the transportation CSV and print a short preview of it.
data = pd.read_csv('transportation.csv')
# head() returns the first five rows by default; the count is spelled out here.
print(data.head(n=5))


   id transportation_mode       provider  \
0   1               Plane      Wings Air   
1   2               Train         PT KAI   
2   3               Plane  Sriwijaya Air   
3   4               Plane       Citilink   
4   5               Train         PT KAI   

                                      starting_point transportation_class  \
0  Bandar Udara Frans Seda (Bandar Udara Wai Oti)...                Suite   
1                                     Parung Panjang                Suite   
2               Bandar Udara Internasional Kualanamu                Suite   
3  Bandar Udara Blangkejeren (Bandar Udara Gayo L...                Suite   
4                                         Karangantu             Economic   

    departure_location arrival_location     distance         price  
0  Nusa Tenggara Timur       Yogyakarta  1139.934101  2.564852e+06  
1                Bogor       Yogyakarta   422.782319  5.073388e+05  
2     Sumatera Selatan       Yogyakarta   877.938017  1.975361e+0

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

# Train a dense softmax network that maps (price, departure_location) to an
# encoded transportation 'id', then export it as a TensorFlow SavedModel.

# Seed TensorFlow for best-effort repeatability of weight initialisation and
# shuffling; the train/test split below is already seeded.
tf.random.set_seed(42)

# Load the dataset
data = pd.read_csv('transportation.csv')

# Separate features and target variable
X = data[['price', 'departure_location']]
y = data['id']

# Encode 'id' as consecutive integer class indices, as required by
# sparse_categorical_crossentropy; label_encoder maps indices back to ids.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Preprocessing: standard-scale 'price' and one-hot encode 'departure_location'
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['price']),
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['departure_location'])
    ],
    remainder='passthrough'
)

# Fit on the training split only, then densify for Keras.
# NOTE(review): .toarray() assumes the transformer returns a sparse matrix,
# which depends on how many location categories the data has - confirm.
X_train = preprocessor.fit_transform(X_train).toarray()
X_test = preprocessor.transform(X_test).toarray()

# Model Training - Neural Network
# The network is bound to `nn_model`, not `model`: `model` is the fitted
# scikit-learn pipeline from the Random Forest section, and the later
# `model.predict(user_input)` cell needs that pipeline when the notebook is
# run top to bottom.
output_units = len(np.unique(y))  # one softmax unit per distinct id
nn_model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(output_units, activation='softmax')
])

# Adjust learning rate with ExponentialDecay
initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=100, decay_rate=0.9
)
optimizer = Adam(learning_rate=lr_schedule)
nn_model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Early stopping callback: stop after 10 epochs without a val_loss improvement
# and roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train with a 20% validation split, early stopping, and a batch size of 32
nn_model.fit(
    X_train,
    y_train,
    epochs=1000,
    validation_split=0.2,
    batch_size=32,
    callbacks=[early_stopping],
)

# Save the model
saved_model_path = 'transportation_model_saved_model'
tf.saved_model.save(nn_model, saved_model_path)
print(f"Model saved as {saved_model_path}")


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Model saved as transportation_model_saved_model


### **Predict - Using the Deep Learning Model (TensorFlow)**

In [None]:
# Recommend a ticket with the exported network. Relies on `saved_model_path`,
# `preprocessor`, `label_encoder` and `data` from the deep learning training cell.

# Restore the network from the SavedModel directory
loaded_model = tf.saved_model.load(saved_model_path)

# One sample request: a price value and a departure location
new_data = pd.DataFrame(
    {
        'price': [10000000],
        'departure_location': ['Aceh'],
    }
)

# Run the request through the already-fitted preprocessing and densify it
new_data_transformed = preprocessor.transform(new_data).toarray()

# Feed the features to the restored model as a float32 tensor
model_input = tf.constant(new_data_transformed, dtype=tf.float32)
predictions = loaded_model(model_input)

# Highest-scoring class index of the single input row
recommended_class = tf.argmax(predictions, axis=1).numpy()[0]

# Map the class index back to the original 'id' value
recommended_ticket = label_encoder.inverse_transform([recommended_class])[0]

# Look up the dataset row(s) carrying that id and report both
recommended_row = data.loc[data['id'] == recommended_ticket]
print("Recommended Ticket:", recommended_ticket)
print("Recommended Row:", recommended_row, sep="\n")


Recommended Ticket: 750
Recommended Row:
      id transportation_mode   provider        starting_point  \
749  750                 Bus  Nusantara  nearest bus terminal   

    transportation_class departure_location arrival_location     distance  \
749                Suite     Tanjung Pinang       Yogyakarta  1400.585471   

            price  
749  1.050439e+06  


### **Predict - Using Joblib**

In [None]:
# Example prediction for one user request.
# `model` must be the scikit-learn Pipeline fitted in the Random Forest
# section: it is handed a raw DataFrame that still has a string column.
user_location = "Surakarta"
user_budget = 2500000
user_input = pd.DataFrame(
    {
        'departure_location': [user_location],
        'price': [user_budget],
    }
)
user_prediction = model.predict(user_input)

# The predicted label is converted back to an integer to match data['id']
predicted_id = int(user_prediction[0])
predicted_row = data[data['id'] == predicted_id]

# Print every column of the first matching row as "name: value"
print("Predicted Transportation Information:")
first_match = predicted_row.iloc[0]
for column, value in first_match.items():
    print(f"{column}: {value}")


Predicted Transportation Information:
id: 615
transportation_mode: Train
provider: PT KAI
starting_point: Kadipiro
transportation_class: Suite
departure_location: Surakarta
arrival_location: Yogyakarta
distance: 68.04500135455822
price: 81654.00162546987


In [None]:
import pandas as pd

# Print the complete dataset for manual inspection.
data = pd.read_csv('transportation.csv')

# Lift the row/column display limits for this print only. The previous
# pd.set_option(...) calls changed the limits kernel-wide, so every other cell
# (and any re-run of an earlier one) silently started dumping full frames too.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(data)


      id transportation_mode                    provider  \
0      1               Plane                   Wings Air   
1      2               Train                      PT KAI   
2      3               Plane               Sriwijaya Air   
3      4               Plane                    Citilink   
4      5               Train                      PT KAI   
5      6               Train                      PT KAI   
6      7               Plane                   Wings Air   
7      8                 Bus                  Sinar Jaya   
8      9               Plane      PT Trigana Air Service   
9     10               Plane      PT Trigana Air Service   
10    11               Plane      PT Trigana Air Service   
11    12               Train                      PT KAI   
12    13               Plane               Sriwijaya Air   
13    14                 Bus                    Haryanto   
14    15               Train                      PT KAI   
15    16                 Bus            