# Reading the data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
doc = pd.read_csv('data.csv')
doc

Unnamed: 0,Cargo_ID,Size_Category,Weight (kg),Hazardous,Stackable,Duration (days),Transport Type,A1,A2,A3,...,J2,J3,J4,J5,J6,J7,J8,J9,J10,Slot_ID
0,C00001,Small,39,0,1,3,Manual,1,1,0,...,0,0,1,1,1,0,1,1,1,F9
1,C00002,Small,36,0,1,1,Manual,0,1,0,...,0,0,0,0,0,0,0,1,1,A5
2,C00003,Small,23,0,1,3,Manual,0,1,0,...,0,0,1,1,0,0,1,1,0,C1
3,C00004,Small,26,0,1,2,Manual,0,1,0,...,0,1,0,0,0,1,1,0,1,C5
4,C00005,Oversized,288,1,0,1,Forklift,1,1,1,...,0,1,1,1,1,0,1,1,0,E7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,C09996,Large,159,0,0,1,Forklift,0,1,0,...,1,0,1,0,0,1,0,1,1,G9
9996,C09997,Large,127,0,0,1,Forklift,0,1,0,...,1,1,1,0,1,1,0,0,0,A3
9997,C09998,Oversized,275,1,0,1,Forklift,1,0,0,...,0,0,0,1,0,0,0,1,1,J10
9998,C09999,Small,16,0,1,1,Manual,0,0,1,...,1,0,0,1,1,0,1,0,1,C10


In [3]:
doc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Columns: 108 entries, Cargo_ID to Slot_ID
dtypes: int64(104), object(4)
memory usage: 8.2+ MB


In [4]:
doc.columns

Index(['Cargo_ID', 'Size_Category', 'Weight (kg)', 'Hazardous', 'Stackable',
       'Duration (days)', 'Transport Type', 'A1', 'A2', 'A3',
       ...
       'J2', 'J3', 'J4', 'J5', 'J6', 'J7', 'J8', 'J9', 'J10', 'Slot_ID'],
      dtype='object', length=108)

# Spliiting the data 

In [5]:
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(doc, test_size=0.2, random_state=42)

In [6]:
Y_train = train_set['Slot_ID']
Y_test = test_set['Slot_ID']

Y_train.shape, Y_test.shape

((8000,), (2000,))

In [7]:
doc = train_set.drop('Slot_ID', axis=1).copy()
test_doc = test_set.drop('Slot_ID', axis=1).copy()

doc.shape, test_doc.shape

((8000, 107), (2000, 107))

# Creating Data Preprocessing Pipeline

In [8]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder ,OneHotEncoder, StandardScaler

pipeline = ColumnTransformer([
    ("drop", "drop", ['Cargo_ID']),
    ('ordinal', OrdinalEncoder(), ['Size_Category']),
    ('one_hot', OneHotEncoder(), ['Transport Type']),
    ('salar', StandardScaler(), ['Weight (kg)', 'Duration (days)'])
], remainder='passthrough')

X_train = pipeline.fit_transform(doc)
X_test = pipeline.transform(test_doc)

In [9]:
X_train

array([[3., 0., 1., ..., 0., 0., 1.],
       [1., 1., 0., ..., 0., 1., 1.],
       [3., 0., 1., ..., 0., 1., 1.],
       ...,
       [1., 1., 0., ..., 1., 0., 0.],
       [2., 1., 0., ..., 1., 0., 1.],
       [3., 0., 1., ..., 1., 1., 1.]])

In [10]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()

Y_train = encoder.fit_transform(Y_train)
Y_test = encoder.transform(Y_test)

label_mapping = dict(zip(range(len(encoder.classes_)), encoder.classes_))
print("Label Mapping:", label_mapping)

Label Mapping: {0: 'A1', 1: 'A10', 2: 'A2', 3: 'A3', 4: 'A4', 5: 'A5', 6: 'A6', 7: 'A7', 8: 'A8', 9: 'A9', 10: 'B1', 11: 'B10', 12: 'B2', 13: 'B3', 14: 'B4', 15: 'B5', 16: 'B6', 17: 'B7', 18: 'B8', 19: 'B9', 20: 'C1', 21: 'C10', 22: 'C2', 23: 'C3', 24: 'C4', 25: 'C5', 26: 'C6', 27: 'C7', 28: 'C8', 29: 'C9', 30: 'D1', 31: 'D10', 32: 'D2', 33: 'D3', 34: 'D4', 35: 'D5', 36: 'D6', 37: 'D7', 38: 'D8', 39: 'D9', 40: 'E1', 41: 'E10', 42: 'E2', 43: 'E3', 44: 'E4', 45: 'E5', 46: 'E6', 47: 'E7', 48: 'E8', 49: 'E9', 50: 'F1', 51: 'F10', 52: 'F2', 53: 'F3', 54: 'F4', 55: 'F5', 56: 'F6', 57: 'F7', 58: 'F8', 59: 'F9', 60: 'G1', 61: 'G10', 62: 'G2', 63: 'G3', 64: 'G4', 65: 'G5', 66: 'G6', 67: 'G7', 68: 'G8', 69: 'G9', 70: 'H1', 71: 'H10', 72: 'H2', 73: 'H3', 74: 'H4', 75: 'H5', 76: 'H6', 77: 'H7', 78: 'H8', 79: 'H9', 80: 'I1', 81: 'I10', 82: 'I2', 83: 'I3', 84: 'I4', 85: 'I5', 86: 'I6', 87: 'I7', 88: 'I8', 89: 'I9', 90: 'J1', 91: 'J10', 92: 'J2', 93: 'J3', 94: 'J4', 95: 'J5', 96: 'J6', 97: 'J7', 98: 

# Training 

In [13]:
from sklearn.svm import SVC

svc_clf = SVC()
svc_clf.fit(X_train, Y_train)
label_mapping[svc_clf.predict([X_train[0]])[0]]

'D4'

In [14]:
svc_clf.classes_

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [15]:
from sklearn.tree import DecisionTreeClassifier
tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf.fit(X_train, Y_train)

In [16]:
from sklearn.ensemble import RandomForestClassifier

forest_clf = RandomForestClassifier(random_state=42)
forest_clf.fit(X_train, Y_train)

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2

model = Sequential([
    Input(X_train.shape[1:]),
    Dense(300, activation='selu', kernel_regularizer=l2(0.001)),
    Dropout(0.3), 
    Dense(300, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    Dense(200, activation='selu', kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(200, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(150, activation='relu', kernel_regularizer=l2(0.001)),
    Dense(150, activation='relu', kernel_regularizer=l2(0.001)),
    Dense(100, activation='softmax', kernel_initializer='he_normal')
])


model.summary()

In [12]:
optimizer = "nadam"
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

history = model.fit(X_train, Y_train, epochs=100,
                    validation_data=(X_test, Y_test))

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.0107 - loss: 5.7702 - val_accuracy: 0.0055 - val_loss: 5.4370
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0081 - loss: 5.3419 - val_accuracy: 0.0085 - val_loss: 5.0913
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0119 - loss: 5.0219 - val_accuracy: 0.0145 - val_loss: 4.8607
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0119 - loss: 4.8203 - val_accuracy: 0.0145 - val_loss: 4.7322
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0120 - loss: 4.7064 - val_accuracy: 0.0145 - val_loss: 4.6647
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0121 - loss: 4.6493 - val_accuracy: 0.0145 - val_loss: 4.6328
Epoch 7/100
[1m250/25

# Testing

In [17]:
from sklearn.metrics import accuracy_score

y_pred_proba = model.predict(X_test)  
y_pred = np.argmax(y_pred_proba, axis=1)  
test_accuracy = accuracy_score(Y_test, y_pred)

print("Test Accuracy:", test_accuracy)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Test Accuracy: 0.0145


In [20]:
Y_pred = svc_clf.predict(X_test)
accuracy_score(Y_test, Y_pred)

0.0105

In [21]:
Y_pred = tree_clf.predict(X_test)
accuracy_score(Y_test, Y_pred)

0.008

In [22]:
Y_pred = forest_clf.predict(X_test)
accuracy_score(Y_test, Y_pred)

0.0055

# Saving and Wrapping Up

In [20]:
import pickle

with open('pipeline.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

In [22]:
with open('pipeline.pkl', 'rb') as file:
    loaded_pipeline = pickle.load(file)

test = loaded_pipeline.transform(test_doc)
test == X_test

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [15]:
model.save("storage_optimizer.keras")

In [18]:
model = tf.keras.models.load_model('storage_optimizer.keras')

y_pred_proba = model.predict(X_test)  
y_pred = np.argmax(y_pred_proba, axis=1)  
test_accuracy = accuracy_score(Y_test, y_pred)

print("Test Accuracy:", test_accuracy)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Test Accuracy: 0.0145
