In [None]:
import warnings

import pandas as pd
from google.cloud import bigquery
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings("ignore")
#https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/workbench/ads_targetting/training-multi-class-classification-model-for-ads-targeting-usecase.ipynb
    

In [None]:
# Categorical feature columns (dummy-encoded further down in the notebook).
categ_cols = [
    'sla',
    'product_type',
    'brand',
    'service_type',
    'incident_type',
]
# Numeric feature columns (standardised further down in the notebook).
num_cols = [
    'open_to_close_hour',
    'response_to_resolved_hour',
]

# Raw label column and the name of the integer-encoded target derived from it.
label = 'severity_name'
target = 'severity'

# Columns dropped from the frame once the encoded target has been created.
unusedCols = ['severity_id', label]

In [None]:
# Load the raw incident export.
df = pd.read_csv("../../data/ML_Incident_20230316.csv")

# Encode the severity name as an ordinal class id.
# NOTE(review): 'Cosmatic' is presumably the spelling used in the CSV itself --
# confirm against the data before "correcting" it.
severity_to_id = {'Cosmatic': 0, 'Minor': 1, "Major": 2, "Critical": 3}
df[target] = df[label].map(severity_to_id)

# Series.map() yields NaN for every value that is not a key of the mapping.
# Such rows would only blow up much later, during one-hot encoding, so fail
# here with a message that names the offending labels.
unmapped_labels = df.loc[df[target].isna(), label].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unmapped values in column '{label}': {list(unmapped_labels)}"
    )

# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only appended a stray "None" line to the output.
df.info()


In [None]:
# Bar chart of how many rows fall into each encoded severity class.
fig, ax = plt.subplots(figsize=(15, 5))
ax = sns.countplot(data=df, x=target)

# Write the count above every bar.
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(f"{bar_height:.1f}", (bar.get_x() + 0.25, bar_height + 0.01))

plt.title(target.title())
plt.show()

In [None]:
# Distinct encoded severity ids present in the data; their count is used as the
# width of the model's softmax output layer.
listTarget = list(df[target].unique())
nTarget = len(listTarget)
print(f"TargetLabel :{listTarget } = {nTarget}")

# The raw severity columns are no longer needed once the encoded target exists.
df = df.drop(columns=unusedCols)

# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only appended a stray "None" line to the output.
df.info()

df.tail()

In [None]:
def encode_cols(data, col):
    """Append dummy (one-hot) indicator columns for ``data[col]``.

    The first category level is dropped (``drop_first=True``), so a column with
    k distinct values produces k-1 indicator columns named ``<col>_<value>``.
    The source column itself is NOT removed; the caller drops it.

    Args:
        data: DataFrame containing ``col``. It is not modified.
        col: Name of the categorical column to encode.

    Returns:
        A new DataFrame: ``data`` with the indicator columns appended on the right.
    """
    # Pin the indicator dtype. pandas >= 2.0 defaults to bool, and a frame that
    # mixes bool and float columns converts to an object array, which Keras
    # cannot turn into a tensor. uint8 was the default before pandas 2.0.
    categ = pd.get_dummies(data[col], prefix=col, drop_first=True, dtype="uint8")
    # Add the indicator columns to the right of the existing ones.
    return pd.concat([data, categ], axis=1)

# Dummy-encode every categorical field, then discard the original column.
# encode_cols() only appends the indicator columns, so the source column is
# dropped here; rebinding df (instead of inplace=True) keeps each step explicit.
for categ_col in categ_cols:
    df = encode_cols(df, categ_col).drop(columns=[categ_col])

# Check the resulting schema. DataFrame.info() prints its report itself and
# returns None, so it must not be wrapped in print() (that printed a stray "None").
df.info()

In [None]:
# Separate the encoded target from the feature columns; df itself is left intact.
y = df[target].copy()
X = df.drop(columns=[target])
print(X.shape, y.shape)

In [None]:
# 80 % of the rows go to training, the remainder to testing; the seed is fixed
# so the split is reproducible.
# NOTE(review): the split is not stratified on y -- a rare class may be missing
# from one side. Confirm whether that is intended.
split_options = {"train_size": 0.8, "random_state": 36}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the (train, test) shapes for the features, then for the labels.
for train_part, test_part in ((X_train, X_test), (y_train, y_test)):
    print(train_part.shape, test_part.shape)

In [None]:
# Numeric features: fit the scaler on the training split only, then apply the
# same transform to the test split.
scaler = StandardScaler()
# Work on explicit copies and replace the columns wholesale. The previous
# `.loc[:, num_cols] = ...` form tries to write the scaled floats in place into
# the existing columns of a split-derived frame (a deprecated upcast if the hour
# columns were parsed as integers -- and the global warnings filter hides it).
X_train = X_train.copy()
X_test = X_test.copy()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

# Labels: one-hot encode both splits with the SAME width. Without num_classes,
# to_categorical infers the width from the largest label in each array, so a
# split that lacks the highest class would get fewer columns than the other
# split and than the model's Dense(nTarget) output layer.
y_train_categ = to_categorical(y_train, num_classes=nTarget)
y_test_categ = to_categorical(y_test, num_classes=nTarget)

In [None]:
# Show the first training row to eyeball the scaled / encoded features.
X_train.head(1)

In [None]:
# Show the one-hot label vector of the first training row.
y_train_categ[0:1]

In [None]:
# Network hyper-parameters: one entry per hidden layer in the two lists below.
optimizer = "adam"
num_hidden_layers = 2
num_neurons = [32, 32]
activ_func = ["relu", "relu"]

# Stack the hidden layers; only the first one declares the input width
# (the number of feature columns in X_train).
model = Sequential()
for layer_idx in range(num_hidden_layers):
    layer_kwargs = {"activation": activ_func[layer_idx]}
    if layer_idx == 0:
        layer_kwargs["input_shape"] = (X_train.shape[1],)
    model.add(Dense(num_neurons[layer_idx], **layer_kwargs))

# Softmax output: one unit per target class.
model.add(Dense(nTarget, activation="softmax"))

# One-hot labels are used, hence categorical cross-entropy.
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()


In [None]:
EPOCHS = 100
BATCH_SIZE = 32

# Stop once val_loss has not improved for 10 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss when early stopping triggers; without it the model keeps the weights
# from `patience` epochs after the best one, and those are what gets evaluated.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# NOTE(review): the test split doubles as the validation set here, so early
# stopping is tuned on the same data that is later reported as "test" --
# consider carving a separate validation split out of the training data.
history = model.fit(
    X_train,
    y_train_categ,
    validation_data=(X_test, y_test_categ),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
    callbacks=[early_stopping],
)

In [None]:
# evaluate() returns the loss followed by each compiled metric; the model is
# compiled with metrics=["accuracy"], i.e. [loss, accuracy].
test_results = model.evaluate(X_test, y_test_categ, verbose=1)
test_loss, test_accuracy = test_results
# Label each value: the previous message printed the whole list as "Loss".
print(f"Test results - Loss: {test_loss:.4f} - Accuracy: {test_accuracy:.4f}")

In [None]:
# The figure the curves are drawn on, and the default matplotlib colour cycle.
plt.figure(figsize=(15,8))
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


def plot_metrics(history, metrics=('loss', 'accuracy')):
    """Plot training vs. validation curves for each metric on the current figure.

    Every metric gets its own panel of a 2x2 subplot grid, so at most four
    metrics can be shown. The training curve is solid and the validation curve
    dashed, both in the first colour of the module-level ``colors`` cycle.

    Args:
        history: Object returned by ``model.fit``; ``history.epoch`` and
            ``history.history[<metric>]`` / ``history.history['val_<metric>']``
            are read.
        metrics: Names of the metrics to plot. Defaults to the two metrics this
            notebook tracks. Previously the list was hard-coded inside the
            function, which made the ``'auc'`` axis-limit branch unreachable.
    """
    for n, metric in enumerate(metrics):
        name = metric.replace("_", " ").capitalize()
        plt.subplot(2, 2, n + 1)
        plt.plot(history.epoch, history.history[metric], color=colors[0], label='Train')
        plt.plot(history.epoch, history.history['val_' + metric],
                 color=colors[0], linestyle="--", label='Val')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        if metric == 'loss':
            # Anchor the loss axis at zero, keep the automatic upper limit.
            plt.ylim([0, plt.ylim()[1]])
        elif metric == 'auc':
            plt.ylim([0.8, 1])
        else:
            plt.ylim([0, 1])

        plt.legend()


plot_metrics(history)

In [None]:
# GCS_PATH = BUCKET_URI + "/path-to-save/"
# model.save(GCS_PATH)