Commit
Merge pull request #8 from milank94/model-training-update
Model training update
milank94 committed Oct 24, 2021
2 parents 93a2ba5 + feca75a commit 36afd89
Showing 6 changed files with 84 additions and 47 deletions.
8 changes: 8 additions & 0 deletions Makefile
@@ -9,3 +9,11 @@ run-eda:
process-data:
@echo "Processing data"
@python data_processing/main.py

train-model:
@echo "Training model"
@python model_training/main.py

evaluate-model:
@echo "Evaluating model"
@python model_evaluation/model_evaluation.py
7 changes: 5 additions & 2 deletions environment.yml
@@ -1,11 +1,14 @@
name: motor-fault-classification
channels:
- defaults
- conda-forge
dependencies:
- pip>=8.1.2
- python=3.8.8
- numpy=1.20.2
- pandas=1.2.3
- keras=2.4.3
- matplotlib=3.4.1
- scikit-learn=0.24.2
- matplotlib=3.4.1
- scipy=1.6.3
- pip:
- tensorflow>=0.12.0rc1
8 changes: 8 additions & 0 deletions model_evaluation/model_eval_config.py
@@ -0,0 +1,8 @@
"""
Any config specific to the model evaluation step goes here.
"""

from pathlib import Path

# Output data path
OUTPUT_DATA_DIR = Path('./output')
39 changes: 39 additions & 0 deletions model_evaluation/model_evaluation.py
@@ -0,0 +1,39 @@
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import load_model
from data_acquisition.main import get_save_data
from data_processing.main import get_save_train_test_data
import model_eval_config as config

# Load the data
dataset = get_save_data()

# Process the data and train/test split
train_test_data = get_save_train_test_data(dataset)

# Prepare the data
X_test = np.array(train_test_data.X_test)
y_test = np.array(train_test_data.y_test)

# Loading the model and checking accuracy on the test data
model_path = config.OUTPUT_DATA_DIR / Path('best_model.pkl')
model = load_model(model_path)
test_preds = np.argmax(model.predict(X_test), axis=-1)
print(accuracy_score(y_test, test_preds))

# Comparing the actual values versus the predicted values
data_dict = {
0: 'normal',
1: 'horizontal misalignment',
2: 'imbalance',
3: 'vertical misalignment',
4: 'overhang',
5: 'underhang'
}
results = pd.DataFrame([y_test, test_preds]).T
results.columns = ['Actual', 'Prediction']
results = results.applymap(lambda x: data_dict[x])

print(results)
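
For reference, model.predict returns one probability vector per test sample (one entry per fault class); np.argmax(..., axis=-1) reduces each vector to the index of its most likely class, and accuracy_score then compares those integer predictions against y_test. A minimal standalone sketch with toy probabilities (illustrative values only, not data from this project):

import numpy as np
from sklearn.metrics import accuracy_score

# Toy softmax outputs for three samples over three hypothetical classes
probs = np.array([[0.1, 0.7, 0.2],
                  [0.6, 0.3, 0.1],
                  [0.2, 0.2, 0.6]])
preds = np.argmax(probs, axis=-1)     # -> array([1, 0, 2])
y_true = np.array([1, 0, 1])
print(accuracy_score(y_true, preds))  # -> 0.666...
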
55 changes: 18 additions & 37 deletions model_training/main.py
@@ -1,34 +1,26 @@
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import logging
import os
from data_acquisition.main import get_data
from data_processing.main import get_train_test_data
from data_acquisition.main import get_save_data
from data_processing.main import get_save_train_test_data
import model_training_config as config

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

from keras.models import load_model
from keras.callbacks import ModelCheckpoint

from sklearn.metrics import accuracy_score

path_parent = os.path.dirname(os.getcwd())
os.chdir(path_parent)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint

logging.basicConfig(level=logging.INFO)

# Model Development
config.OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)

# Model Development
# Load the data
dataset = get_data()
dataset = get_save_data()

# Process the data and train/test split
train_test_data = get_train_test_data(dataset)

train_test_data = get_save_train_test_data(dataset)

# Prepare the data
X_train = np.array(train_test_data.X_train)
@@ -41,24 +33,25 @@
model = Sequential()
model.add(LSTM(config.LSTM_UNITS, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(config.OUTPUT_SIZE, activation=config.ACTIVATION))
model.summary()

print(model.summary())

# Training the model
chk = ModelCheckpoint('best_model.pkl', monitor=config.MONITOR, save_best_only=True, mode='auto', verbose=1)
model_path = config.OUTPUT_DATA_DIR / Path('best_model.pkl')
chk = ModelCheckpoint(model_path, monitor=config.MONITOR, save_best_only=True, mode='auto', verbose=1)
model.compile(loss=config.LOSS_FUNCTION, optimizer=config.OPTIMIZER, metrics=['accuracy'])
hist = model.fit(
X_train,
y_train,
epochs=config.EPOCHS,
batch_size=int(X_train.shape[0]),
callbacks=[chk],
validation_split=config.VAL_SPLOT
validation_split=config.VAL_SPLIT
)

# Model Validation
# Plotting training and validation accuracy per epoch
fig, axs = plt.subplots(nrows=1, figsize=(11, 9))
_, axs = plt.subplots(nrows=1, figsize=(11, 9))
file_location = config.OUTPUT_DATA_DIR / Path('plots/model_accuracy.png')
plt.rcParams['font.size'] = '14'

for label in (axs.get_xticklabels() + axs.get_yticklabels()):
@@ -71,16 +64,4 @@
axs.set_ylabel('Accuracy', fontsize=14)
axs.set_xlabel('Epoch', fontsize=14)
plt.legend(['train', 'val'], loc='upper left')
plt.show()

# Loading the model and checking accuracy on the test data
model = load_model('best_model.pkl')

test_preds = model.predict_classes(X_test)
accuracy_score(y_test, test_preds)

# Comparing the actual values versus the predicted values
data_dict = {0: 'normal', 1: 'horizontal misalignment', 2: 'imbalance'}
results = pd.DataFrame([y_test, test_preds]).T
results.columns = ['Actual', 'Prediction']
results.applymap(lambda x: data_dict[x])
plt.savefig(file_location)
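
For context on the checkpoint round trip between model_training/main.py and model_evaluation.py: ModelCheckpoint with save_best_only=True re-saves the full model whenever the monitored metric improves, and load_model later reads that artifact back (the .pkl name is just a filename; Keras picks its own serialization format). A minimal self-contained sketch with a toy model and random data, assuming a TensorFlow 2.x install; the names and shapes below are illustrative, not the project's:

import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint

# Random stand-in data: 64 samples, 4 features, binary target
X = np.random.rand(64, 4)
y = np.random.randint(0, 2, size=(64,))

model = Sequential([
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Save the best model (by validation loss) seen during training,
# mirroring the ModelCheckpoint usage above
chk = ModelCheckpoint('toy_best_model.h5', monitor='val_loss',
                      save_best_only=True, mode='auto', verbose=0)
model.fit(X, y, epochs=3, validation_split=0.2, callbacks=[chk], verbose=0)

# Reload the checkpointed model, as model_evaluation.py does
best = load_model('toy_best_model.h5')
best.summary()
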
14 changes: 6 additions & 8 deletions model_training/model_training_config.py
@@ -2,21 +2,19 @@
Any config specific to model training step goes here.
"""

from pathlib import Path

# General configuration
MONITOR = 'val_loss'
VAL_SPLIT = 0.2
LSTM_UNITS = 100
OPTIMIZER = 'adam'
EPOCHS = 100

# +
# Binary classification
# OUTPUT_SIZE = 1
# ACTIVATION = 'sigmoid'
# LOSS_FUNCTION = 'binary_crossentropy'
# -

# Multi-class classification
OUTPUT_SIZE = 3
OUTPUT_SIZE = 6
ACTIVATION = 'softmax'
LOSS_FUNCTION = 'sparse_categorical_crossentropy'

# Output data path
OUTPUT_DATA_DIR = Path('./output')
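
One detail worth keeping in mind with these multi-class settings: sparse_categorical_crossentropy expects integer class labels (0 through OUTPUT_SIZE - 1), not one-hot vectors, which is why the targets stay integer-coded through training and evaluation. A small sketch with toy values, assuming a TensorFlow 2.x install:

import numpy as np
import tensorflow as tf

# Three samples with integer labels drawn from OUTPUT_SIZE = 6 classes
y_true = np.array([2, 0, 5])

# Toy per-class probabilities (rows sum to 1), standing in for softmax outputs
y_pred = tf.nn.softmax(tf.random.uniform((3, 6)), axis=-1)

# Per-sample losses; no one-hot encoding of y_true is required
loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
print(loss.numpy())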
