<a href="https://colab.research.google.com/github/mroziken/watermeter_ml/blob/main/water_meter_numbers_recognition_model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow gcsfs
!pip install google-auth google-auth-oauthlib google-auth-httplib2 google-cloud-storage



In [2]:
import tensorflow as tf
import pandas as pd
import datetime
from tensorflow.keras.callbacks import TensorBoard
import time

In [3]:
# GCS locations of the CSV files for the training, validation and test splits
train_csv_path, val_csv_path, test_csv_path = (
    'gs://garden-watermeter-readings_metadata/training-dataset.csv',
    'gs://garden-watermeter-readings_metadata/validation-dataset.csv',
    'gs://garden-watermeter-readings_metadata/test-dataset.csv',
)

In [4]:
# Authenticate the current user through the Colab-specific `google.colab` module.
# NOTE(review): presumably these credentials are what allow the gs:// reads in
# the following cells — confirm. This import is expected to fail outside Colab.
from google.colab import auth
auth.authenticate_user()

In [5]:
# The CSV files carry no header row, so column names are supplied explicitly.
column_names = ['image_uri', 'label']

# Read the three splits in train / validation / test order.
train_df, val_df, test_df = (
    pd.read_csv(csv_path, header=None, names=column_names)
    for csv_path in (train_csv_path, val_csv_path, test_csv_path)
)

In [6]:
# Function to read and decode images from GCS
def load_image(image_uri, label):
    """Read a JPEG file, decode it to three channels, resize and rescale it.

    Args:
        image_uri: Path of the JPEG file, handed to ``tf.io.read_file``.
        label: Label belonging to the image; passed through unchanged.

    Returns:
        A ``(image, label)`` pair where ``image`` has been resized to
        128x160 and divided by 255.
    """
    encoded = tf.io.read_file(image_uri)
    decoded = tf.image.decode_jpeg(encoded, channels=3)
    # Fixed target size: the picture is stretched to 128x160 regardless of
    # its original proportions.
    resized = tf.image.resize(decoded, [128, 160])
    # Scale the pixel values down by 255 to normalise them to [0, 1].
    return resized / 255.0, label



In [7]:
# Convert DataFrame to TensorFlow Dataset
def df_to_dataset(df, shuffle=True, batch_size=32):
    """Build a batched, prefetched ``tf.data.Dataset`` of (image, label) pairs.

    Args:
        df: DataFrame with an ``image_uri`` column (paths readable by
            ``tf.io.read_file``) and a ``label`` column castable to int32.
            The frame is not modified.
        shuffle: When True, the examples are shuffled with a buffer covering
            the whole frame.
        batch_size: Number of examples per batch.

    Returns:
        A dataset yielding ``(images, labels)`` batches: float32 images of
        shape ``(batch, 128, 160, 3)`` and int32 labels of shape ``(batch,)``.
    """
    image_uris = df['image_uri'].values
    labels = df['label'].values
    ds = tf.data.Dataset.from_tensor_slices((image_uris, labels))

    if shuffle:
        # Shuffle the lightweight (uri, label) pairs BEFORE decoding. Shuffling
        # after the map would keep every decoded image of the dataset in the
        # shuffle buffer. max(..., 1) keeps an empty frame from requesting a
        # zero-sized buffer.
        ds = ds.shuffle(buffer_size=max(len(df), 1))

    def _load_example(uri, label):
        # load_image uses only graph-compatible TF ops, so it is traced
        # directly instead of going through tf.py_function; this lets the map
        # below run in parallel.
        image, label = load_image(uri, tf.cast(label, tf.int32))
        return tf.ensure_shape(image, [128, 160, 3]), tf.ensure_shape(label, [])

    # Parallel map still yields elements in input order, so the unshuffled
    # validation/test datasets keep their row order.
    ds = ds.map(_load_example, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    return ds

In [8]:
# Build the input pipelines; only the training split is shuffled.
batch_size = 32
train_ds = df_to_dataset(train_df, shuffle=True, batch_size=batch_size)
val_ds, test_ds = (
    df_to_dataset(split_df, shuffle=False, batch_size=batch_size)
    for split_df in (val_df, test_df)
)



In [9]:
# Define a simple CNN model: two convolution/pooling stages followed by a
# dense classifier over 10 categories.
model = tf.keras.Sequential([
    # Declare the input explicitly; passing `input_shape=` to the first layer
    # is deprecated in Keras 3 and triggers a warning there.
    tf.keras.Input(shape=(128, 160, 3)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')  # 10 categories
])

In [10]:
# Integer labels go with the sparse categorical cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Each run logs to its own timestamped directory under logs/fit
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorboard_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)

# Time only the fit() call
start_time = time.time()
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[tensorboard_callback],
)
end_time = time.time()
training_time = end_time - start_time

print(f'Total training time: {training_time / 60:.2f} minutes')

# Measure loss and accuracy on the test set
test_loss, test_acc = model.evaluate(test_ds)
print(f'Test accuracy: {test_acc}')

# Persist the trained model to an .h5 file
model.save('water_meter_numbers_recognition_model.h5')

Epoch 1/10
Epoch 2/10


In [None]:
# Convert the trained Keras model to TensorFlow Lite.
# `tflite_model` holds the converter output; the next cell writes it to
# 'model.tflite' in binary mode.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

In [None]:
# Write the converted TensorFlow Lite model to disk as a binary file
with open('model.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# Load the TensorBoard notebook extension, then open TensorBoard on logs/fit,
# the parent directory of the timestamped run directories written during training.
%load_ext tensorboard
%tensorboard --logdir logs/fit