In [1]:
import os
import pandas as pd
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class WaterMeterDataset(Dataset):
    """Map-style dataset pairing meter images with their numeric readings.

    Indexing returns ``(transform(image), label)``, where the label is
    converted to a scalar ``float32`` tensor. If no transform (or a falsy one)
    is supplied, a default pipeline is used: resize to 224x224, convert to a
    tensor, then normalize per channel with the constants below.

    Args:
        images: Indexable collection of images accepted by ``transform``.
        labels: Indexable collection of values convertible with ``float()``.
        transform: Optional callable applied to each image on access.
    """

    def __init__(self, images, labels, transform=None):
        if not transform:
            # NOTE(review): these mean/std values look like the usual
            # ImageNet statistics used with pretrained torchvision models.
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
            ])
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images held by the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the transformed image and its label tensor at ``idx``."""
        processed = self.transform(self.images[idx])
        target = torch.tensor(float(self.labels[idx]), dtype=torch.float32)
        return processed, target

class SimpleOCRModel(nn.Module):
    """Regression network: a pretrained ResNet-18 followed by a small MLP head.

    The ResNet-18 is fetched through ``torch.hub`` and used as-is, so the head
    consumes its 1000-dimensional output and maps it to one scalar per image.
    """

    def __init__(self):
        super().__init__()
        # torch.hub may download the repository and the weights on first use.
        self.feature_extractor = torch.hub.load(
            'pytorch/vision:v0.10.0', 'resnet18', pretrained=True
        )
        # Layer order is kept fixed so the Sequential indices (and therefore
        # the state_dict keys) stay 0..3.
        head_layers = [
            nn.Linear(1000, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 1),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the backbone, then the head; returns one value per input row."""
        return self.classifier(self.feature_extractor(x))

def load_roi(imagem, maskara):
    """Load an image and keep only the pixels selected by its mask.

    Args:
        imagem: Path to the image file.
        maskara: Path to the mask file. It is read as single-channel
            grayscale; non-zero mask pixels mark the region to keep.

    Returns:
        A PIL RGB image in which pixels outside the mask are zeroed.

    Raises:
        OSError: If either file cannot be read or decoded by OpenCV.
    """
    image = cv2.imread(imagem)
    # cv2.imread reports failure by returning None rather than raising, so
    # fail here with a clear message instead of deep inside bitwise_and.
    if image is None:
        raise OSError(f"Could not read image file: {imagem}")

    mask = cv2.imread(maskara, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise OSError(f"Could not read mask file: {maskara}")

    roi = cv2.bitwise_and(image, image, mask=mask)
    # OpenCV decodes to BGR channel order; PIL expects RGB.
    return Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))

def prepare_data(data, images_path, masks_path):
    """Build parallel lists of ROI images and target values from a table.

    For each row of ``data`` the ``photo_name`` column gives the file name that
    is looked up in both ``images_path`` and ``masks_path``, and the ``value``
    column gives the target. Rows whose image or mask file does not exist are
    skipped without notice.

    Args:
        data: DataFrame with ``photo_name`` and ``value`` columns.
        images_path: Directory containing the images.
        masks_path: Directory containing the masks (same file names).

    Returns:
        A tuple ``(inputs, labels)`` of equal-length lists: the masked ROI
        images produced by ``load_roi`` and the matching ``value`` entries.
    """
    rois, targets = [], []
    for _, record in data.iterrows():
        photo_name = record['photo_name']
        img_path = os.path.join(images_path, photo_name)
        msk_path = os.path.join(masks_path, photo_name)
        target = record['value']

        # Skip rows for which either file is missing on disk.
        if not os.path.exists(img_path) or not os.path.exists(msk_path):
            continue

        rois.append(load_roi(img_path, msk_path))
        targets.append(target)
    return rois, targets

# Configuration: dataset root plus the CSV and folders expected beneath it.
DATASET_PATH = "/home/guilherme/Documentos/Dataset's/WaterMeters"
CSV_FILE, IMAGES_FOLDER, MASKS_FOLDER = (
    os.path.join(DATASET_PATH, leaf) for leaf in ("data.csv", "images", "masks")
)

# Read the annotation table and hold out 20% of the rows (fixed seed so the
# split is reproducible).
data = pd.read_csv(CSV_FILE)
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Load the masked ROI images and their labels for each split into memory.
train_images, train_labels = prepare_data(train_data, IMAGES_FOLDER, MASKS_FOLDER)
test_images, test_labels = prepare_data(test_data, IMAGES_FOLDER, MASKS_FOLDER)

# Wrap both splits as datasets (default preprocessing transform).
train_dataset = WaterMeterDataset(train_images, train_labels)
test_dataset = WaterMeterDataset(test_images, test_labels)

# Batch loaders: only the training split is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

# Model, loss and optimizer. The GPU is used whenever CUDA is available.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = SimpleOCRModel()
model = model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training Loop
def train_model(model, train_loader, test_loader, criterion, optimizer, epochs=10):
    """Train ``model`` and print mean train/validation loss after each epoch.

    Args:
        model: Module producing one value per sample (output shape ``(B, 1)``).
        train_loader: Iterable of ``(images, labels)`` batches used for
            optimisation; must support ``len()``.
        test_loader: Iterable of ``(images, labels)`` batches used for
            validation; must support ``len()``.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer already bound to the model's parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` instance, updated in place.
    """
    # Follow the model's own device rather than depending on a module-level
    # global; a model without parameters falls back to the CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_batches = len(train_loader)
    val_batches = len(test_loader)

    for epoch in range(epochs):
        model.train()
        total_train_loss = 0.0

        for images, labels in train_loader:
            images, labels = images.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            # squeeze(-1) removes only the trailing output dimension. A bare
            # squeeze() would also collapse the batch dimension when the last
            # batch holds a single sample, giving a shape mismatch with labels.
            outputs = model(images).squeeze(-1)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        # Validation
        model.eval()
        total_val_loss = 0.0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(run_device), labels.to(run_device)
                outputs = model(images).squeeze(-1)
                val_loss = criterion(outputs, labels)
                total_val_loss += val_loss.item()

        # An empty loader has no batches to average over: report NaN instead
        # of raising ZeroDivisionError.
        mean_train = total_train_loss / train_batches if train_batches else float('nan')
        mean_val = total_val_loss / val_batches if val_batches else float('nan')

        print(f"Epoch {epoch+1}: Train Loss = {mean_train:.4f}, "
              f"Val Loss = {mean_val:.4f}")

    return model

# Run training with the default number of epochs and keep the returned model.
trained_model = train_model(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
)

# Prediction Function
def predict_values(model, test_loader):
    """Run ``model`` over ``test_loader`` and collect predictions and targets.

    Args:
        model: Module producing one value per sample.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        A tuple ``(predictions, actual_values)`` of equal-length lists of
        NumPy scalars, one entry per sample, in loader order.
    """
    # Follow the model's own device rather than depending on a module-level
    # global; a model without parameters falls back to the CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    predictions = []
    actual_values = []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(run_device)
            # Flatten the (B, 1) output so every prediction is a scalar, like
            # the labels. Leaving 1-element arrays yields an (n, 1) layout that
            # silently broadcasts against the (n,) targets in array arithmetic.
            outputs = model(images).cpu().numpy().reshape(-1)
            predictions.extend(outputs)
            actual_values.extend(labels.cpu().numpy())

    return predictions, actual_values

# Run the trained model over the held-out split.
predictions, actual_values = predict_values(model=trained_model, test_loader=test_loader)

# Optional: regression metrics comparing the predictions with the true values.
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

mae = mean_absolute_error(y_true=actual_values, y_pred=predictions)
mse = mean_squared_error(y_true=actual_values, y_pred=predictions)
# RMSE is the square root of the MSE, so it is in the same units as the labels.
rmse = np.sqrt(mse)

print(f"Mean Absolute Error: {mae}")
print(f"Root Mean Squared Error: {rmse}")

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /home/guilherme/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/guilherme/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:04<00:00, 11.4MB/s]


Epoch 1: Train Loss = 10188829.4405, Val Loss = 8117108.0176
Epoch 2: Train Loss = 10152770.1040, Val Loss = 8109395.3584
Epoch 3: Train Loss = 10172426.1626, Val Loss = 8088500.0477
Epoch 4: Train Loss = 10143198.7055, Val Loss = 9904294.5156
Epoch 5: Train Loss = 10128089.1271, Val Loss = 8263454.9806
Epoch 6: Train Loss = 10086420.9070, Val Loss = 8180614.6426
Epoch 7: Train Loss = 10058545.6048, Val Loss = 8189321.4243
Epoch 8: Train Loss = 10132391.1288, Val Loss = 8534264.1074
Epoch 9: Train Loss = 10045634.1717, Val Loss = 9355662.6748
Epoch 10: Train Loss = 9863079.4353, Val Loss = 8385714.6167
Mean Absolute Error: 485.6060791015625
Root Mean Squared Error: 2936.067626953125
