# In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array
import pillow_heif
from PIL import Image

# Read a HEIC image and return it in OpenCV's BGR channel order
def read_heic_image(file_path):
    """Decode the HEIC/HEIF file at *file_path* into a BGR NumPy array.

    The file is opened with ``pillow_heif.open_heif`` and its raw buffer is
    wrapped in a PIL image (honouring the row stride reported by the
    decoder). The result is converted from RGB to BGR so that it uses the
    same channel order as images loaded through ``cv2.imread``.

    Args:
        file_path: Path to the HEIC/HEIF file.

    Returns:
        A 3-channel NumPy array in BGR order.
    """
    heif_file = pillow_heif.open_heif(file_path)
    image = Image.frombytes(
        heif_file.mode,
        heif_file.size,
        heif_file.data,
        "raw",
        heif_file.mode,
        heif_file.stride,
    )
    # The decoded mode is taken from the file and is not guaranteed to be
    # "RGB" (e.g. grayscale or alpha-carrying images). Normalise to plain RGB
    # so the RGB->BGR conversion below always receives three channels.
    if image.mode != "RGB":
        image = image.convert("RGB")
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Load every supported image in a folder and preprocess it for the model
def load_and_process_images(folder_path, image_size=(256, 256)):
    """Load, resize and scale the ``.heic`` / ``.tiff`` images in a folder.

    Files are visited in the order returned by ``os.listdir``. Only names
    ending in ``.heic`` or ``.tiff`` (case-insensitive) are considered; HEIC
    files go through ``read_heic_image`` and everything else through
    ``cv2.imread``. A file whose loader returns ``None`` is skipped without
    any message, so the output can contain fewer entries than the folder has
    matching files.

    Args:
        folder_path: Directory to scan (not recursive).
        image_size: Target size handed to ``cv2.resize``.

    Returns:
        ``np.array`` built from the list of processed images, each one
        converted with ``img_to_array`` and divided by 255.0.
    """
    processed = []
    for entry in os.listdir(folder_path):
        lowered = entry.lower()
        is_heic = lowered.endswith('.heic')
        if not (is_heic or lowered.endswith('.tiff')):
            continue

        full_path = os.path.join(folder_path, entry)
        if is_heic:
            pixels = read_heic_image(full_path)
        else:
            pixels = cv2.imread(full_path)

        # Unreadable file: leave it out of the batch.
        if pixels is None:
            continue

        resized = cv2.resize(pixels, image_size)
        processed.append(img_to_array(resized) / 255.0)  # scale by 1/255

    return np.array(processed)

# Load the training targets
correct_areas_file = 'D://__ciencia_de_dados_python_2023//arroz//ARROZ.xlsx'
correct_areas_df = pd.read_excel(correct_areas_file)
# Reference values for the regression (original note: second column of the
# sheet). The column header as written here carries a leading space.
y_train = correct_areas_df[' Chalky Grains (%)'].values

# Load and preprocess the training images
training_image_folder = 'D://__ciencia_de_dados_python_2023//arroz//RGB'
X_train = load_and_process_images(training_image_folder)

# Check that the training data looks right
print(f"Formato dos dados de treinamento: {X_train.shape}")
print(f"Formato das áreas corretas: {y_train.shape}")

# Images and labels are paired purely by position (folder listing order vs.
# spreadsheet row order), and the loader silently skips unreadable files.
# Fail early with an explicit message instead of a generic error further down.
if len(X_train) != len(y_train):
    raise ValueError(
        f"Number of training images ({len(X_train)}) does not match the "
        f"number of reference values ({len(y_train)})."
    )

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Use ResNet50 as the base for transfer learning
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

# Freeze the layers of the base model
for layer in base_model.layers:
    layer.trainable = False

# Add the custom regression head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1)(x)

# Build the final model
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model
batch_size = 16
# Ceiling division: plain floor division yields 0 steps when there are fewer
# than `batch_size` training images and leaves the trailing partial batch out
# of the per-epoch step count.
steps_per_epoch = max(1, (len(X_train) + batch_size - 1) // batch_size)
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    validation_data=(X_val, y_val),
    epochs=50
)

# Evaluate the model
loss, mae = model.evaluate(X_val, y_val)
print(f'Mean Absolute Error na validação: {mae}')

# Apply the model to new images
new_image_folder = 'D://__ciencia_de_dados_python_2023//arroz//ipad'
X_new = load_and_process_images(new_image_folder)

# Predict the corrected values for the new images
predicted_areas = model.predict(X_new)

# Save the predictions to Excel
new_image_names = [f for f in os.listdir(new_image_folder) if f.lower().endswith(('.heic', '.tiff'))]
# The names come from a separate directory listing, while the loader drops
# files it cannot read; a length mismatch means names and predictions no
# longer line up row by row.
if len(new_image_names) != len(predicted_areas):
    raise ValueError(
        f"Found {len(new_image_names)} image files but {len(predicted_areas)} "
        "predictions; at least one image could not be loaded."
    )
new_df = pd.DataFrame({'Image Name': new_image_names, 'Predicted Chalk Impact': predicted_areas.flatten()})
output_file = 'D://__ciencia_de_dados_python_2023//arroz//TESTE//IPADFINAL_CORRIGIDO_ML.xlsx'
# Make sure the destination folder exists before writing.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
new_df.to_excel(output_file, index=False)

print("Previsões salvas com sucesso.")
