Alterações a serem feitas na V2:
- Alterar as camadas.
- 

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load


import os
import random

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt

from skimage.io import imshow
from sklearn.metrics import classification_report, confusion_matrix,f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.datasets import mnist
import tensorflow.keras.utils as np_utils
from tensorflow.keras.utils import img_to_array , to_categorical
from tensorflow.keras import layers, models

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
class ResultData:
    """Bundle the four arrays of a train/test split under named attributes.

    The constructor takes a single 4-tuple ordered as
    ``(train_images, test_images, train_labels, test_labels)`` and stores
    each element, unchanged, on the attribute of the same name.
    """
    train_images: np.ndarray
    test_images: np.ndarray
    train_labels: np.ndarray
    test_labels: np.ndarray

    def __init__(self, t: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]):
        # Attribute names listed in the same positional order as the tuple.
        field_order = ("train_images", "test_images", "train_labels", "test_labels")
        for position, field_name in enumerate(field_order):
            setattr(self, field_name, t[position])

In [None]:
def Load20kPaths(
        base_path:str = "/kaggle/input/solarpanelpowerloss/data",
        num_images_per_class:int = 10000
    ) -> list[str]:
    """Return a balanced, shuffled sample of relative image paths.

    Every sub-directory of ``base_path`` must have a name that parses as an
    integer. Files found in directories whose name parses to 0 form the
    "zero" group; files from every other directory form the "other" group.
    ``num_images_per_class`` paths are drawn from each group, so the result
    has ``2 * num_images_per_class`` entries.

    Args:
        base_path: Directory that contains one sub-directory per class.
        num_images_per_class: Number of paths to draw from each group.

    Returns:
        Paths of the form ``<class_dir>/<file>`` (relative to ``base_path``),
        in shuffled order.

    Raises:
        ValueError: If ``num_images_per_class`` is negative, a non-empty
            directory name is not an integer, or either group holds fewer
            than ``num_images_per_class`` files.
    """
    if num_images_per_class < 0:
        raise ValueError("num_images_per_class must be non-negative")

    zpaths:list[str] = list()
    opaths:list[str] = list()
    print("[INFO]: loading image paths...")

    # os.listdir returns entries in arbitrary order. Sorting both levels makes
    # the seeded shuffles below pick the same sample on every machine.
    for d in sorted(os.listdir(base_path)):
        files = sorted(os.listdir(os.path.join(base_path, d)))
        if not files:
            continue
        group = zpaths if int(d) == 0 else opaths
        group.extend(os.path.join(d, file) for file in files)

    # Check the real requirement instead of dataset-specific magic numbers,
    # and raise explicitly so the check survives `python -O`.
    if len(zpaths) < num_images_per_class or len(opaths) < num_images_per_class:
        raise ValueError(
            f"need {num_images_per_class} images per class but found "
            f"{len(zpaths)} for class 0 and {len(opaths)} for the other classes"
        )
    print("[INFO]:Verifique os seguintes caminhos: ", zpaths[-2:],opaths[-2:])

    # Seeds the module-level RNG (as before) so the selection is repeatable.
    random.seed(42)
    random.shuffle(zpaths)
    random.shuffle(opaths)
    paths = zpaths[0:num_images_per_class] + opaths[0:num_images_per_class]
    # Interleave the two groups so consecutive slices contain both classes.
    random.shuffle(paths)
    return paths

In [None]:
# Carrega um bloco de num_images_per_class/2 caminhos (um quarto da amostra total de 2*N) e o divide em treino/teste

def LoadAndSplitDataset(
        paths:list[str],base_path:str = "/kaggle/input/solarpanelpowerloss/data",
        num_images_per_class:int = 10000) -> ResultData:
    """Load one chunk of images, scale them and split them into train/test.

    Each path is read with OpenCV, resized to 192x192 and converted with
    ``img_to_array``. Pixel values are divided by 255. The integer label of
    an image is the first component of its relative path. 25% of the chunk
    is held out as the test split (``random_state=42``), and labels are
    one-hot encoded over two classes.

    Args:
        paths: Image paths relative to ``base_path``; exactly
            ``int(num_images_per_class/2)`` of them.
        base_path: Directory the relative paths are resolved against.
        num_images_per_class: Only used to derive the expected chunk size.

    Returns:
        A ``ResultData`` holding train/test images and one-hot labels.

    Raises:
        AssertionError: If ``paths`` has the wrong length or any image
            could not be read.
    """
    expected = int(num_images_per_class/2)
    assert len(paths) == expected, (
        f"expected {expected} paths, got {len(paths)}"
    )
    print("[INFO]: Loading the images...")

    classes:list[int] = list()
    images:list[np.ndarray] = list()

    for path in tqdm(paths):
        image = cv2.imread(os.path.join(base_path,path))
        if (image is None):
            print(f"Image load failed with path: {path}")
            continue
        # The paths are produced with os.path.join, so split on the platform
        # separator instead of a hard-coded "/" to recover the class folder.
        classes.append(int(os.path.normpath(path).split(os.sep)[0]))
        images.append( img_to_array( cv2.resize( image,(192, 192) ) ) )

    # scale the raw pixel intensities to the range [0, 1]
    # NOTE(review): dtype="float" is float64; float32 would halve the memory
    # footprint, but it would change the returned dtype - confirm before changing.
    data = np.array(images, dtype="float") / 255.0
    labels = np.array(classes, dtype=np.int32)

    loaded = len(data)
    # Unreadable images were skipped above; the chunk must still be complete.
    assert loaded == expected and loaded == len(labels), (
        f"{expected - loaded} of {expected} images could not be loaded"
    )

    (train_images, test_images, train_l, test_l) = train_test_split( data, labels, test_size=0.25, random_state=42)
    print("[INFO]: Training image arrays = ", train_l)
    print("[INFO]: Testing image arrays = ",test_l)

    # Convert the integer labels to one-hot vectors:
    # labels -> [1, 0, ...] becomes train_labels -> [[0.0, 1.0], [1.0, 0.0], ...]
    train_labels = np_utils.to_categorical(train_l, num_classes=2)
    test_labels = np_utils.to_categorical(test_l, num_classes=2)
    print("\n[INFO]: Data.shape = ",data.shape)
    return ResultData((train_images, test_images, train_labels, test_labels))

In [None]:
def ShowPics(train_images:np.ndarray, train_labels:np.ndarray):
    """Show up to the first 20 images in a 5x4 grid, captioned with a label.

    Each image is cast to float32 and converted from BGR to RGB before it is
    drawn. The caption under an image is ``int(train_labels[i][1])``.

    Args:
        train_images: Indexable collection of images.
            Presumably BGR arrays as loaded by OpenCV - confirm with caller.
        train_labels: Indexable collection of label rows with at least two
            entries each. Presumably one-hot rows, so entry 1 is the class
            index - confirm with caller.
    """
    print("[INFO]: Plotting images ...")
    plt.figure(figsize=(10,10))
    # Never index past the available samples; the grid holds at most 20.
    shown = min(20, len(train_images), len(train_labels))
    for i in range(shown):
        plt.subplot(5,4,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        # The image is cast to float32 before the colour conversion.
        img_float32 = np.float32(train_images[i])
        plt.imshow(cv2.cvtColor(img_float32, cv2.COLOR_BGR2RGB) )
        # labels
        plt.xlabel( int(train_labels[i][1]) )
    plt.show()

In [None]:
# Versão 1:
# - Não utilizar ativação softmax.
#Versão erro:
# - CategoricalCrossentropy(from_logits=True)
# Versão 2:
# - Utilizar softmax, mas alterar tipo de loss.
# Versão 3:
# - Usar layers.BatchNormalization()
# Versão 4:
# - Reduzir o tamanho da imagem para 128 pixels.
# Versão 5:
# - Alterar test it para fazer a predição como (atual, não usar com softmax):
#
# Softmax
#pred = self.instance.predict(test_images)
#y_pred = np.argmax(pred, axis=1)
#y_true = np.argmax(test_labels, axis=1)
#
# Logits + Sparse
#y_pred = np.argmax(pred, axis=1)

# Versão 6: dar variabilidade pro dataset?
#data_augmentation = tf.keras.Sequential([
#    layers.RandomFlip("horizontal"),
#    layers.RandomRotation(0.02),
#    layers.RandomZoom(0.02),
#])

# Versão 7: evita overfitting
# callback = tf.keras.callbacks.EarlyStopping(
#    monitor='val_loss',
#    patience=5,
#    restore_best_weights=True
#)

# Versão 8: modelo melhor?
#MobileNetV2
class Model:
    """Thin wrapper around a Keras ``Sequential`` CNN with two output logits.

    Typical use: ``configure()`` to add the layers, ``compile()`` to attach
    the loss and optimizer, ``Run()`` to train, ``Testit()`` to evaluate and
    ``Save()`` to persist the model.
    """
    instance: Sequential

    def __init__(self, s: Sequential = None):
        """Create an empty ``Sequential`` model.

        Args:
            s: Ignored. Kept (now optional) so existing calls such as
                ``Model(Sequential)`` keep working; a fresh model is always
                created.
        """
        self.instance = models.Sequential()

    def configure(self, img_size:int = 192, kernel_size = 3, color_channel = 3) -> None:
        """Add the convolutional and dense layers, then print the summary.

        Args:
            img_size: Height and width of the square input images.
            kernel_size: Side of the square convolution kernels.
            color_channel: Number of input channels.
        """
        kernel = (kernel_size, kernel_size)
        # NOTE(review): the first Conv2D uses img_size as its filter count as
        # well as the input size - confirm this coupling is intended.
        self.instance.add(layers.Conv2D(img_size, kernel_size=kernel, activation='relu', input_shape=(img_size, img_size, color_channel))) # 3 colour channels
        self.instance.add(layers.BatchNormalization())
        self.instance.add(layers.MaxPooling2D((2, 2)))

        self.instance.add(layers.Conv2D(128, kernel_size=kernel, activation='relu'))
        self.instance.add(layers.BatchNormalization())
        self.instance.add(layers.MaxPooling2D((2, 2)))

        self.instance.add(layers.Conv2D(64, kernel_size=kernel, activation='relu'))
        self.instance.add(layers.Flatten())
        self.instance.add(layers.Dropout(0.5))

        self.instance.add(layers.Dense(32, activation='relu')) # first dense layer: 32 units
        self.instance.add(layers.BatchNormalization())
        self.instance.add(layers.Dense(8, activation='relu')) # second dense layer: 8 units

        # No activation here: the model emits raw logits, matching the
        # from_logits=True loss configured in compile().
        self.instance.add(layers.Dense(2))#activation="softmax"))
        self.instance.summary()

    def compile(self, optimizer:str = "adam") -> None:
        """Compile with categorical cross-entropy on logits and accuracy.

        Args:
            optimizer: Optimizer identifier passed to Keras.
        """
        self.instance.compile(
            optimizer=optimizer,
            loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
            metrics=['accuracy']
        )

    def Run(self, rd: ResultData,epochs = 30, batch_size:int = 10):
        """Train on ``rd``'s training split, validating on its test split.

        Args:
            rd: Dataset holding the train/test images and labels.
            epochs: Number of training epochs.
            batch_size: Mini-batch size.

        Returns:
            The value returned by ``fit`` (a Keras history object).
        """
        history =  self.instance.fit(
            rd.train_images,
            rd.train_labels,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(rd.test_images, rd.test_labels)
        )
        return history

    def Testit(self, rd: ResultData) -> float:
        """Evaluate on the test split; print metrics and plot a confusion matrix.

        Args:
            rd: Dataset whose ``test_images`` / ``test_labels`` are used.

        Returns:
            Fraction of test samples whose predicted class matches the label.
        """
        print("[INFO]: Generating test predictions...")

        predictions = self.instance.predict(rd.test_images)
        # argmax maps each row to the index of its largest entry,
        # e.g. [0.0, 0.0, 1.0] -> 2
        y_pred_class = np.argmax(predictions, axis=1)
        y_test_class = np.argmax(rd.test_labels, axis=1)

        # Vectorised comparison instead of a Python-level index loop.
        acc = float(np.mean(y_pred_class == y_test_class))
        print('Accuracy: %.2f %%' % (acc*100))

        f1score = f1_score(y_test_class,y_pred_class,average="weighted")
        print("F_measure: ",round(100*f1score, 2),"%")

        print("Classes verdadeiras únicas:", np.unique(y_test_class))
        print("Classes preditas únicas:", np.unique(y_pred_class))
        # Confusion matrix
        cm = confusion_matrix(y_test_class, y_pred_class)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        plt.show()
        return acc

    def Save(self,accuracy:float):
        """Persist the model when ``accuracy`` is above 0.5.

        Args:
            accuracy: Score used to decide whether the model is saved.
        """
        if accuracy > 0.5:
            target = r'./kaggle/working/chewey.keras'
            # The path is relative to the current directory, so the parent
            # folder may not exist yet; create it instead of failing after
            # training has finished.
            # NOTE(review): if '/kaggle/working/chewey.keras' (absolute) was
            # intended, change the path - confirm.
            os.makedirs(os.path.dirname(target), exist_ok=True)
            self.instance.save(target)

In [None]:
def PlotTrainingAccuracy(history):
    """Draw the 'accuracy' and 'val_accuracy' series of a training history.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values; both 'accuracy' and 'val_accuracy' are read.
    """
    # Each curve is labelled with the key it is read from.
    for metric_name in ('accuracy', 'val_accuracy'):
        series = history.history[metric_name]
        plt.plot(series, label=metric_name)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='best')

In [None]:
def main():
    """Train the CNN on four consecutive chunks of the sampled dataset.

    Samples ``2 * num_images_per_class`` paths, builds and compiles the
    model once, then for each of four equally sized chunks: loads and splits
    the images, previews them, trains for 10 epochs, plots the accuracy
    curves and evaluates. The model is saved based on the accuracy of the
    last chunk.
    """
    base_path = "/kaggle/input/solarpanelpowerloss/data"
    num_images_per_class:int = 10000
    assert(num_images_per_class < 14500)
    assert(num_images_per_class % 4 == 0)

    paths:list[str] = Load20kPaths(base_path,num_images_per_class)

    m: Model = Model(Sequential)
    m.configure()
    m.compile()
    accuracy:float = 0.0

    # The 2*N sampled paths are consumed in four chunks of N/2 each, which is
    # the chunk size LoadAndSplitDataset expects.
    chunk_size = num_images_per_class // 2
    for i in range(4):
        rd = LoadAndSplitDataset(
                paths[i*chunk_size:(i+1)*chunk_size],
                base_path,
                num_images_per_class
            )
        ShowPics(rd.train_images, rd.train_labels)
        history = m.Run(rd, 10)
        PlotTrainingAccuracy(history)
        accuracy = m.Testit(rd)

    m.Save(accuracy)


main()