In [None]:
from google.colab import drive
drive.mount('/content/drive')

# ***GENERATING POSITIVE AND NEGATIVE REPORTS***

In [None]:
import tarfile
import os
import xml.etree.ElementTree as ET

def extract_tar_bz2(tar_path, extract_path):
    os.makedirs(extract_path, exist_ok=True)
    with tarfile.open(tar_path, "r:bz2") as tar:
        tar.extractall(path=extract_path)
    return extract_path

def parse_bug_reports(directory):
    positive_reports = []
    negative_reports = []

    for dir_root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".xml"):
                file_path = os.path.join(dir_root, file)
                tree = ET.parse(file_path)
                xml_root = tree.getroot()

                for bug in xml_root.findall("bug"):
                    bug_id = bug.find("bug_id").text if bug.find("bug_id") is not None else "Unknown"
                    description = bug.find("short_desc").text if bug.find("short_desc") is not None else "No description"
                    resolution = bug.find("resolution").text if bug.find("resolution") is not None else "UNRESOLVED"

                    report_text = f"Bug ID: {bug_id}\nDescription: {description}\nResolution: {resolution}\n\n"

                    if resolution in ["FIXED", "VERIFIED", "DUPLICATE"]:
                        positive_reports.append(report_text)
                    else:
                        negative_reports.append(report_text)

    return positive_reports, negative_reports

def save_reports(positive_reports, negative_reports, output_dir):
    os.makedirs(output_dir, exist_ok=True)

    with open(os.path.join(output_dir, "positive_reports.txt"), "w", encoding="utf-8") as pos_file:
        pos_file.writelines(positive_reports)

    with open(os.path.join(output_dir, "negative_reports.txt"), "w", encoding="utf-8") as neg_file:
        neg_file.writelines(negative_reports)

def main():
    tar_path = "/content/drive/MyDrive/linux.tar.bz2"
    extract_path = "/content/sample_data/extracted"
    output_dir = "/content/sample_data/out"

    print("Extracting tar.bz2 file...")
    extract_tar_bz2(tar_path, extract_path)

    print("Parsing bug reports...")
    positive_reports, negative_reports = parse_bug_reports(extract_path)

    print("Saving reports...")
    save_reports(positive_reports, negative_reports, output_dir)

    print("Processing complete! Positive and negative reports saved.")

if __name__ == "__main__":
    main()


Extracting tar.bz2 file...
Parsing bug reports...
Saving reports...
Processing complete! Positive and negative reports saved.


# ***TRAINING MODEL ON LINUX DATASET***

In [None]:
import tensorflow as tf
from keras import backend as K
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
import numpy as np
from keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout, BatchNormalization, Input, concatenate
from keras.models import Model, load_model
from sklearn.model_selection import train_test_split
from transformers import AutoModel, AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch
import gc
import os
import json
from datetime import datetime
from itertools import islice
import multiprocessing
import tempfile
import psutil
import time

os.environ['TF_ENABLE_ONEDNN_OPTS'] = '1'
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

NUM_WORKERS = min(2, max(1, multiprocessing.cpu_count() - 1))
DEFAULT_THRESHOLD = 0.3

def print_memory_usage(label):
    process = psutil.Process(os.getpid())
    memory_mb = process.memory_info().rss / (1024 * 1024)
    print(f"[MEMORY] {label}: {memory_mb:.2f} MB")

class TextClassificationDataset(Dataset):
    def __init__(self, reports, tokenizer, max_length):
        self.reports = reports
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.reports)

    def __getitem__(self, idx):
        text = " ".join(self.reports[idx][:self.max_length])
        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        return {k: v.squeeze(0) for k, v in encoding.items()}

def load_reports(path, max_reports=None, chunk_size=1000):
    reports = []
    with open(path, 'r', encoding='UTF-8') as infile:
        while True:
            if max_reports and len(reports) >= max_reports:
                break
            chunk_to_read = min(chunk_size, max_reports - len(reports)) if max_reports else chunk_size
            chunk = list(islice(infile, chunk_to_read))
            if not chunk:
                break
            reports.extend(chunk)
            if len(reports) % (chunk_size * 5) == 0:
                gc.collect()
    return reports

def process_reports(reports):
    processed = []
    chunk_size = 5000
    for i in range(0, len(reports), chunk_size):
        chunk = reports[i:i+chunk_size]
        processed.extend([report.replace(' \n', '').split(' ') for report in chunk])
        if i + chunk_size < len(reports):
            gc.collect()
    return processed

def generate_embeddings_batch(model_dict, dataset, output_path, batch_size=16, max_length=30):
    model = model_dict["model"]
    device = model_dict["device"]

    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=NUM_WORKERS,
        pin_memory=True if device.type == 'cuda' else False
    )

    embedding_dim = model.config.hidden_size
    total_samples = len(dataset)

    with open(output_path, 'w+b') as f:
        memmap = np.memmap(
            f,
            dtype=np.float16,
            mode='w+',
            shape=(total_samples, max_length, embedding_dim)
        )

        current_idx = 0
        for batch in tqdm(dataloader, desc="Generating embeddings"):
            batch = {k: v.to(device) for k, v in batch.items()}
            batch_size_actual = batch['input_ids'].size(0)
            with torch.no_grad():
                if device.type == 'cuda':
                    with torch.amp.autocast(device_type='cuda'):
                        outputs = model(**batch)
                else:
                    outputs = model(**batch)

                hidden_states = outputs.last_hidden_state
                embeddings = hidden_states.cpu().numpy().astype(np.float16)

                end_idx = min(current_idx + batch_size_actual, total_samples)
                memmap[current_idx:end_idx] = embeddings[:end_idx-current_idx]
                memmap.flush()
                current_idx = end_idx

                del outputs, hidden_states, embeddings
                batch = {k: v.detach().cpu() for k, v in batch.items()}
                if device.type == 'cuda':
                    torch.cuda.empty_cache()
                gc.collect()

    return output_path

def load_model_transformer(model_directory):
    print("Loading transformer model...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_kwargs = {'low_cpu_mem_usage': True}
    if device.type == 'cuda':
        model_kwargs['torch_dtype'] = torch.float16
    tokenizer = AutoTokenizer.from_pretrained(model_directory)
    model = AutoModel.from_pretrained(model_directory, **model_kwargs).to(device)
    model.eval()
    if hasattr(model, 'gradient_checkpointing_enable'):
        model.gradient_checkpointing_enable()
    return {"model": model, "tokenizer": tokenizer, "device": device}

def get_samples(emb_path, padding_size, dimension):
    file_size = os.path.getsize(emb_path)
    return file_size // (padding_size * dimension * 2)

def CNN_preprocess(pos_path, neg_path, model_path, max_reports=None, padding_size=30, batch_size=16):
    print_memory_usage("Before loading reports")
    pos_reports = load_reports(pos_path, max_reports)
    print_memory_usage("After loading positive reports")
    pos_reports = process_reports(pos_reports)
    print_memory_usage("After processing positive reports")
    neg_reports = load_reports(neg_path, max_reports)
    print_memory_usage("After loading negative reports")
    neg_reports = process_reports(neg_reports)
    print_memory_usage("After processing negative reports")

    if not padding_size:
        sample_reports = pos_reports[:1000] + neg_reports[:1000]
        padding_size = min(max(len(r) for r in sample_reports), 100)
        del sample_reports
        gc.collect()

    print(f"Using padding size: {padding_size}")

    model_dict = load_model_transformer(model_path)
    dimension = model_dict["model"].config.hidden_size
    print_memory_usage("After loading model")

    pos_emb_path = tempfile.mktemp()
    neg_emb_path = tempfile.mktemp()

    pos_dataset = TextClassificationDataset(pos_reports, model_dict["tokenizer"], padding_size)
    print_memory_usage("Before generating positive embeddings")
    generate_embeddings_batch(model_dict, pos_dataset, pos_emb_path, batch_size, padding_size)
    print_memory_usage("After generating positive embeddings")
    del pos_dataset, pos_reports
    gc.collect()

    neg_dataset = TextClassificationDataset(neg_reports, model_dict["tokenizer"], padding_size)
    print_memory_usage("Before generating negative embeddings")
    generate_embeddings_batch(model_dict, neg_dataset, neg_emb_path, batch_size, padding_size)
    print_memory_usage("After generating negative embeddings")
    del neg_dataset, neg_reports
    gc.collect()

    del model_dict["model"], model_dict["tokenizer"]
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    print_memory_usage("After clearing model")

    pos_samples = get_samples(pos_emb_path, padding_size, dimension)
    neg_samples = get_samples(neg_emb_path, padding_size, dimension)
    total_samples = pos_samples + neg_samples
    print(f"Positive samples: {pos_samples}, Negative samples: {neg_samples}")

    combined_emb_path = tempfile.mktemp()
    x_combined = np.memmap(combined_emb_path, dtype=np.float16, mode='w+',
                           shape=(total_samples, padding_size, dimension))

    try:
        chunk_size = 500
        print("Merging positive embeddings...")
        x_pos = np.memmap(pos_emb_path, dtype=np.float16, mode='r',
                          shape=(pos_samples, padding_size, dimension))
        for i in tqdm(range(0, pos_samples, chunk_size)):
            end = min(i + chunk_size, pos_samples)
            x_combined[i:end] = x_pos[i:end]
        del x_pos

        print("Merging negative embeddings...")
        neg_start = pos_samples
        x_neg = np.memmap(neg_emb_path, dtype=np.float16, mode='r',
                          shape=(neg_samples, padding_size, dimension))
        for i in tqdm(range(0, neg_samples, chunk_size)):
            end = min(i + chunk_size, neg_samples)
            x_combined[neg_start+i:neg_start+end] = x_neg[i:end]
        del x_neg

        x_combined.flush()
        print_memory_usage("After merging embeddings")

        y_combined_path = tempfile.mktemp()
        y_combined = np.memmap(y_combined_path, dtype=np.float32, mode='w+',
                               shape=(total_samples, 2))
        y_combined[:, 0] = 0
        y_combined[:, 1] = 1
        chunk_size = 5000
        for i in tqdm(range(0, pos_samples, chunk_size)):
            end = min(i + chunk_size, pos_samples)
            y_combined[i:end, 0] = 1
            y_combined[i:end, 1] = 0
        y_combined.flush()
        print_memory_usage("After creating labels")
    finally:
        os.remove(pos_emb_path)
        os.remove(neg_emb_path)
        gc.collect()

    return combined_emb_path, y_combined_path, padding_size, dimension, total_samples, {
        "pos_samples": pos_samples,
        "neg_samples": neg_samples
    }

class BalancedDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, x_path, y_path, indices, batch_size, padding_size, dimension):
        self.x_path = x_path
        self.y_path = y_path
        self.indices = indices
        self.batch_size = batch_size
        self.padding_size = padding_size
        self.dimension = dimension
        self.total_samples = get_samples(x_path, padding_size, dimension)

    def __len__(self):
        return int(np.ceil(len(self.indices) / self.batch_size))

    def __getitem__(self, idx):
        batch_indices = self.indices[idx*self.batch_size : (idx+1)*self.batch_size]
        x_mem = np.memmap(self.x_path, dtype=np.float16, mode='r',
                          shape=(self.total_samples, self.padding_size, self.dimension))
        y_mem = np.memmap(self.y_path, dtype=np.float32, mode='r',
                          shape=(self.total_samples, 2))
        x_batch = np.array(x_mem[batch_indices], dtype=np.float32)
        y_batch = np.array(y_mem[batch_indices], dtype=np.float32)
        return x_batch, y_batch

    def on_epoch_end(self):
        gc.collect()

class IntervalCheckpoint(tf.keras.callbacks.Callback):
    def __init__(self, checkpoint_dir, save_interval_seconds=300, max_to_keep=2):
        super().__init__()
        self.checkpoint_dir = checkpoint_dir
        self.save_interval_seconds = save_interval_seconds
        self.max_to_keep = max_to_keep
        self.last_save_time = time.time()
        self.checkpoints = []
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

    def on_epoch_end(self, epoch, logs=None):
        current_time = time.time()
        if current_time - self.last_save_time >= self.save_interval_seconds:
            checkpoint_filename = f"checkpoint_epoch_{epoch+1}_{int(current_time)}.h5"
            checkpoint_path = os.path.join(self.checkpoint_dir, checkpoint_filename)
            self.model.save(checkpoint_path)
            print(f"Saved checkpoint: {checkpoint_path}")
            self.checkpoints.append(checkpoint_path)
            if len(self.checkpoints) > self.max_to_keep:
                file_to_remove = self.checkpoints.pop(0)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                    print(f"Removed old checkpoint: {file_to_remove}")
            self.last_save_time = current_time

class EnhancedTextClassifier:
    def __init__(self, input_shape, class_weights=None):
        self.input_shape = input_shape
        self.class_weights = class_weights
        self.model = self._build_model()
        self._compile_model()

    def _build_model(self):
        input_layer = Input(shape=self.input_shape)
        branches = []
        for kernel_size in [2, 3, 5]:
            branch = Conv1D(64, kernel_size, padding='same', activation='relu')(input_layer)
            branch = BatchNormalization()(branch)
            branch = GlobalMaxPooling1D()(branch)
            branches.append(branch)
        x = concatenate(branches)
        x = Dense(128, activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(64, activation='relu')(x)
        x = Dropout(0.3)(x)
        outputs = Dense(2, activation='softmax')(x)
        return Model(inputs=input_layer, outputs=outputs)

    def _compile_model(self):
        self.model.compile(
            loss='categorical_crossentropy',
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            metrics=['accuracy', self._f1_score]
        )

    def _f1_score(self, y_true, y_pred):
        true_positives = tf.reduce_sum(tf.round(tf.clip_by_value(y_true * y_pred, 0, 1)))
        possible_positives = tf.reduce_sum(tf.round(tf.clip_by_value(y_true, 0, 1)))
        predicted_positives = tf.reduce_sum(tf.round(tf.clip_by_value(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        recall = true_positives / (possible_positives + K.epsilon())
        return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

    def train(self, train_gen, val_gen, epochs=15, checkpoint_dir="/content/drive/MyDrive/checkpoints"):
        callbacks = [
            EarlyStopping(monitor='val_f1_score', patience=3, mode='max', restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_f1_score', factor=0.2, patience=2, mode='max'),
            IntervalCheckpoint(checkpoint_dir=checkpoint_dir, save_interval_seconds=300, max_to_keep=2)
        ]
        self.history = self.model.fit(
            train_gen,
            validation_data=val_gen,
            epochs=epochs,
            callbacks=callbacks,
            class_weight=self.class_weights,
            verbose=1
        )

    def evaluate(self, test_gen, threshold=DEFAULT_THRESHOLD):
        y_true, y_pred = [], []
        for x_batch, y_batch in test_gen:
            preds = self.model.predict(x_batch)
            y_pred.extend((preds[:, 1] > threshold).astype(int))
            y_true.extend(np.argmax(y_batch, axis=1))
        return self._calculate_metrics(y_true, y_pred)

    def _calculate_metrics(self, y_true, y_pred):
        precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
        return {
            'accuracy': np.mean(np.array(y_true) == np.array(y_pred)),
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'cm': confusion_matrix(y_true, y_pred)
        }

def save_model_with_metadata(model, model_dir, metadata=None):
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    model_path = os.path.join(model_dir, "model.h5")
    model.save(model_path)
    print(f"Model saved to {model_path}")
    if metadata:
        metadata_path = os.path.join(model_dir, "metadata.json")
        with open(metadata_path, 'w') as f:
            json.dump(metadata, f, indent=4)
        print(f"Model metadata saved to {metadata_path}")
    return model_path

def CNN(x_path, y_path, dimension, total_samples, padding_size,
        batch_size=64, nb_epoch=15, model_save_dir=None, dataset_metadata=None):
    print_memory_usage("Before CNN model training")
    pos_samples = dataset_metadata['pos_samples']
    neg_samples = dataset_metadata['neg_samples']
    class_weights = {
        0: (pos_samples + neg_samples) / (2 * neg_samples),
        1: (pos_samples + neg_samples) / (2 * pos_samples)
    }
    indices = np.random.permutation(total_samples)
    train_idx, test_idx = train_test_split(indices, test_size=0.2)
    train_gen = BalancedDataGenerator(x_path, y_path, train_idx, batch_size, padding_size, dimension)
    test_gen = BalancedDataGenerator(x_path, y_path, test_idx, batch_size, padding_size, dimension)
    classifier = EnhancedTextClassifier((padding_size, dimension), class_weights)
    classifier.train(train_gen, test_gen, epochs=nb_epoch, checkpoint_dir="/content/drive/MyDrive/checkpoints")
    metrics = classifier.evaluate(test_gen, threshold=DEFAULT_THRESHOLD)
    result = {
        'model': classifier.model,
        'metrics': metrics,
        'history': classifier.history.history
    }
    if model_save_dir:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_dir = os.path.join(model_save_dir, f"model_{timestamp}")
        metadata = {
            'timestamp': timestamp,
            'input_shape': (padding_size, dimension),
            'class_distribution': dataset_metadata,
            'optimal_threshold': DEFAULT_THRESHOLD,
            'metrics': metrics,
            'training_history': classifier.history.history
        }
        saved_model_path = save_model_with_metadata(classifier.model, save_dir, metadata)
        result['saved_model_path'] = saved_model_path
    print_memory_usage("After CNN model training")
    return result

if __name__ == "__main__":
    try:
        pos_path = "/content/sample_data/out/positive_reports.txt"
        neg_path = "/content/sample_data/out/negative_reports.txt"
        model_path = "/content/drive/MyDrive/sem_model_4"
        model_save_dir = "/content/drive/MyDrive/trained_models"

        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)
            print(f"Created directory for saving models: {model_save_dir}")

        checkpoints_dir = "/content/drive/MyDrive/checkpoints"
        if not os.path.exists(checkpoints_dir):
            os.makedirs(checkpoints_dir)
            print(f"Created directory for checkpoints: {checkpoints_dir}")

        print("Starting preprocessing...")
        print_memory_usage("Initial")
        x_path, y_path, padding_size, dimension, total_samples, dataset_metadata = CNN_preprocess(
            pos_path,
            neg_path,
            model_path,
            max_reports=None,
            batch_size=256
        )
        print_memory_usage("After preprocessing")
        results = CNN(
            x_path,
            y_path,
            dimension,
            total_samples,
            padding_size,
            batch_size=64,
            nb_epoch=50,
            model_save_dir=model_save_dir,
            dataset_metadata=dataset_metadata
        )
        print("\nFinal Metrics:")
        print(f"Accuracy: {results['metrics']['accuracy']:.4f}")
        print(f"Precision (Positive class): {results['metrics']['precision']:.4f}")
        print(f"Recall (Positive class): {results['metrics']['recall']:.4f}")
        print(f"F1-Score: {results['metrics']['f1']:.4f}")
        print("\nConfusion Matrix:")
        print(results['metrics']['cm'])
        if 'saved_model_path' in results:
            print(f"\nModel saved to: {results['saved_model_path']}")
        del results['model']
        K.clear_session()
        gc.collect()
    except Exception as e:
        print(f"Error occurred: {e}")
        import traceback
        traceback.print_exc()
    finally:
        if 'x_path' in locals() and os.path.exists(x_path):
            os.remove(x_path)
        if 'y_path' in locals() and os.path.exists(y_path):
            os.remove(y_path)
        print("Cleanup complete")
    print("Process completed successfully!")

Created directory for checkpoints: /content/drive/MyDrive/checkpoints
Starting preprocessing...
[MEMORY] Initial: 1252.52 MB
[MEMORY] Before loading reports: 1252.52 MB
[MEMORY] After loading positive reports: 1252.52 MB
[MEMORY] After processing positive reports: 1254.33 MB
[MEMORY] After loading negative reports: 1261.55 MB
[MEMORY] After processing negative reports: 1306.92 MB
Using padding size: 30
Loading transformer model...
[MEMORY] After loading model: 1543.63 MB
[MEMORY] Before generating positive embeddings: 1543.63 MB


Generating embeddings: 100%|██████████| 27/27 [00:15<00:00,  1.71it/s]


[MEMORY] After generating positive embeddings: 1832.48 MB
[MEMORY] Before generating negative embeddings: 1832.48 MB


Generating embeddings: 100%|██████████| 479/479 [03:59<00:00,  2.00it/s]


[MEMORY] After generating negative embeddings: 1844.02 MB
[MEMORY] After clearing model: 1820.02 MB
Positive samples: 6748, Negative samples: 122612
Merging positive embeddings...


100%|██████████| 14/14 [00:01<00:00, 13.98it/s]


Merging negative embeddings...


100%|██████████| 246/246 [00:26<00:00,  9.35it/s]


[MEMORY] After merging embeddings: 3538.93 MB


100%|██████████| 2/2 [00:00<00:00, 8297.34it/s]

[MEMORY] After creating labels: 3539.66 MB





[MEMORY] After preprocessing: 1740.62 MB
[MEMORY] Before CNN model training: 1740.62 MB


  self._warn_if_super_not_called()


Epoch 1/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 17ms/step - _f1_score: 0.9510 - accuracy: 0.9510 - loss: 0.2871 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.2180 - learning_rate: 0.0010
Epoch 2/50
[1m  18/1617[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 6ms/step - _f1_score: 0.9631 - accuracy: 0.9631 - loss: 0.1559

  current = self.get_monitor_value(logs)
  callback.on_epoch_end(epoch, logs)


[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 10ms/step - _f1_score: 0.9602 - accuracy: 0.9602 - loss: 0.1562 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.2110 - learning_rate: 0.0010
Epoch 3/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - _f1_score: 0.9611 - accuracy: 0.9611 - loss: 0.1505 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.2137 - learning_rate: 0.0010
Epoch 4/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 11ms/step - _f1_score: 0.9600 - accuracy: 0.9600 - loss: 0.1510 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.2030 - learning_rate: 0.0010
Epoch 5/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 10ms/step - _f1_score: 0.9612 - accuracy: 0.9612 - loss: 0.1430 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.2113 - learning_rate: 0.0010
Epoch 6/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s



Saved checkpoint: /content/drive/MyDrive/checkpoints/checkpoint_epoch_17_1743788180.h5
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 11ms/step - _f1_score: 0.9611 - accuracy: 0.9611 - loss: 0.1327 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.2417 - learning_rate: 0.0010
Epoch 18/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 10ms/step - _f1_score: 0.9605 - accuracy: 0.9605 - loss: 0.1350 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.2657 - learning_rate: 0.0010
Epoch 19/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 10ms/step - _f1_score: 0.9605 - accuracy: 0.9605 - loss: 0.1319 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.1938 - learning_rate: 0.0010
Epoch 20/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 12ms/step - _f1_score: 0.9610 - accuracy: 



Saved checkpoint: /content/drive/MyDrive/checkpoints/checkpoint_epoch_35_1743788493.h5
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 10ms/step - _f1_score: 0.9616 - accuracy: 0.9616 - loss: 0.1290 - val__f1_score: 0.9600 - val_accuracy: 0.9599 - val_loss: 0.2265 - learning_rate: 0.0010
Epoch 36/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - _f1_score: 0.9607 - accuracy: 0.9607 - loss: 0.1355 - val__f1_score: 0.9614 - val_accuracy: 0.9613 - val_loss: 0.3013 - learning_rate: 0.0010
Epoch 37/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 11ms/step - _f1_score: 0.9624 - accuracy: 0.9624 - loss: 0.1237 - val__f1_score: 0.9613 - val_accuracy: 0.9612 - val_loss: 0.2539 - learning_rate: 0.0010
Epoch 38/50
[1m1617/1617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 10ms/step - _f1_score: 0.9625 - accuracy: 

Traceback (most recent call last):
  File "<ipython-input-3-87d465172b03>", line 492, in <cell line: 0>
    results = CNN(
              ^^^^
  File "<ipython-input-3-87d465172b03>", line 436, in CNN
    metrics = classifier.evaluate(test_gen, threshold=DEFAULT_THRESHOLD)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-3-87d465172b03>", line 386, in evaluate
    preds = self.model.predict(x_batch)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/progbar.py", line 119, in update
    numdigits = int(math.log10(self.target)) + 1
                    ^^^^^^^^^^^^^^^^^^^^^^^
ValueError: math domain error


Cleanup complete
Process completed successfully!
