## TFRecord Dataset

### Import Libraries

In [None]:
import os
from PIL import Image
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

In [None]:
import tensorflow as tf

# Report the TensorFlow build in use.
print("TensorFlow Version:", tf.__version__)

# List every physical device TensorFlow can see (CPU and accelerators).
physical_devices = tf.config.list_physical_devices()
print("All Physical Devices:", physical_devices)

# List GPU devices only.
gpus = tf.config.list_physical_devices('GPU')
print("GPUs:", gpus)

# Test TensorFlow GPU availability: gpu_device_name() returns an empty string
# when no GPU is usable, so anything other than GPU:0 is treated as "not found".
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    print("GPU device not found. Please ensure that your machine has a compatible GPU and that TensorFlow is set up to use it.")
else:
    print('GPU found at {}. Using the L4 GPU hardware accelerator in Google Colab Pro.'.format(device_name))

# Check that TensorFlow can actually execute on the GPU.
# Entering a tf.device scope only records a placement request; nothing is
# validated until an op runs. A tiny probe op is therefore executed inside the
# scope and its reported device is inspected, which also catches the case where
# TensorFlow silently falls back to the CPU.
try:
    with tf.device('/device:GPU:0'):
        probe = tf.constant([1.0, 2.0]) * 2.0
    if 'GPU' not in probe.device:
        raise RuntimeError("probe op was placed on {} instead of the GPU".format(probe.device))
    print("TensorFlow can access the L4 GPU hardware accelerator in Google Colab Pro")
except (RuntimeError, tf.errors.OpError) as e:
    print("Error accessing the L4 GPU hardware accelerator in Google Colab Pro with TensorFlow:", e)


TensorFlow Version: 2.15.0
All Physical Devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU found at /device:GPU:0. Using the L4 GPU hardware accelerator in Google Colab Pro.
TensorFlow can access the L4 GPU hardware accelerator in Google Colab Pro


In [None]:
# Mount Google Drive inside the Colab VM at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Make the dataset folder the working directory and list its contents.
%cd /content/gdrive/My Drive/Masters Project/Final Dataset
!ls

/content/gdrive/My Drive/Masters Project/Final Dataset
Testing_Output	Training_Output  X_test.pickle	X_train.pickle	Y_test.pickle  Y_train.pickle


In [None]:
# Root folder of the prepared dataset on the mounted Google Drive. The split
# folders are derived from it so every path uses single, normal separators
# (the same form the %cd cells in this notebook use).
DATASET_DIR = "/content/gdrive/My Drive/Masters Project/Final Dataset"
TEST_DIR = os.path.join(DATASET_DIR, "Testing_Output")  # test data folder
TRAIN_DIR = os.path.join(DATASET_DIR, "Training_Output")  # train data folder
IMG_SIZE = 224  # side length, in pixels, that every image is resized to
# Class sub-folder names; a category's position in this list is its integer label.
CATEGORIES = ["no_tumor_Output", "glioma_tumor_Output", "meningioma_tumor_Output", "pituitary_tumor_Output"]

## TFRecord Dataset: Serialize Images to TFRecord Files

In [None]:
%%time

def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

def serialize_example(image, label):
    """Pack one (image, label) pair into a serialized ``tf.train.Example``.

    Args:
        image: Value stored under the ``'image_raw'`` key as a bytes feature.
        label: Value stored under the ``'label'`` key as an int64 feature.

    Returns:
        The serialized ``tf.train.Example`` as returned by ``SerializeToString()``.
    """
    # Keys are inserted in the order label, image_raw.
    feature_map = {}
    feature_map['label'] = _int64_feature(label)
    feature_map['image_raw'] = _bytes_feature(image)

    features = tf.train.Features(feature=feature_map)
    return tf.train.Example(features=features).SerializeToString()

def create_tfrecord(data_dir, output_file):
    """Serialize every readable image under ``data_dir`` into one TFRecord file.

    ``data_dir`` must contain one sub-folder per entry of ``CATEGORIES``; the
    position of a category in that list is used as the integer class label.
    Each image is loaded with ``cv2.imread`` (default flags), resized to
    ``IMG_SIZE`` x ``IMG_SIZE``, converted from BGR to RGB, and stored as the
    raw bytes of the resulting array alongside its label.

    Args:
        data_dir: Folder holding the per-category image sub-folders.
        output_file: Path of the TFRecord file to write (overwritten if present).

    Returns:
        int: Number of examples written to ``output_file``.

    Raises:
        OSError: If a category sub-folder is missing or cannot be listed
            (e.g. ``FileNotFoundError`` from ``os.listdir``).
    """
    written = 0
    skipped = []
    with tf.io.TFRecordWriter(output_file) as writer:
        for class_index, category in enumerate(CATEGORIES):
            category_path = os.path.join(data_dir, category)
            # Sorted so the record order does not depend on the filesystem's
            # arbitrary listing order and the output is reproducible.
            for img_name in sorted(os.listdir(category_path)):
                img_path = os.path.join(category_path, img_name)
                img = cv2.imread(img_path)
                if img is None:
                    # cv2.imread reports failure (corrupt file, non-image file,
                    # sub-directory) by returning None instead of raising;
                    # skip it rather than crash inside cv2.resize.
                    skipped.append(img_path)
                    continue
                img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                writer.write(serialize_example(img.tobytes(), class_index))
                written += 1
    if skipped:
        # Surface skipped files so silent data loss does not go unnoticed.
        print(f"Skipped {len(skipped)} unreadable file(s) under {data_dir}; first few: {skipped[:5]}")
    return written

# Resource-usage tooling. tracemalloc only records allocations made after
# start(), so tracing must begin before the TFRecord files are built;
# otherwise the "peak" reported below would exclude that work.
import psutil
import resource
import tracemalloc

tracemalloc.start()

# Write the TFRecord files into an explicit folder instead of whatever the
# current working directory happens to be, and create it if it is missing.
# This is the folder the notebook later changes into to list the files.
TFRECORD_DIR = "/content/gdrive/My Drive/Masters Project/Final Dataset/tfrecord"
os.makedirs(TFRECORD_DIR, exist_ok=True)

# Create TFRecord files for training and testing datasets
create_tfrecord(TRAIN_DIR, os.path.join(TFRECORD_DIR, 'train_dataset.tfrecord'))
create_tfrecord(TEST_DIR, os.path.join(TFRECORD_DIR, 'test_dataset.tfrecord'))

# Functions to print resource usage

def print_memory_usage():
    """Print the RSS and VMS memory figures of the current process, in MB."""
    bytes_per_mb = 1024 ** 2
    memory = psutil.Process(os.getpid()).memory_info()
    print(f"RSS: {memory.rss / bytes_per_mb:.2f} MB")
    print(f"VMS: {memory.vms / bytes_per_mb:.2f} MB")

def print_cpu_usage():
    """Print the CPU utilisation percentage measured by psutil over a 1-second interval."""
    cpu_percent = psutil.cpu_percent(interval=1)
    print(f"CPU Usage: {cpu_percent}%")

def print_disk_usage():
    """Print total and used space (GB) and the usage percentage for the ``/`` mount."""
    bytes_per_gb = 1024 ** 3
    root_usage = psutil.disk_usage('/')
    print(f"Total Disk Space: {root_usage.total / bytes_per_gb:.2f} GB")
    print(f"Used Disk Space: {root_usage.used / bytes_per_gb:.2f} GB")
    print(f"Disk Usage: {root_usage.percent}%")

def print_resource_usage():
    """Print the user and system CPU time consumed so far by this process."""
    self_usage = resource.getrusage(resource.RUSAGE_SELF)
    user_seconds = self_usage.ru_utime
    system_seconds = self_usage.ru_stime
    print(f"User CPU time: {user_seconds:.2f} seconds")
    print(f"System CPU time: {system_seconds:.2f} seconds")

def print_tracemalloc_usage():
    """Print the current and peak memory traced by ``tracemalloc``, in MB."""
    bytes_per_mb = 1024 ** 2
    traced_now, traced_peak = tracemalloc.get_traced_memory()
    print(f"Current Memory Usage: {traced_now / bytes_per_mb:.2f} MB")
    print(f"Peak Memory Usage: {traced_peak / bytes_per_mb:.2f} MB")

# Run each resource report in turn: memory, CPU, disk, rusage, tracemalloc.
for report in (
    print_memory_usage,
    print_cpu_usage,
    print_disk_usage,
    print_resource_usage,
    print_tracemalloc_usage,
):
    report()


In [None]:
# Switch to the folder holding the TFRecord files and list its contents.
%cd /content/gdrive/My Drive/Masters Project/Final Dataset/tfrecord
!ls

/content/gdrive/My Drive/Masters Project/Final Dataset/tfrecord
test_dataset.tfrecord  tfrecord_file.tfrecord  train_dataset.tfrecord
