In [None]:
import h5py

# Open the HDF5 file
train_path = r'/content/Train (1).h5'  # Replace with your file path
with h5py.File(train_path, 'r') as file:
    # Recursively print the structure of the file
    def print_structure(name, obj):
        if isinstance(obj, h5py.Group):
            print(f"Group: {name}")
        elif isinstance(obj, h5py.Dataset):
            print(f"Dataset: {name} - Shape: {obj.shape} - Data type: {obj.dtype}")

    file.visititems(print_structure)

Dataset: images - Shape: (2626, 128, 128, 3) - Data type: uint8
Dataset: labels - Shape: (2626,) - Data type: int32


In [None]:
import h5py

# Open the HDF5 file
test_path = r'/content/Test (1).h5'  # Replace with your file path
with h5py.File(test_path, 'r') as file:
    # Recursively print the structure of the file
    def print_structure(name, obj):
        if isinstance(obj, h5py.Group):
            print(f"Group: {name}")
        elif isinstance(obj, h5py.Dataset):
            print(f"Dataset: {name} - Shape: {obj.shape} - Data type: {obj.dtype}")

    file.visititems(print_structure)

Dataset: images - Shape: (120, 128, 128, 3) - Data type: uint8
Dataset: labels - Shape: (120,) - Data type: int32


In [2]:
import os
import h5py

def inspect_h5_file(file_path):
    # Check if the file exists
    if os.path.exists(file_path):
        with h5py.File(file_path, "r") as file:
            print(f"Keys in {file_path}: {list(file.keys())}")
    else:
        print(f"File {file_path} not found!")

# Check current working directory
print("Current Working Directory:", os.getcwd())



# Inspect the training file
inspect_h5_file("Train.h5")

# Inspect the test file
inspect_h5_file("Test.h5")


Current Working Directory: /content
File Train.h5 not found!
File Test.h5 not found!


In [None]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

# Load the dataset
def load_data(train_path, test_path):
    train_dataset = h5py.File(train_path, "r")
    test_dataset = h5py.File(test_path, "r")

    train_x_orig = np.array(train_dataset["images"][:])  # Replace with the correct key
    train_y_orig = np.array(train_dataset["labels"][:])  # Replace with the correct key

    test_x_orig = np.array(test_dataset["images"][:])  # Replace with the correct key
    test_y_orig = np.array(test_dataset["labels"][:])  # Replace with the correct key

    return train_x_orig, train_y_orig, test_x_orig, test_y_orig

In [None]:
# Normalize the dataset
def preprocess_data(train_x_orig, test_x_orig):
    train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
    test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

    train_x = train_x_flatten / 255.
    test_x = test_x_flatten / 255.

    return train_x, test_x

In [None]:
# Softmax function
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

In [None]:
# Cost function for multi-class classification
def compute_cost(A, Y):
    m = Y.shape[1]
    cost = -np.sum(Y * np.log(A)) / m
    return cost

In [None]:
# Gradient Descent
def gradient_descent(X, Y, learning_rate, num_iterations):
    n_x, m = X.shape
    n_y = Y.shape[0]

    W = np.random.randn(n_y, n_x) * 0.01
    b = np.zeros((n_y, 1))

    for i in range(num_iterations):
        Z = np.dot(W, X) + b
        A = softmax(Z)

        cost = compute_cost(A, Y)

        dZ = A - Y
        dW = np.dot(dZ, X.T) / m
        db = np.sum(dZ, axis=1, keepdims=True) / m

        W -= learning_rate * dW
        b -= learning_rate * db

        if i % 100 == 0:
            print(f"Cost after iteration {i}: {cost}")

    return W, b

In [None]:
# Predict function
def predict(W, b, X):
    Z = np.dot(W, X) + b
    A = softmax(Z)
    predictions = np.argmax(A, axis=0)
    return predictions

In [None]:
# Convert labels to one-hot encoding
def convert_to_one_hot(Y, num_classes):
    Y_one_hot = np.eye(num_classes)[Y.reshape(-1)].T
    return Y_one_hot

In [None]:
# Load and preprocess the data
# train_x_orig, train_y_orig, test_x_orig, test_y_orig = load_data("Train.h5", "Test.h5")
train_x_orig, train_y_orig, test_x_orig, test_y_orig = load_data("/content/Train (1).h5", "/content/Test (1).h5")

train_x, test_x = preprocess_data(train_x_orig, test_x_orig)

In [None]:
# Convert labels to one-hot encoding
num_classes = len(np.unique(train_y_orig))
train_y = convert_to_one_hot(train_y_orig, num_classes)
test_y = convert_to_one_hot(test_y_orig, num_classes)

In [None]:
# Train the model
learning_rate = 0.001
num_iterations = 5000
W, b = gradient_descent(train_x, train_y, learning_rate, num_iterations)

Cost after iteration 0: 1.78078176268458
Cost after iteration 100: 1.2866210530544988
Cost after iteration 200: 1.2087955662220016
Cost after iteration 300: 1.1619564046184871
Cost after iteration 400: 1.126417071799376
Cost after iteration 500: 1.0967943927211592
Cost after iteration 600: 1.0709148588947885
Cost after iteration 700: 1.0476819889471432
Cost after iteration 800: 1.0264566304848906
Cost after iteration 900: 1.006827428903298
Cost after iteration 1000: 0.9885104170624601
Cost after iteration 1100: 0.9712993094104093
Cost after iteration 1200: 0.9550384063397002
Cost after iteration 1300: 0.9396066475099218
Cost after iteration 1400: 0.9249076359818982
Cost after iteration 1500: 0.9108630871945823
Cost after iteration 1600: 0.8974083571784798
Cost after iteration 1700: 0.8844892932223886
Cost after iteration 1800: 0.8720599582288513
Cost after iteration 1900: 0.8600809505382524
Cost after iteration 2000: 0.8485181402261908
Cost after iteration 2100: 0.837341703114572
Cost 

In [None]:
# Test the model
train_predictions = predict(W, b, train_x)
test_predictions = predict(W, b, test_x)

In [None]:
# Calculate accuracy
train_accuracy = np.mean(train_predictions == np.argmax(train_y, axis=0)) * 100
test_accuracy = np.mean(test_predictions == np.argmax(test_y, axis=0)) * 100

print(f"Train accuracy: {train_accuracy:.2f}%")
print(f"Test accuracy: {test_accuracy:.2f}%")

Train accuracy: 84.35%
Test accuracy: 48.33%
