In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset path used later in this
# notebook lives under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#Random Forest
class RandomForest:
    """Bagged ensemble of ``DecisionTree`` learners.

    Every tree is trained on a bootstrap resample (rows drawn with
    replacement) of the training data. The ensemble prediction is
    ``np.sign`` of the summed per-tree outputs, so predictions take values in
    {-1, 0, 1}; they can only match the targets when labels are encoded as
    -1 / +1.

    Parameters
    ----------
    n_estimators : int
        Number of trees to grow.
    max_depth : int or None
        Depth limit forwarded to every tree (``None`` means no limit).
    min_samples_split : int
        Minimum node size forwarded to every tree.
    random_state : int or None
        Seed for the bootstrap sampler. ``None`` (default) keeps drawing from
        NumPy's global random state.
    """

    def __init__(self, n_estimators=100, max_depth=None, min_samples_split=2, random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.random_state = random_state
        self.trees = []

    def fit(self, X_train, y_train):
        """Grow ``n_estimators`` trees, each on its own bootstrap resample.

        Calling ``fit`` again discards the previously grown trees instead of
        appending to them.

        Parameters
        ----------
        X_train : array-like, 2-D (rows are samples)
        y_train : array-like, indexed in step with the rows of ``X_train``
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        n_samples = X_train.shape[0]

        # A seeded private sampler when requested, otherwise the global one.
        if self.random_state is None:
            sampler = np.random
        else:
            sampler = np.random.RandomState(self.random_state)

        # Start from an empty ensemble so that refitting does not silently
        # grow the forest beyond n_estimators.
        self.trees = []
        for _ in range(self.n_estimators):
            # Randomly select samples with replacement
            sample_indices = sampler.choice(n_samples, size=n_samples, replace=True)
            tree = DecisionTree(max_depth=self.max_depth, min_samples_split=self.min_samples_split)
            tree.fit(X_train[sample_indices], y_train[sample_indices])
            self.trees.append(tree)

    def predict(self, X_test):
        """Return the sign of the summed tree outputs as an int array.

        An ensemble with no trees, or rows whose outputs cancel exactly,
        yield 0.
        """
        X_test = np.asarray(X_test)
        votes = np.zeros(X_test.shape[0])
        for tree in self.trees:
            votes += tree.predict(X_test)
        return np.sign(votes).astype(int)





In [None]:
class DecisionTree:
    """Binary decision tree whose leaves predict the mean of their targets.

    An internal node sends a row to the left child when
    ``X[:, split_feature] < split_threshold`` and to the right child
    otherwise. Splits are chosen by scoring every unique value of every
    feature with entropy-based information gain, where the entropy is taken
    over the distinct values present in ``y``.

    Parameters
    ----------
    max_depth : int or None
        Depth at which a node is forced to become a leaf; ``None`` disables
        the limit.
    min_samples_split : int
        Nodes holding fewer samples than this become leaves.
    """

    def __init__(self, max_depth=None, min_samples_split=2):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.split_feature = None
        self.split_threshold = None
        self.left_child = None
        self.right_child = None
        self.prediction = None

    def fit(self, X, y, depth=0):
        """Recursively grow the (sub)tree rooted at this node.

        Parameters
        ----------
        X : array-like, 2-D (rows are samples)
        y : array-like, indexed in step with the rows of ``X``
        depth : int
            Depth of this node; callers fitting a whole tree leave it at 0.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Forget anything learned by an earlier fit. Without this a node that
        # used to be a leaf keeps its old `prediction`, and predict() would
        # ignore the newly learned split.
        self.split_feature = None
        self.split_threshold = None
        self.left_child = None
        self.right_child = None
        self.prediction = None

        n_samples, n_features = X.shape
        if depth == self.max_depth or n_samples < self.min_samples_split or np.all(y == y[0]):
            self.prediction = np.mean(y)
            return

        # The parent's entropy is the same for every candidate split.
        parent_entropy = self._entropy(y)

        best_gain = 0
        best_feature = None
        best_threshold = None
        best_mask = None  # kept local: per-node masks must not outlive fit()
        for feature_index in range(n_features):
            column = X[:, feature_index]
            for threshold in np.unique(column):
                left_mask = column < threshold
                n_left = np.count_nonzero(left_mask)
                # A split that leaves one side empty is not a split.
                if n_left == 0 or n_left == n_samples:
                    continue
                gain = self._information_gain(y, y[left_mask], y[~left_mask], parent_entropy)
                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature_index
                    best_threshold = threshold
                    best_mask = left_mask

        # No candidate produced a positive gain: make this node a leaf.
        if best_mask is None:
            self.prediction = np.mean(y)
            return

        self.split_feature = best_feature
        self.split_threshold = best_threshold

        right_mask = ~best_mask
        self.left_child = DecisionTree(max_depth=self.max_depth, min_samples_split=self.min_samples_split)
        self.left_child.fit(X[best_mask], y[best_mask], depth + 1)
        self.right_child = DecisionTree(max_depth=self.max_depth, min_samples_split=self.min_samples_split)
        self.right_child.fit(X[right_mask], y[right_mask], depth + 1)

    def _entropy(self, y):
        """Shannon entropy (base 2) of the empirical distribution of values in ``y``."""
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return -np.sum(probabilities * np.log2(probabilities))

    def _information_gain(self, y, y_left, y_right, parent_entropy=None):
        """Entropy of ``y`` minus the size-weighted entropy of the two children.

        ``parent_entropy`` may be passed in to avoid recomputing
        ``_entropy(y)`` for every candidate split; when omitted it is
        computed here.
        """
        if parent_entropy is None:
            parent_entropy = self._entropy(y)
        p = len(y_left) / len(y)
        return parent_entropy - p * self._entropy(y_left) - (1 - p) * self._entropy(y_right)

    def predict(self, X):
        """Return one float prediction per row of ``X``.

        Raises
        ------
        RuntimeError
            If the tree has not been fitted.
        """
        X = np.asarray(X)
        if self.prediction is not None:
            return np.ones(X.shape[0]) * self.prediction
        if self.left_child is None or self.right_child is None:
            raise RuntimeError("DecisionTree.predict called before fit")

        predictions = np.zeros(X.shape[0])
        goes_left = X[:, self.split_feature] < self.split_threshold
        predictions[goes_left] = self.left_child.predict(X[goes_left])
        predictions[~goes_left] = self.right_child.predict(X[~goes_left])
        return predictions

In [None]:

class GradientBoostedTree:
    """Additive ensemble of ``DecisionTree`` learners fitted to residuals.

    The raw score starts at the mean of the training targets; each round fits
    a tree to the current residuals and adds ``learning_rate`` times its
    output. ``predict`` returns ``np.sign`` of the raw score, so predictions
    take values in {-1, 0, 1} and can only match targets encoded as -1 / +1.

    Parameters
    ----------
    n_estimators : int
        Number of boosting rounds (trees).
    learning_rate : float
        Shrinkage applied to every tree's contribution.
    max_depth : int or None
        Depth limit passed to each tree.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        # Constant the raw score starts from; set by fit().
        self.init_prediction = 0.0

    def fit(self, X_train, y_train):
        """Fit the boosting rounds; a repeated call replaces earlier trees."""
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train, dtype=float)
        n_samples = X_train.shape[0]

        # Initialize the prediction with the mean of the target values and
        # remember it: predict() must start from the same baseline.
        self.init_prediction = float(np.mean(y_train))
        pred = np.full(n_samples, self.init_prediction)

        self.trees = []
        for _ in range(self.n_estimators):
            # Calculate the residuals
            residuals = y_train - pred
            # Fit a decision tree to the residuals
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X_train, residuals)
            # Update the prediction using the tree and learning rate
            pred += self.learning_rate * tree.predict(X_train)
            # Save the tree
            self.trees.append(tree)

    def predict(self, X_test):
        """Return the sign of the boosted raw score for each row, as ints.

        The score is rebuilt the way ``fit`` built it: the stored initial
        mean plus ``learning_rate`` times each tree's output.
        """
        X_test = np.asarray(X_test)
        pred = np.full(X_test.shape[0], float(self.init_prediction))
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X_test)
        # Apply sign function to get the final predictions
        return np.sign(pred).astype(int)

In [None]:
# Animal dataset
path = '/content/drive/MyDrive/PRNN/Assign_3/animal/animals10_data.npz'

# Open the archive once and read both arrays inside the `with` block so the
# underlying file handle is released immediately. After the block `data` is a
# closed archive; use `image_data` and `labels` from here on.
with np.load(path) as data:
    image_data = data['data']
    labels = data['labels']

# Later cells index both arrays with the same shuffled indices, so they must
# have the same length.
if len(image_data) != len(labels):
    raise ValueError(
        f"'data' has {len(image_data)} rows but 'labels' has {len(labels)} entries"
    )

In [None]:
# Fractions of the dataset used for training and validation; whatever is left
# over becomes the test set.
TRAIN_FRACTION = 0.1
VAL_FRACTION = 0.85
# Fixed seed so that Restart & Run All reproduces the same split.
SPLIT_SEED = 42

# Define the sizes for training, validation, and testing sets
n_total = len(image_data)
train_size = int(TRAIN_FRACTION * n_total)
val_size = int(VAL_FRACTION * n_total)
test_size = n_total - train_size - val_size

# Shuffle the data (features and labels with the same permutation)
indices = np.random.default_rng(SPLIT_SEED).permutation(n_total)
image_data_shuffled = image_data[indices]
labels_shuffled = labels[indices]

# Split the data into training, validation, and testing sets
val_end = train_size + val_size

x_train = image_data_shuffled[:train_size]
y_train = labels_shuffled[:train_size]

x_val = image_data_shuffled[train_size:val_end]
y_val = labels_shuffled[train_size:val_end]

x_test = image_data_shuffled[val_end:]
y_test = labels_shuffled[val_end:]

# The three parts must partition the dataset exactly.
assert len(x_train) == train_size and len(x_val) == val_size and len(x_test) == test_size
assert len(y_train) == train_size and len(y_val) == val_size and len(y_test) == test_size

# Print the shapes of the split sets
print("Shape of x_train:", x_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of x_val:", x_val.shape)
print("Shape of y_val:", y_val.shape)
print("Shape of x_test:", x_test.shape)
print("Shape of y_test:", y_test.shape)


Shape of x_train: (2617, 4096)
Shape of y_train: (2617,)
Shape of x_val: (22252, 4096)
Shape of y_val: (22252,)
Shape of x_test: (1310, 4096)
Shape of y_test: (1310,)


In [None]:
import torch

# Use the CUDA device when PyTorch reports one, otherwise fall back to the CPU,
# and report which of the two was picked.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("GPU is available" if device.type == "cuda" else "GPU is not available, using CPU")


GPU is not available, using CPU


In [None]:
# Seed NumPy's global generator before training: the random forest's bootstrap
# sampling is stochastic, and without a seed the accuracies below change on
# every run.
TRAIN_SEED = 0
np.random.seed(TRAIN_SEED)

# Both models return np.sign(...) of a raw score, i.e. values in {-1, 0, 1}.
# If the labels use any other encoding the accuracies printed below cannot be
# meaningful, so make that visible instead of reporting a misleading number.
label_values = np.unique(y_train)
if not np.all(np.isin(label_values, [-1, 1])):
    print("Warning: labels are not encoded as -1/+1; sign-based predictions cannot match them:", label_values)

# Random Forest
rf = RandomForest(n_estimators=100)
rf.fit(x_train, y_train)
y_pred_rf = rf.predict(x_test)

# Gradient Boosted Tree
gbt = GradientBoostedTree(n_estimators=100, learning_rate=0.1, max_depth=3)
gbt.fit(x_train, y_train)
y_pred_gbt = gbt.predict(x_test)

# Evaluate accuracy (fraction of test rows predicted exactly)
accuracy_rf = np.mean(y_pred_rf == y_test)
accuracy_gbt = np.mean(y_pred_gbt == y_test)

print("Random Forest Accuracy:", accuracy_rf)
print("Gradient Boosted Tree Accuracy:", accuracy_gbt)