In [2]:
import numpy as np
import pandas as pd
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("fetal_health.csv")
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy().astype(int)

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
y_resampled = y_resampled.astype(int)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Manual implementation of K-Nearest Neighbors (KNN)
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def euclidean_distance(self, x1, x2):
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X_test):
        predictions = []
        for x in X_test:
            distances = [self.euclidean_distance(x, x_train) for x_train in self.X_train]
            k_indices = np.argsort(distances)[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            most_common = Counter(k_nearest_labels).most_common(1)
            predictions.append(most_common[0][0])
        return np.array(predictions)

# Manual implementation of Decision Tree used in Random Forest
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.n_features = X.shape[1]
        self.tree = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        num_samples, num_features = X.shape
        if depth >= self.max_depth or len(set(y)) == 1:
            return Counter(y).most_common(1)[0][0]

        best_feat, best_thresh = self._best_split(X, y)
        if best_feat is None:
            return Counter(y).most_common(1)[0][0]

        left_idx = X[:, best_feat] < best_thresh
        right_idx = ~left_idx
        left_subtree = self._grow_tree(X[left_idx], y[left_idx], depth + 1)
        right_subtree = self._grow_tree(X[right_idx], y[right_idx], depth + 1)

        return (best_feat, best_thresh, left_subtree, right_subtree)

    def _best_split(self, X, y):
        best_gain = -1
        split_idx, split_threshold = None, None
        for i in range(self.n_features):
            thresholds = np.unique(X[:, i])
            for threshold in thresholds:
                left_idx = X[:, i] < threshold
                right_idx = ~left_idx
                if len(y[left_idx]) == 0 or len(y[right_idx]) == 0:
                    continue
                gain = self._information_gain(y, y[left_idx], y[right_idx])
                if gain > best_gain:
                    best_gain, split_idx, split_threshold = gain, i, threshold
        return split_idx, split_threshold

    def _information_gain(self, parent, left, right):
        def entropy(y):
            probs = np.bincount(y) / len(y)
            return -np.sum([p * np.log2(p) for p in probs if p > 0])

        n = len(parent)
        return entropy(parent) - (len(left) / n * entropy(left) + len(right) / n * entropy(right))

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.tree) for x in X])

    def _traverse_tree(self, x, node):
        if not isinstance(node, tuple):
            return node
        feature_idx, threshold, left, right = node
        return self._traverse_tree(x, left) if x[feature_idx] < threshold else self._traverse_tree(x, right)

# Manual implementation of Gradient Boosting
class GradientBoosting:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y):
        self.initial_prediction = np.mean(y)
        residuals = y - self.initial_prediction

        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.full(X.shape[0], self.initial_prediction)
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of XGBoost
class XGBoost:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        residuals = y.copy()
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.zeros(X.shape[0])
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)


# Manual implementation of Ensemble Model with Majority Voting
class EnsembleModel:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        predictions = np.array([model.predict(X) for model in self.models])
        return np.array([Counter(predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Initialize models
knn = KNN(k=3)
dt = DecisionTree(max_depth=5)
gb = GradientBoosting(n_estimators=50, learning_rate=0.1, max_depth=3)
xgb = XGBoost(n_estimators=100, learning_rate=0.1, max_depth=3)

# Create ensemble model
ensemble = EnsembleModel(models=[knn, dt, gb,xgb])

# Train and evaluate models
models = {'KNN': knn, 'Decision Tree': dt, 'Gradient Boosting': gb, 'XGBoost':xgb, 'Ensemble': ensemble}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")


KNN Accuracy: 0.9507
Decision Tree Accuracy: 0.8963


TypeError: Cannot cast array data from dtype('float64') to dtype('int64') according to the rule 'safe'

KNN Accuracy: 0.9507


TypeError: '>' not supported between instances of 'NoneType' and 'int'

In [8]:
import numpy as np
import pandas as pd
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("fetal_health.csv")
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy().astype(int)

# Display class distribution before SMOTE
print("Class distribution before SMOTE:", Counter(y))

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
y_resampled = y_resampled.astype(int)

# Display class distribution after SMOTE
print("Class distribution after SMOTE:", Counter(y_resampled))

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Manual implementation of K-Nearest Neighbors (KNN)
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def euclidean_distance(self, x1, x2):
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X_test):
        predictions = []
        for x in X_test:
            distances = [self.euclidean_distance(x, x_train) for x_train in self.X_train]
            k_indices = np.argsort(distances)[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            most_common = Counter(k_nearest_labels).most_common(1)
            predictions.append(most_common[0][0])
        return np.array(predictions)

# Manual implementation of Decision Tree used in Random Forest
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.n_features = X.shape[1]
        self.tree = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        num_samples, num_features = X.shape
        if depth >= self.max_depth or len(set(y)) == 1:
            return Counter(y).most_common(1)[0][0]

        best_feat, best_thresh = self._best_split(X, y)
        if best_feat is None:
            return Counter(y).most_common(1)[0][0]

        left_idx = X[:, best_feat] < best_thresh
        right_idx = ~left_idx
        left_subtree = self._grow_tree(X[left_idx], y[left_idx], depth + 1)
        right_subtree = self._grow_tree(X[right_idx], y[right_idx], depth + 1)

        return (best_feat, best_thresh, left_subtree, right_subtree)

    def _best_split(self, X, y):
        best_gain = -1
        split_idx, split_threshold = None, None
        for i in range(self.n_features):
            thresholds = np.unique(X[:, i])
            for threshold in thresholds:
                left_idx = X[:, i] < threshold
                right_idx = ~left_idx
                if len(y[left_idx]) == 0 or len(y[right_idx]) == 0:
                    continue
                gain = self._information_gain(y, y[left_idx], y[right_idx])
                if gain > best_gain:
                    best_gain, split_idx, split_threshold = gain, i, threshold
        return split_idx, split_threshold

      def _information_gain(self, parent, left, right):
        def entropy(y):
           y = y.astype(int)  # Convert y to integers
           probs = np.bincount(y) / len(y)
           return -np.sum([p * np.log2(p) for p in probs if p > 0])

        n = len(parent)
        return entropy(parent) - (len(left) / n * entropy(left) + len(right) / n * entropy(right))


    def predict(self, X):
        return np.array([self._traverse_tree(x, self.tree) for x in X])

    def _traverse_tree(self, x, node):
        if not isinstance(node, tuple):
            return node
        feature_idx, threshold, left, right = node
        return self._traverse_tree(x, left) if x[feature_idx] < threshold else self._traverse_tree(x, right)

# Manual implementation of Random Forest
class RandomForest:
    def __init__(self, n_estimators=10, max_depth=5, max_features=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.trees = []

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_estimators):
            indices = np.random.choice(len(X), len(X), replace=True)
            X_sample, y_sample = X[indices], y[indices]
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        tree_predictions = np.array([tree.predict(X) for tree in self.trees])
        return np.array([Counter(tree_predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Manual implementation of Gradient Boosting
class GradientBoosting:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y):
        self.initial_prediction = np.mean(y)
        residuals = y - self.initial_prediction

        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.full(X.shape[0], self.initial_prediction)
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of XGBoost
class XGBoost:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        residuals = y.copy()
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.zeros(X.shape[0])
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

        # Manual implementation of Ensemble Model with Majority Voting
class EnsembleModel:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        predictions = np.array([model.predict(X) for model in self.models])
        return np.array([Counter(predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Initialize models
knn = KNN(k=3)
rf = RandomForest(n_estimators=10, max_depth=5, max_features=None)
gb = GradientBoosting(n_estimators=50, learning_rate=0.1, max_depth=3)
xgb = XGBoost(n_estimators=100, learning_rate=0.1, max_depth=3)

# Create ensemble model
ensemble = EnsembleModel(models=[knn, dt, gb,xgb])

# Train and evaluate models
models = {'KNN': knn, 'RandomForest':rf, 'Gradient Boosting': gb, 'XGBoost': xgb, 'Ensemble': ensemble}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")




IndentationError: unindent does not match any outer indentation level (<tokenize>, line 95)

In [11]:
import numpy as np
import pandas as pd
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("fetal_health.csv")
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy().astype(int)

# Display class distribution before SMOTE
print("Class distribution before SMOTE:", Counter(y))

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
y_resampled = y_resampled.astype(int)

# Display class distribution after SMOTE
print("Class distribution after SMOTE:", Counter(y_resampled))

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Manual implementation of K-Nearest Neighbors (KNN)
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def euclidean_distance(self, x1, x2):
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X_test):
        predictions = []
        for x in X_test:
            distances = [self.euclidean_distance(x, x_train) for x_train in self.X_train]
            k_indices = np.argsort(distances)[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            most_common = Counter(k_nearest_labels).most_common(1)
            predictions.append(most_common[0][0])
        return np.array(predictions)

# Manual implementation of Decision Tree used in Random Forest
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.n_features = X.shape[1]
        self.tree = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        num_samples, num_features = X.shape
        if depth >= self.max_depth or len(set(y)) == 1:
            return Counter(y).most_common(1)[0][0]

        best_feat, best_thresh = self._best_split(X, y)
        if best_feat is None:
            return Counter(y).most_common(1)[0][0]

        left_idx = X[:, best_feat] < best_thresh
        right_idx = ~left_idx
        left_subtree = self._grow_tree(X[left_idx], y[left_idx], depth + 1)
        right_subtree = self._grow_tree(X[right_idx], y[right_idx], depth + 1)

        return (best_feat, best_thresh, left_subtree, right_subtree)

    def _best_split(self, X, y):
        best_gain = -1
        split_idx, split_threshold = None, None
        for i in range(self.n_features):
            thresholds = np.unique(X[:, i])
            for threshold in thresholds:
                left_idx = X[:, i] < threshold
                right_idx = ~left_idx
                if len(y[left_idx]) == 0 or len(y[right_idx]) == 0:
                    continue
                gain = self._information_gain(y, y[left_idx], y[right_idx])
                if gain > best_gain:
                    best_gain, split_idx, split_threshold = gain, i, threshold
        return split_idx, split_threshold

    def _information_gain(self, parent, left, right):
        def entropy(y):
            y = np.array(y, dtype=int)  # Ensure integer values
            y = y - y.min()  # Shift values to be non-negative if needed
            if len(y) == 0:  # Prevent division by zero
               return 0
            probs = np.bincount(y) / len(y)
            return -np.sum([p * np.log2(p) for p in probs if p > 0])


        n = len(parent)
        return entropy(parent) - (len(left) / n * entropy(left) + len(right) / n * entropy(right))

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.tree) for x in X])

    def _traverse_tree(self, x, node):
        if not isinstance(node, tuple):
            return node
        feature_idx, threshold, left, right = node
        return self._traverse_tree(x, left) if x[feature_idx] < threshold else self._traverse_tree(x, right)

# Manual implementation of Random Forest
class RandomForest:
    def __init__(self, n_estimators=10, max_depth=5, max_features=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.trees = []

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_estimators):
            indices = np.random.choice(len(X), len(X), replace=True)
            X_sample, y_sample = X[indices], y[indices]
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        tree_predictions = np.array([tree.predict(X) for tree in self.trees])
        return np.array([Counter(tree_predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Manual implementation of Gradient Boosting
class GradientBoosting:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y):
        self.initial_prediction = np.mean(y)
        residuals = y - self.initial_prediction

        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.full(X.shape[0], self.initial_prediction)
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of XGBoost
class XGBoost:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        residuals = y.copy()
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.zeros(X.shape[0])
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of Ensemble Model with Majority Voting
class EnsembleModel:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        predictions = np.array([model.predict(X) for model in self.models])
        return np.array([Counter(predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Initialize models
knn = KNN(k=3)
rf = RandomForest(n_estimators=10, max_depth=5, max_features=None)
gb = GradientBoosting(n_estimators=50, learning_rate=0.1, max_depth=3)
xgb = XGBoost(n_estimators=100, learning_rate=0.1, max_depth=3)

# Create ensemble model
ensemble = EnsembleModel(models=[knn, dt, gb,xgb])

# Train and evaluate models
models = {'KNN': knn, 'RandomForest':rf, 'Gradient Boosting': gb, 'XGBoost': xgb, 'Ensemble': ensemble}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")





Class distribution before SMOTE: Counter({1: 1655, 2: 295, 3: 176})
Class distribution after SMOTE: Counter({2: 1655, 1: 1655, 3: 1655})
KNN Accuracy: 0.9507
RandomForest Accuracy: 0.8963
Gradient Boosting Accuracy: 0.6647


UFuncTypeError: Cannot cast ufunc 'subtract' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'

In [19]:
import numpy as np
import pandas as pd
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("fetal_health.csv")
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy().astype(int)

# Display class distribution before SMOTE
print("Class distribution before SMOTE:", Counter(y))

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

y_resampled = np.round(y_resampled).astype(int)  # Remove any floating points
y_resampled = np.clip(y_resampled, 1, 3)  # Ensure values are only in [1, 2, 3]


# Display class distribution after SMOTE
print("Class distribution after SMOTE:", Counter(y_resampled))
print("Unique values after SMOTE:", np.unique(y_resampled))


# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

print("Unique values in y_train:", np.unique(y_train))
print("Unique values in y_test:", np.unique(y_test))


# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Manual implementation of K-Nearest Neighbors (KNN)
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        predictions = []
        for x in X_test:
            distances = np.linalg.norm(self.X_train - x, axis=1)
            k_indices = np.argsort(distances)[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            most_common = Counter(k_nearest_labels).most_common(1)
            predictions.append(most_common[0][0])
        return np.array(predictions)

# Manual implementation of Decision Tree used in Random Forest
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.n_features = X.shape[1]
        self.tree = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        num_samples, num_features = X.shape
        if depth >= self.max_depth or len(set(y)) == 1:
            return Counter(y).most_common(1)[0][0]

        best_feat, best_thresh = self._best_split(X, y)
        if best_feat is None:
            return Counter(y).most_common(1)[0][0]

        left_idx = X[:, best_feat] < best_thresh
        right_idx = ~left_idx
        left_subtree = self._grow_tree(X[left_idx], y[left_idx], depth + 1)
        right_subtree = self._grow_tree(X[right_idx], y[right_idx], depth + 1)

        return (best_feat, best_thresh, left_subtree, right_subtree)

    def _best_split(self, X, y):
        best_gain = -1
        split_idx, split_threshold = None, None
        for i in range(self.n_features):
            thresholds = np.unique(X[:, i])
            for threshold in thresholds:
                left_idx = X[:, i] < threshold
                right_idx = ~left_idx
                if len(y[left_idx]) == 0 or len(y[right_idx]) == 0:
                    continue
                gain = self._information_gain(y, y[left_idx], y[right_idx])
                if gain > best_gain:
                    best_gain, split_idx, split_threshold = gain, i, threshold
        return split_idx, split_threshold


    def _entropy(self, y):
       y = np.array(y, dtype=int)
       if len(y) == 0:
          return 0
       if np.min(y) < 0:
           raise ValueError(f"Negative values found in y: {y}")  # Debugging step
       if np.max(y) > 3:
           raise ValueError(f"Unexpected high values found in y: {y}")  # Another check
       probs = np.bincount(y) / len(y)
       return -np.sum(probs[probs > 0] * np.log2(probs[probs > 0]))


    def _information_gain(self, parent, left, right):
        n = len(parent)
        return self._entropy(parent) - (len(left) / n * self._entropy(left) + len(right) / n * self._entropy(right))

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.tree) for x in X])

    def _traverse_tree(self, x, node):
        if not isinstance(node, tuple):
            return node
        feature_idx, threshold, left, right = node
        return self._traverse_tree(x, left) if x[feature_idx] < threshold else self._traverse_tree(x, right)

# Manual implementation of Random Forest
class RandomForest:
    def __init__(self, n_estimators=10, max_depth=5, max_features=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.trees = []

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_estimators):
            indices = np.random.choice(len(X), len(X), replace=True)
            X_sample, y_sample = X[indices], y[indices]
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        tree_predictions = np.array([tree.predict(X) for tree in self.trees])
        return np.array([Counter(tree_predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Manual implementation of Gradient Boosting
class GradientBoosting:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y):
        self.initial_prediction = np.mean(y)
        residuals = y - self.initial_prediction

        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.full(X.shape[0], self.initial_prediction)
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of XGBoost
class XGBoost:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        residuals = y.copy()
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.zeros(X.shape[0])
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of Ensemble Model with Majority Voting
class EnsembleModel:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        predictions = np.array([model.predict(X) for model in self.models])
        return np.array([Counter(predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Initialize models
knn = KNN(k=3)
rf = RandomForest(n_estimators=10, max_depth=5, max_features=None)
gb = GradientBoosting(n_estimators=50, learning_rate=0.1, max_depth=3)
xgb = XGBoost(n_estimators=100, learning_rate=0.1, max_depth=3)

# Create ensemble model
ensemble = EnsembleModel(models=[knn, dt, gb,xgb])

# Train and evaluate models
models = {'KNN': knn, 'RandomForest':rf, 'Gradient Boosting': gb, 'XGBoost': xgb, 'Ensemble': ensemble}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")




Class distribution before SMOTE: Counter({1: 1655, 2: 295, 3: 176})
Class distribution after SMOTE: Counter({2: 1655, 1: 1655, 3: 1655})
Unique values after SMOTE: [1 2 3]
Unique values in y_train: [1 2 3]
Unique values in y_test: [1 2 3]
KNN Accuracy: 0.9507
RandomForest Accuracy: 0.8943


ValueError: Negative values found in y: [ 0  0  0 ...  0  0 -1]

In [23]:
import numpy as np
import pandas as pd
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("fetal_health.csv")
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy().astype(int)

# Display class distribution before SMOTE
print("Class distribution before SMOTE:", Counter(y))

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
y_resampled = y_resampled.astype(int)

# Debugging: Check for negative values in y_resampled
print("Unique values in y_resampled:", np.unique(y_resampled))
assert np.all(y_resampled > 0), "Error: Negative values found in y_resampled!"

# Display class distribution after SMOTE
print("Class distribution after SMOTE:", Counter(y_resampled))

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Debugging: Check for negative values in y_train
print("Unique values in y_train:", np.unique(y_train))
assert np.all(y_train > 0), "Error: Negative values found in y_train!"

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Manual implementation of K-Nearest Neighbors (KNN)
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        predictions = []
        for x in X_test:
            distances = np.linalg.norm(self.X_train - x, axis=1)
            k_indices = np.argsort(distances)[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            most_common = Counter(k_nearest_labels).most_common(1)
            predictions.append(most_common[0][0])
        return np.array(predictions)

# Manual implementation of Decision Tree used in Random Forest
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.n_features = X.shape[1]
        self.tree = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        num_samples, num_features = X.shape
        if depth >= self.max_depth or len(set(y)) == 1:
            return Counter(y).most_common(1)[0][0]

        best_feat, best_thresh = self._best_split(X, y)
        if best_feat is None:
            return Counter(y).most_common(1)[0][0]

        left_idx = X[:, best_feat] < best_thresh
        right_idx = ~left_idx
        left_subtree = self._grow_tree(X[left_idx], y[left_idx], depth + 1)
        right_subtree = self._grow_tree(X[right_idx], y[right_idx], depth + 1)

        return (best_feat, best_thresh, left_subtree, right_subtree)

    def _best_split(self, X, y):
        best_gain = -1
        split_idx, split_threshold = None, None
        for i in range(self.n_features):
            thresholds = np.unique(X[:, i])
            for threshold in thresholds:
                left_idx = X[:, i] < threshold
                right_idx = ~left_idx
                if len(y[left_idx]) == 0 or len(y[right_idx]) == 0:
                    continue
                gain = self._information_gain(y, y[left_idx], y[right_idx])
                if gain > best_gain:
                    best_gain, split_idx, split_threshold = gain, i, threshold
        return split_idx, split_threshold

    def _entropy(self, y):
      y = np.array(y)
      if len(y) == 0:
         return 0
      if np.min(y) < 1:  # Check for invalid class labels
         raise ValueError(f"Invalid class labels found in y: {y}")
      counts = np.bincount(y - 1)  # Adjust class labels for bincount
      probs = counts / len(y)
      return -np.sum(probs[probs > 0] * np.log2(probs[probs > 0]))


    def _information_gain(self, parent, left, right):
        n = len(parent)
        return self._entropy(parent) - (len(left) / n * self._entropy(left) + len(right) / n * self._entropy(right))

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.tree) for x in X])

    def _traverse_tree(self, x, node):
        if not isinstance(node, tuple):
            return node
        feature_idx, threshold, left, right = node
        return self._traverse_tree(x, left) if x[feature_idx] < threshold else self._traverse_tree(x, right)

# Manual implementation of Random Forest
class RandomForest:
    def __init__(self, n_estimators=10, max_depth=5, max_features=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.trees = []

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_estimators):
            indices = np.random.choice(len(X), len(X), replace=True)
            X_sample, y_sample = X[indices], y[indices]
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        tree_predictions = np.array([tree.predict(X) for tree in self.trees])
        return np.array([Counter(tree_predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])
# Manual implementation of Gradient Boosting
class GradientBoosting:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y):
        self.initial_prediction = np.mean(y)
        residuals = y - self.initial_prediction

        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.full(X.shape[0], self.initial_prediction)
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of XGBoost
class XGBoost:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        residuals = y.copy()
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.zeros(X.shape[0])
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of Ensemble Model with Majority Voting
class EnsembleModel:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        predictions = np.array([model.predict(X) for model in self.models])
        return np.array([Counter(predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Initialize models
knn = KNN(k=3)
rf = RandomForest(n_estimators=10, max_depth=5, max_features=None)
gb = GradientBoosting(n_estimators=50, learning_rate=0.1, max_depth=3)
xgb = XGBoost(n_estimators=100, learning_rate=0.1, max_depth=3)

# Create ensemble model
ensemble = EnsembleModel(models=[knn, dt, gb,xgb])

# Train and evaluate models
models = {'KNN': knn, 'RandomForest':rf, 'Gradient Boosting': gb, 'XGBoost': xgb, 'Ensemble': ensemble}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")





Class distribution before SMOTE: Counter({1: 1655, 2: 295, 3: 176})
Unique values in y_resampled: [1 2 3]
Class distribution after SMOTE: Counter({2: 1655, 1: 1655, 3: 1655})
Unique values in y_train: [1 2 3]
KNN Accuracy: 0.9507
RandomForest Accuracy: 0.8983


ValueError: Invalid class labels found in y: [-0.00755287 -0.00755287  0.99244713 ... -0.00755287  0.99244713
 -1.00755287]

In [24]:
import numpy as np
import pandas as pd
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("fetal_health.csv")
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy().astype(int)

# Display class distribution before SMOTE
print("Class distribution before SMOTE:", Counter(y))

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
y_resampled = y_resampled.astype(int)  # Ensure integer labels

# Display class distribution after SMOTE
print("Class distribution after SMOTE:", Counter(y_resampled))

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Scale only X_train and X_test, NOT y_train
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Manual implementation of K-Nearest Neighbors (KNN)
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        predictions = []
        for x in X_test:
            distances = np.linalg.norm(self.X_train - x, axis=1)
            k_indices = np.argsort(distances)[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            most_common = Counter(k_nearest_labels).most_common(1)
            predictions.append(most_common[0][0])
        return np.array(predictions)

# Manual implementation of Decision Tree
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.n_features = X.shape[1]
        self.tree = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        num_samples, num_features = X.shape
        if depth >= self.max_depth or len(set(y)) == 1:
            return Counter(y).most_common(1)[0][0]

        best_feat, best_thresh = self._best_split(X, y)
        if best_feat is None:
            return Counter(y).most_common(1)[0][0]

        left_idx = X[:, best_feat] < best_thresh
        right_idx = ~left_idx
        left_subtree = self._grow_tree(X[left_idx], y[left_idx], depth + 1)
        right_subtree = self._grow_tree(X[right_idx], y[right_idx], depth + 1)

        return (best_feat, best_thresh, left_subtree, right_subtree)

    def _best_split(self, X, y):
        best_gain = -1
        split_idx, split_threshold = None, None
        for i in range(self.n_features):
            thresholds = np.unique(X[:, i])
            for threshold in thresholds:
                left_idx = X[:, i] < threshold
                right_idx = ~left_idx
                if len(y[left_idx]) == 0 or len(y[right_idx]) == 0:
                    continue
                gain = self._information_gain(y, y[left_idx], y[right_idx])
                if gain > best_gain:
                    best_gain, split_idx, split_threshold = gain, i, threshold
        return split_idx, split_threshold

    def _entropy(self, y):
        y = np.array(y)
        if len(y) == 0:
            return 0
        probs = np.bincount(y) / len(y)
        probs = probs[probs > 0]  # Avoid log(0)
        return -np.sum(probs * np.log2(probs))

    def _information_gain(self, parent, left, right):
        n = len(parent)
        return self._entropy(parent) - (len(left) / n * self._entropy(left) + len(right) / n * self._entropy(right))

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.tree) for x in X])

    def _traverse_tree(self, x, node):
        if not isinstance(node, tuple):
            return node
        feature_idx, threshold, left, right = node
        return self._traverse_tree(x, left) if x[feature_idx] < threshold else self._traverse_tree(x, right)

# Manual implementation of Random Forest
class RandomForest:
    def __init__(self, n_estimators=10, max_depth=5):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_estimators):  # Fixed syntax error
            indices = np.random.choice(len(X), len(X), replace=True)
            X_sample, y_sample = X[indices], y[indices]
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        tree_predictions = np.array([tree.predict(X) for tree in self.trees])
        return np.array([Counter(tree_predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Initialize models
knn = KNN(k=3)
dt = DecisionTree(max_depth=5)
rf = RandomForest(n_estimators=10, max_depth=5)

# Train and evaluate models
models = {'KNN': knn, 'Decision Tree': dt, 'Random Forest': rf}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")


Class distribution before SMOTE: Counter({1: 1655, 2: 295, 3: 176})
Class distribution after SMOTE: Counter({2: 1655, 1: 1655, 3: 1655})
KNN Accuracy: 0.9507
Decision Tree Accuracy: 0.8963
Random Forest Accuracy: 0.8973


In [29]:
import numpy as np
import pandas as pd
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("fetal_health.csv")
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy().astype(int)

# Display class distribution before SMOTE
print("Class distribution before SMOTE:", Counter(y))

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
y_resampled = y_resampled.astype(int)  # Ensure integer labels

# Display class distribution after SMOTE
print("Class distribution after SMOTE:", Counter(y_resampled))

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Scale only X_train and X_test, NOT y_train
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Manual implementation of K-Nearest Neighbors (KNN)
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        predictions = []
        for x in X_test:
            distances = np.linalg.norm(self.X_train - x, axis=1)
            k_indices = np.argsort(distances)[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            most_common = Counter(k_nearest_labels).most_common(1)
            predictions.append(most_common[0][0])
        return np.array(predictions)

# Manual implementation of Decision Tree
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.n_features = X.shape[1]
        self.tree = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        num_samples, num_features = X.shape
        if depth >= self.max_depth or len(set(y)) == 1:
            return Counter(y).most_common(1)[0][0]

        best_feat, best_thresh = self._best_split(X, y)
        if best_feat is None:
            return Counter(y).most_common(1)[0][0]

        left_idx = X[:, best_feat] < best_thresh
        right_idx = ~left_idx
        left_subtree = self._grow_tree(X[left_idx], y[left_idx], depth + 1)
        right_subtree = self._grow_tree(X[right_idx], y[right_idx], depth + 1)

        return (best_feat, best_thresh, left_subtree, right_subtree)

    def _best_split(self, X, y):
        best_gain = -1
        split_idx, split_threshold = None, None
        for i in range(self.n_features):
            thresholds = np.unique(X[:, i])
            for threshold in thresholds:
                left_idx = X[:, i] < threshold
                right_idx = ~left_idx
                if len(y[left_idx]) == 0 or len(y[right_idx]) == 0:
                    continue
                gain = self._information_gain(y, y[left_idx], y[right_idx])
                if gain > best_gain:
                    best_gain, split_idx, split_threshold = gain, i, threshold
        return split_idx, split_threshold

    def _entropy(self, y):
       y = np.array(y, dtype=int)  # Convert to integer
       if len(y) == 0:
          return 0
       unique_classes, counts = np.unique(y, return_counts=True)
       probs = counts / len(y)
       return -np.sum(probs * np.log2(probs))  # Compute entropy safely


    def _information_gain(self, parent, left, right):
        n = len(parent)
        return self._entropy(parent) - (len(left) / n * self._entropy(left) + len(right) / n * self._entropy(right))

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.tree) for x in X])

    def _traverse_tree(self, x, node):
        if not isinstance(node, tuple):
            return node
        feature_idx, threshold, left, right = node
        return self._traverse_tree(x, left) if x[feature_idx] < threshold else self._traverse_tree(x, right)

# Manual implementation of Random Forest
class RandomForest:
    def __init__(self, n_estimators=10, max_depth=5):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_estimators):  # Fixed syntax error
            indices = np.random.choice(len(X), len(X), replace=True)
            X_sample, y_sample = X[indices], y[indices]
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        tree_predictions = np.array([tree.predict(X) for tree in self.trees])
        return np.array([Counter(tree_predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Manual implementation of Gradient Boosting
class GradientBoosting:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y):
        self.initial_prediction = np.mean(y)
        residuals = y - self.initial_prediction

        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.full(X.shape[0], self.initial_prediction)
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of XGBoost
class XGBoost:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        residuals = y.copy()
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)
            predictions = tree.predict(X)
            residuals -= self.learning_rate * predictions
            self.trees.append(tree)

    def predict(self, X):
        pred = np.zeros(X.shape[0])
        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)
        return np.round(pred).astype(int)

# Manual implementation of Ensemble Model with Majority Voting
class EnsembleModel:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        predictions = np.array([model.predict(X) for model in self.models])
        return np.array([Counter(predictions[:, i]).most_common(1)[0][0] for i in range(len(X))])

# Initialize models
knn = KNN(k=3)
rf = RandomForest(n_estimators=10, max_depth=5)
gb = GradientBoosting(n_estimators=50, learning_rate=0.1, max_depth=3)
xgb = XGBoost(n_estimators=100, learning_rate=0.1, max_depth=3)

# Create ensemble model
ensemble = EnsembleModel(models=[knn, rf, gb,xgb])

# Train and evaluate models
models = {'KNN': knn, 'RandomForest':rf, 'Gradient Boosting': gb, 'XGBoost': xgb, 'Ensemble': ensemble}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")



Class distribution before SMOTE: Counter({1: 1655, 2: 295, 3: 176})
Class distribution after SMOTE: Counter({2: 1655, 1: 1655, 3: 1655})
KNN Accuracy: 0.9507
RandomForest Accuracy: 0.8963
Gradient Boosting Accuracy: 0.6647


UFuncTypeError: Cannot cast ufunc 'subtract' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'