In [None]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from collections import Counter
import random

# Load data
# NOTE(review): absolute Colab-style path; adjust if the CSV lives elsewhere.
df = pd.read_csv('/content/IRIS.csv')
X = df.drop('species', axis=1).values
y = df['species'].values

# Split the dataset (fixed seed so the split is the same on every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest parameters
n_trees = 10
max_features = int(np.sqrt(X.shape[1]))  # commonly used
tree_list = []

# A single seeded generator drives every random choice below, so that
# "Restart & Run All" rebuilds exactly the same ensemble each time.
rng = np.random.default_rng(42)
n_train = len(X_train)

# Train each tree on a bootstrapped sample
for _ in range(n_trees):
    # Bootstrap sampling: draw n_train row indices with replacement
    indices = rng.choice(n_train, size=n_train, replace=True)
    X_sample = X_train[indices]
    y_sample = y_train[indices]

    # Select random subset of features.
    # The subset is drawn once per tree (not once per split) and stored
    # next to the tree so prediction can slice the same columns.
    feature_indices = rng.choice(X.shape[1], size=max_features, replace=False)

    # Seed the tree as well: scikit-learn permutes features while searching
    # for splits, so an unseeded tree can differ between runs.
    tree = DecisionTreeClassifier(random_state=int(rng.integers(2**31 - 1)))
    tree.fit(X_sample[:, feature_indices], y_sample)
    tree_list.append((tree, feature_indices))

# Predict function with majority vote
def predict_forest(X, forest=None):
    """Classify samples by majority vote over an ensemble of fitted trees.

    Parameters
    ----------
    X : array-like, 2-D
        Samples to classify, one row per sample. Each tree only sees the
        columns listed in its own feature-index array.
    forest : list of (estimator, feature_indices) pairs, optional
        Ensemble to vote with. Each estimator must expose ``predict``.
        When omitted, the module-level ``tree_list`` is used.

    Returns
    -------
    numpy.ndarray
        One predicted label per row of ``X`` (empty when ``X`` has no rows).

    Raises
    ------
    ValueError
        If ``X`` has rows but the ensemble contains no trees.
    """
    forest = tree_list if forest is None else forest
    X = np.asarray(X)

    # No samples means no votes to collect.
    if len(X) == 0:
        return np.array([])
    if len(forest) == 0:
        raise ValueError("predict_forest needs at least one fitted tree")

    # One batched predict call per tree instead of one call per
    # (sample, tree) pair; each tree gets only its own feature columns.
    votes_per_tree = [tree.predict(X[:, f_idx]) for tree, f_idx in forest]

    # zip(*...) regroups the votes per sample, in tree order, so ties are
    # broken in favour of the label voted first.
    all_preds = [
        Counter(sample_votes).most_common(1)[0][0]
        for sample_votes in zip(*votes_per_tree)
    ]
    return np.array(all_preds)

# Run the hand-built ensemble on the held-out samples
y_pred = predict_forest(X_test)

# Score the predictions against the true labels, then report both metrics
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)


Accuracy: 1.0
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Read the iris table from a CSV in the working directory
df = pd.read_csv("iris.csv")

# An 'Id' column, when present, is dropped before modelling
if 'Id' in df.columns:
    df = df.drop(columns='Id')

# Replace the string species labels with integer codes
label_encoder = LabelEncoder()
df['Species'] = label_encoder.fit_transform(df['Species'])

# Target column on one side, every remaining column as features on the other
y = df['Species']
X = df.drop(columns='Species')

# Hold out 20% of the rows for evaluation (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Part 1: baseline forest with n_estimators fixed at 10
rf_default = RandomForestClassifier(n_estimators=10, random_state=42).fit(X_train, y_train)
y_pred_default = rf_default.predict(X_test)
default_accuracy = accuracy_score(y_test, y_pred_default)
print(f"Default Accuracy (10 trees): {default_accuracy:.2f}")

# Part 2: sweep the tree count from 1 to 100, recording test accuracy for each
accuracy_scores = {}
best_accuracy = 0
best_n = 0

for n in range(1, 101):
    rf = RandomForestClassifier(n_estimators=n, random_state=42).fit(X_train, y_train)
    y_pred = rf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    accuracy_scores[n] = acc
    # Strict comparison: on ties the smallest tree count is kept
    if best_accuracy < acc:
        best_accuracy, best_n = acc, n

print(f"Best Accuracy: {best_accuracy:.2f} with {best_n} trees")

# Accuracy as a function of the number of trees
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(list(accuracy_scores), list(accuracy_scores.values()), marker='o')
ax.set_title("Accuracy vs Number of Trees in Random Forest")
ax.set_xlabel("Number of Trees (n_estimators)")
ax.set_ylabel("Accuracy")
ax.grid(True)
fig.tight_layout()
plt.show()


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Load the dataset
# NOTE(review): absolute Colab-style path; adjust if the CSV lives elsewhere.
df = pd.read_csv("/content/iris (3).csv")

# Drop the 'Id' column if it exists (non-inplace so re-running the cell is safe)
if 'Id' in df.columns:
    df = df.drop(columns='Id')

# Encode target labels as integer codes
label_encoder = LabelEncoder()
df['species'] = label_encoder.fit_transform(df['species'])

# Split features and target
X = df.drop('species', axis=1)
y = df['species']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Part 1: Random Forest baseline with n_estimators explicitly set to 10
rf_default = RandomForestClassifier(n_estimators=10, random_state=42)
rf_default.fit(X_train, y_train)
y_pred_default = rf_default.predict(X_test)
default_accuracy = accuracy_score(y_test, y_pred_default)
print(f"Default Accuracy (10 trees): {default_accuracy:.2f}")

# Part 2: Tune number of trees and record accuracy
# NOTE(review): the tree count is selected on the test set, so the reported
# best accuracy is an optimistic estimate.
accuracy_scores = {}
best_accuracy = 0
best_n = 0
best_y_pred = None  # predictions of the best-scoring forest seen so far

for n in range(1, 101):
    rf = RandomForestClassifier(n_estimators=n, random_state=42)
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    accuracy_scores[n] = acc
    # Strict '>' keeps the smallest tree count on ties; the None check makes
    # sure the first forest is always recorded.
    if best_y_pred is None or acc > best_accuracy:
        best_accuracy = acc
        best_n = n
        best_y_pred = y_pred

print(f"Best Accuracy: {best_accuracy:.2f} with {best_n} trees")

# Use the predictions of the best forest. After the loop, y_pred holds the
# predictions of the last forest (n=100), not those of best_n.
best_conf_matrix = confusion_matrix(y_test, best_y_pred)
print(best_conf_matrix)



Default Accuracy (10 trees): 1.00
Best Accuracy: 1.00 with 1 trees
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
