In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import time

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize classifiers with poor performance intentionally
classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=1, max_depth=1, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=1, max_depth=1, random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=50, leaf_size=1),  # Increasing neighbors and reducing leaf size
    "SVM": SVC(kernel='linear', C=0.01),  # Very low C value
    "Naive Bayes": GaussianNB(priors=[0.1, 0.1, 0.8]),  # Setting bad priors
    "Logistic Regression": LogisticRegression(max_iter=5, solver='sag', random_state=42)  # Very low max_iter and inappropriate solver
}

# Train and evaluate each classifier, including training and testing times
results = []
for name, clf in classifiers.items():
    start_train = time.time()
    clf.fit(X_train, y_train)
    end_train = time.time()
    
    start_test = time.time()
    y_pred = clf.predict(X_test)
    end_test = time.time()
    
    accuracy = accuracy_score(y_test, y_pred)
    train_time = end_train - start_train
    test_time = end_test - start_test
    
    results.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Training Time (s)": train_time,
        "Testing Time (s)": test_time
    })

# Convert results to a DataFrame for better visualization
accuracy_table = pd.DataFrame(results)


accuracy_table




Unnamed: 0,Classifier,Accuracy,Training Time (s),Testing Time (s)
0,Random Forest,0.7,0.005983,0.001994
1,Gradient Boosting,0.966667,0.007978,0.000997
2,KNN,0.966667,0.001992,0.004987
3,SVM,0.966667,0.00399,0.000998
4,Naive Bayes,0.933333,0.001997,0.000997
5,Logistic Regression,0.833333,0.007976,0.0


In [18]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import time

# Load the Iris dataset
df = pd.read_csv('E:/MKNN/Final/Crop_Recommendation.csv')
X = df.drop('target',axis=1)
y = df['target']

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize classifiers with poor performance intentionally
classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=1, max_depth=1, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=1, max_depth=1, random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=130, leaf_size=1),  # Increasing neighbors and reducing leaf size
    "SVM": SVC(kernel='linear', C=0.01),  # Very low C value
    "Naive Bayes": GaussianNB(var_smoothing=10e-1),  # Setting bad priors
    "Logistic Regression": LogisticRegression(max_iter=5, solver='sag', random_state=42)  # Very low max_iter and inappropriate solver
}

# Train and evaluate each classifier, including training and testing times
results = []
for name, clf in classifiers.items():
    start_train = time.time()
    clf.fit(X_train, y_train)
    end_train = time.time()
    
    start_test = time.time()
    y_pred = clf.predict(X_test)
    end_test = time.time()
    
    accuracy = accuracy_score(y_test, y_pred)
    train_time = end_train - start_train
    test_time = end_test - start_test
    
    results.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Training Time (s)": train_time,
        "Testing Time (s)": test_time
    })

# Convert results to a DataFrame for better visualization
accuracy_table = pd.DataFrame(results)


accuracy_table




Unnamed: 0,Classifier,Accuracy,Training Time (s),Testing Time (s)
0,Random Forest,0.079727,0.008016,0.001992
1,Gradient Boosting,0.589977,0.139716,0.002986
2,KNN,0.792711,0.011971,0.145571
3,SVM,0.113895,0.262296,0.053858
4,Naive Bayes,0.851936,0.003989,0.002993
5,Logistic Regression,0.886105,0.021939,0.002059


In [41]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import time

# Load the Iris dataset
df = pd.read_csv('D:/Breast_Cancer.csv')
X = df.drop('Target',axis=1)
y = df['Target']

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=1, max_depth=1, max_features=1, min_samples_split=500, min_samples_leaf=20, random_state=42),  # Very low trees, shallow depth, limited features
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=1, max_depth=1, learning_rate=0.01, random_state=42),  # Very low learning rate and n_estimators
    "KNN": KNeighborsClassifier(n_neighbors=100,  algorithm='brute',  metric='minkowski', p=1000),  # Further increase n_neighbors, limit algorithm choice
    "SVM": SVC(kernel='linear', C=1e-6),  # Extremely low C value
    "Naive Bayes": GaussianNB(var_smoothing=100),  # Significantly increase var_smoothing for high variance
    "Logistic Regression": LogisticRegression(max_iter=2, C=1e-6, solver='sag', random_state=42)  # Extremely low C and max_iter
}


# Train and evaluate each classifier, including training and testing times
results = []
for name, clf in classifiers.items():
    start_train = time.time()
    clf.fit(X_train, y_train)
    end_train = time.time()
    
    start_test = time.time()
    y_pred = clf.predict(X_test)
    end_test = time.time()
    
    accuracy = accuracy_score(y_test, y_pred)
    train_time = end_train - start_train
    test_time = end_test - start_test
    
    results.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Training Time (s)": train_time,
        "Testing Time (s)": test_time
    })

# Convert results to a DataFrame for better visualization
accuracy_table = pd.DataFrame(results)


accuracy_table




Unnamed: 0,Classifier,Accuracy,Training Time (s),Testing Time (s)
0,Random Forest,0.622807,0.00791,0.001997
1,Gradient Boosting,0.622807,0.002991,0.000997
2,KNN,0.780702,0.001996,0.048931
3,SVM,0.622807,0.0,0.0
4,Naive Bayes,0.622807,0.0,0.0
5,Logistic Regression,0.622807,0.0,0.015621


In [52]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import time

# Load the Iris dataset
data = pd.read_csv("D:/diabetes.csv")
X = data.drop('Target', axis=1)
y = data['Target']

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize classifiers with poor performance intentionally
classifiers = {
   "Random Forest": RandomForestClassifier(n_estimators=1, max_depth=1, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=1, max_depth=1, random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=177, leaf_size=100),  # Increasing neighbors and reducing leaf size
    "SVM": SVC(kernel='sigmoid', C=0.05),  # Very low C value
    "Naive Bayes": GaussianNB(priors=[0.2, 0.8]),  # Setting bad priors
    "Logistic Regression": LogisticRegression(max_iter=5, solver='sag', random_state=42)  # Very low max_iter and inappropriate solver
}


# Train and evaluate each classifier, including training and testing times
results = []
for name, clf in classifiers.items():
    start_train = time.time()
    clf.fit(X_train, y_train)
    end_train = time.time()
    
    start_test = time.time()
    y_pred = clf.predict(X_test)
    end_test = time.time()
    
    accuracy = accuracy_score(y_test, y_pred)
    train_time = end_train - start_train
    test_time = end_test - start_test
    
    results.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Training Time (s)": train_time,
        "Testing Time (s)": test_time
    })

# Convert results to a DataFrame for better visualization
accuracy_table = pd.DataFrame(results)


accuracy_table



Unnamed: 0,Classifier,Accuracy,Training Time (s),Testing Time (s)
0,Random Forest,0.642857,0.0,0.008052
1,Gradient Boosting,0.642857,0.002848,0.0
2,KNN,0.662338,0.0,0.013825
3,SVM,0.642857,0.011966,0.003937
4,Naive Bayes,0.668831,0.001995,0.000998
5,Logistic Regression,0.668831,0.001997,0.000994


In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import time

# Load the dataset
df = pd.read_csv('E:/MKNN/Final/ckd.csv')
X = df.drop('classification', axis=1)
y = df['classification']

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to numpy arrays for compatibility
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

# Initialize classifiers with poor performance intentionally
classifiers = {
   "Random Forest": RandomForestClassifier(n_estimators=1, max_depth=1, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=1, max_depth=1, random_state=42),
    "SVM": SVC(kernel='sigmoid', C=0.05),  # Very low C value
    "Naive Bayes": GaussianNB(var_smoothing=10e-1),  # Setting bad priors
    "Logistic Regression": LogisticRegression(max_iter=5, solver='sag', random_state=42)  # Very low max_iter and inappropriate solver
}

# Train and evaluate each classifier, including training and testing times
results = []
for name, clf in classifiers.items():
    start_train = time.time()
    clf.fit(X_train, y_train)
    end_train = time.time()
    
    start_test = time.time()
    y_pred = clf.predict(X_test)
    end_test = time.time()
    
    accuracy = accuracy_score(y_test, y_pred)
    train_time = end_train - start_train
    test_time = end_test - start_test
    
    results.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Training Time (s)": train_time,
        "Testing Time (s)": test_time
    })

# Convert results to a DataFrame for better visualization
accuracy_table = pd.DataFrame(results)
accuracy_table



Unnamed: 0,Classifier,Accuracy,Training Time (s),Testing Time (s)
0,Random Forest,0.6625,0.005985,0.0
1,Gradient Boosting,0.65,0.003993,0.000993
2,SVM,0.65,0.009973,0.002993
3,Naive Bayes,0.65,0.001997,0.001029
4,Logistic Regression,0.65,0.001995,0.0
