In [1]:
import pandas as pd

# If the CSV sits in the same folder as the notebook, the bare file name is enough.
csv_name = 'diabetes.csv'
df = pd.read_csv(csv_name)

# Look at the first few rows to confirm the file loaded as expected.
preview = df.head()
print(preview)


   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [3]:
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic binary-classification data, split 70/30 into train and test.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit both models up front: Naive Bayes is the primary classifier and
# KNN is the fallback for the samples Naive Bayes is unsure about.
nb = GaussianNB()
nb.fit(X_train, y_train)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Per-class probabilities from Naive Bayes on the test set.
nb_probs = nb.predict_proba(X_test)

# A sample counts as ambiguous when its second-column probability lies
# strictly inside (threshold, 1 - threshold), i.e. close to 0.5.
# Note: despite its name, `ambiguous_indices` is a boolean mask over X_test.
threshold = 0.4
second_class_prob = nb_probs[:, 1]
ambiguous_indices = (threshold < second_class_prob) & (second_class_prob < 1 - threshold)

# Start from the Naive Bayes labels, then overwrite the ambiguous ones
# with KNN's labels (skipped entirely when nothing is ambiguous).
final_predictions = nb.predict(X_test)
if ambiguous_indices.any():
    knn_preds = knn.predict(X_test[ambiguous_indices])
    final_predictions[ambiguous_indices] = knn_preds

# Score the combined predictions against the held-out labels.
accuracy = accuracy_score(y_test, final_predictions)
print(f"Hybrid NaiveBayes-KNN Accuracy: {accuracy:.2f}")


Hybrid NaiveBayes-KNN Accuracy: 0.82


In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Build the synthetic dataset and hold out 30% of it for testing.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split

# Primary model: a depth-limited decision tree (fit returns the estimator itself).
tree = DecisionTreeClassifier(max_depth=5, random_state=42).fit(X_train, y_train)

# Fallback model: 5-nearest-neighbours trained on the same data.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Tree probabilities for the test set; the second column drives the ambiguity check.
tree_probs = tree.predict_proba(X_test)

# Ambiguous = probability strictly between threshold and 1 - threshold.
# `ambiguous_indices` is a boolean mask with one entry per test sample.
threshold = 0.4
above_lower = tree_probs[:, 1] > threshold
below_upper = tree_probs[:, 1] < 1 - threshold
ambiguous_indices = above_lower & below_upper

# Tree labels by default; ambiguous samples are re-labelled by KNN.
final_predictions = tree.predict(X_test)
if ambiguous_indices.any():
    knn_preds = knn.predict(X_test[ambiguous_indices])
    final_predictions[ambiguous_indices] = knn_preds

# Report accuracy of the combined predictions.
accuracy = accuracy_score(y_test, final_predictions)
print(f"Hybrid Tree-KNN Accuracy: {accuracy:.2f}")


Hybrid Tree-KNN Accuracy: 0.87


In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Data: 1000 synthetic samples with 20 features and two classes, split 70/30.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Random forest is the main classifier.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# The forest's labels are the default answer for every test sample.
final_predictions = rf.predict(X_test)

# Forest probabilities decide which samples are handed over to KNN:
# those whose second-column probability is strictly inside
# (threshold, 1 - threshold). The result is a boolean mask over X_test.
rf_probs = rf.predict_proba(X_test)
threshold = 0.45
upper_bound = 1 - threshold
ambiguous_indices = (rf_probs[:, 1] > threshold) & (rf_probs[:, 1] < upper_bound)

# KNN acts as the tie-breaker for the ambiguous samples.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Only call KNN when at least one sample is ambiguous.
if ambiguous_indices.any():
    knn_preds = knn.predict(X_test[ambiguous_indices])
    final_predictions[ambiguous_indices] = knn_preds

# Accuracy of the combined predictions on the held-out set.
accuracy = accuracy_score(y_test, final_predictions)
print(f"Hybrid RandomForest-KNN Accuracy: {accuracy:.2f}")


Hybrid RandomForest-KNN Accuracy: 0.86


In [9]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load dataset
# Build X and y explicitly so this cell runs on a fresh kernel instead of
# relying on arrays left behind by an earlier cell. This is the same synthetic
# dataset the earlier hybrid cells generate.
# NOTE(review): load_iris is imported above but was never called; switch to it
# here only if iris was the intended dataset (the reported accuracy will change).
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Split into train and test sets (80/20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 1: Train a shallow Decision Tree on the raw features
tree = DecisionTreeClassifier(max_depth=3, random_state=42)
tree.fit(X_train, y_train)

# Step 2: Use the tree's class probabilities as the new feature matrix
# NOTE(review): the training features are the tree's predictions on the same
# rows it was fitted on, so they are optimistic; out-of-fold predictions would
# be a stricter setup -- confirm whether that matters for this comparison.
X_train_tree = tree.predict_proba(X_train)
X_test_tree = tree.predict_proba(X_test)

# Step 3: Train Logistic Regression on the Decision Tree outputs
logreg = LogisticRegression(random_state=42)
logreg.fit(X_train_tree, y_train)

# Step 4: Evaluate the hybrid model on the held-out set
y_pred = logreg.predict(X_test_tree)
accuracy = accuracy_score(y_test, y_pred)

print(f"Hybrid Model Accuracy: {accuracy:.2f}")

Hybrid Model Accuracy: 0.86
