In [1]:
# Confirm versions
import platform, sklearn
print("Python:", platform.python_version())
print("Scikit-learn:", sklearn.__version__)


Python: 3.10.14
Scikit-learn: 1.6.1


In [2]:
!pip install scikit-learn-intelex




In [3]:
from sklearnex import patch_sklearn
patch_sklearn()


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [4]:
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load dataset
url = "https://archive.ics.uci.edu/static/public/891/data.csv"
df = pd.read_csv(url)
df = df.drop(columns=['ID'])

X = df.drop(columns=['Diabetes_binary'])
y = df['Diabetes_binary']

# Preprocessing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ---------- Standard scikit-learn ----------
start = time.time()
clf_sklearn = LogisticRegression(max_iter=1000)
clf_sklearn.fit(X_train, y_train)
end = time.time()

y_pred_sklearn = clf_sklearn.predict(X_test)
acc_sklearn = accuracy_score(y_test, y_pred_sklearn)
time_sklearn = end - start

print("Standard scikit-learn")
print(f"Accuracy: {acc_sklearn:.4f}")
print(f"Training Time: {time_sklearn:.4f} seconds\n")

# ---------- Intel oneAPI Acceleration ----------
!pip install -q scikit-learn-intelex

from sklearnex import patch_sklearn
patch_sklearn()

start = time.time()
clf_intel = LogisticRegression(max_iter=1000)
clf_intel.fit(X_train, y_train)
end = time.time()

y_pred_intel = clf_intel.predict(X_test)
acc_intel = accuracy_score(y_test, y_pred_intel)
time_intel = end - start

print("Intel oneAPI Acceleration")
print(f"Accuracy: {acc_intel:.4f}")
print(f"Training Time: {time_intel:.4f} seconds")


Standard scikit-learn
Accuracy: 0.8659
Training Time: 0.1131 seconds





Intel oneAPI Acceleration
Accuracy: 0.8659
Training Time: 0.1142 seconds


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [5]:
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load and preprocess dataset
df = pd.read_csv("https://archive.ics.uci.edu/static/public/891/data.csv")
df = df.drop(columns=["ID"])
X = df.drop(columns=["Diabetes_binary"])
y = df["Diabetes_binary"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Standard sklearn Random Forest
start = time.time()
clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
clf.fit(X_train, y_train)
end = time.time()

y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print("scikit-learn Random Forest")
print(f"Accuracy: {acc:.4f}")
print(f"Training time: {end - start:.4f} seconds")




scikit-learn Random Forest
Accuracy: 0.8671
Training time: 4.1445 seconds




In [6]:
from sklearnex import patch_sklearn
patch_sklearn()

# Intel-accelerated Random Forest
start = time.time()
clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
clf.fit(X_train, y_train)
end = time.time()

y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print("⚡ Intel oneAPI Random Forest")
print(f"Accuracy: {acc:.4f}")
print(f"Training time: {end - start:.4f} seconds")


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


⚡ Intel oneAPI Random Forest
Accuracy: 0.8671
Training time: 3.6874 seconds


