In [18]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from joblib import Parallel, delayed

In [19]:

# Load dataset (Iris dataset for example)
import time


iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define the models to train
def train_svm(X_train, y_train):
    start_time = time.time()

    svm_model = SVC(kernel='linear')
    svm_model.fit(X_train, y_train)
    end_time = time.time()

    return svm_model, end_time - start_time

def train_knn(X_train, y_train):
    start_time = time.time()

    knn_model = KNeighborsClassifier(n_neighbors=5)
    knn_model.fit(X_train, y_train)
    end_time = time.time()

    return knn_model, end_time - start_time

def train_decision_tree(X_train, y_train):
    start_time = time.time()

    tree_model = DecisionTreeClassifier()
    tree_model.fit(X_train, y_train)
    end_time = time.time()

    return tree_model, end_time - start_time

In [36]:


n_jobs_proc=-1

# Use joblib to run all models in parallel by passing a list of delayed tasks
results = Parallel(n_jobs=n_jobs_proc)(
    [delayed(train_svm)(X_train, y_train),   # Train SVM
     delayed(train_knn)(X_train, y_train),   # Train KNN
     delayed(train_decision_tree)(X_train, y_train)]  # Train Decision Tree
)

# Unpack the trained models
(svm_model, svm_time1), (knn_model, knn_time1), (tree_model, tree_time1) = results



# Print accuracy and training time for each model
print(f" Training Time: {svm_time1:.4f} seconds")
print(f" Training Time: {knn_time1:.4f} seconds")
print(f" Training Time: {tree_time1:.4f} seconds")


 Training Time: 0.0050 seconds
 Training Time: 0.0010 seconds
 Training Time: 0.0030 seconds


## using DASK

In [21]:
import dask
from dask.distributed import Client ,LocalCluster
# Start Dask client with 4 workers (4 CPUs)
import dask.delayed



# Split dataset into training and test sets
features_train, features_test, labels_train, labels_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [32]:
cluster = LocalCluster(dashboard_address=':8888' , n_workers=-1 , threads_per_worker = -1)
cluster

Perhaps you already have a cluster running?
Hosting the HTTP server on port 35986 instead


0,1
Dashboard: http://127.0.0.1:35986/status,Workers: 0
Total threads: 0,Total memory: 0 B
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:35989,Workers: 0
Dashboard: http://127.0.0.1:35986/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [33]:
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:35986/status,

0,1
Dashboard: http://127.0.0.1:35986/status,Workers: 0
Total threads: 0,Total memory: 0 B
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:35989,Workers: 0
Dashboard: http://127.0.0.1:35986/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [34]:
# Define the models to train
def train_svm_model(features_train, labels_train):
    start = time.time()
    svm_clf = SVC(kernel='linear')
    svm_clf.fit(features_train, labels_train)
    end = time.time()
    return svm_clf, end - start

def train_knn_model(features_train, labels_train):
    start = time.time()
    knn_clf = KNeighborsClassifier(n_neighbors=5)
    knn_clf.fit(features_train, labels_train)
    end = time.time()
    return knn_clf, end - start

def train_tree_model(features_train, labels_train):
    start = time.time()
    tree_clf = DecisionTreeClassifier()
    tree_clf.fit(features_train, labels_train)
    end = time.time()
    return tree_clf, end - start

In [35]:

# Use Dask delayed to parallelize the training of models
svm_task_delayed = dask.delayed(train_svm_model)(X_train, y_train)
knn_task_delayed = dask.delayed(train_knn_model)(X_train, y_train)
tree_task_delayed = dask.delayed(train_tree_model)(X_train, y_train)

# Compute the tasks in parallel
trained_models = dask.compute(svm_task_delayed, knn_task_delayed, tree_task_delayed, scheduler='threads')

# Unpack the trained models and their training times
(svm_clf, svm_training_time), (knn_clf, knn_training_time), (tree_clf, tree_training_time) = trained_models

# Print training times for each model
print(f"SVM Training Time: {svm_training_time:.4f} seconds")
print(f"KNN Training Time: {knn_training_time:.4f} seconds")
print(f"Decision Tree Training Time: {tree_training_time:.4f} seconds")




SVM Training Time: 0.0935 seconds
KNN Training Time: 0.0160 seconds
Decision Tree Training Time: 0.4969 seconds


In [37]:

# Shutdown Dask client
client.shutdown()

## instead of using CPU we try using GPU
### dask - cudaf

In [None]:
import cupy as cp
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import time

# Load dataset (Iris dataset for example)
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Move data to GPU
X_gpu = cp.array(X)
y_gpu = cp.array(y)

# Split dataset into training and test sets (still using CPU split here)
X_train_gpu, X_test_gpu, y_train_gpu, y_test_gpu = train_test_split(
    X_gpu, y_gpu, test_size=0.3, random_state=42
)


In [4]:

# Train SVM on GPU using CuPy arrays
def train_svm_gpu(X_train_gpu, y_train_gpu):
    start_time = cp.cuda.Event()
    end_time = cp.cuda.Event()

    start_time.record()

    # Fit using CPU model but data on GPU
    svm_model_gpu = SVC(kernel='linear')
    svm_model_gpu.fit(cp.asnumpy(X_train_gpu), cp.asnumpy(y_train_gpu))

    end_time.record()
    end_time.synchronize()

    return svm_model_gpu, cp.cuda.get_elapsed_time(start_time, end_time)

# Train KNN on GPU using CuPy arrays
def train_knn_gpu(X_train_gpu, y_train_gpu):
    start_time = cp.cuda.Event()
    end_time = cp.cuda.Event()

    start_time.record()

    # Fit using CPU model but data on GPU
    knn_model_gpu = KNeighborsClassifier(n_neighbors=5)
    knn_model_gpu.fit(cp.asnumpy(X_train_gpu), cp.asnumpy(y_train_gpu))

    end_time.record()
    end_time.synchronize()

    return knn_model_gpu, cp.cuda.get_elapsed_time(start_time, end_time)

# Train Decision Tree on GPU using CuPy arrays
def train_decision_tree_gpu(X_train_gpu, y_train_gpu):
    start_time = cp.cuda.Event()
    end_time = cp.cuda.Event()

    start_time.record()

    # Fit using CPU model but data on GPU
    tree_model_gpu = DecisionTreeClassifier()
    tree_model_gpu.fit(cp.asnumpy(X_train_gpu), cp.asnumpy(y_train_gpu))

    end_time.record()
    end_time.synchronize()

    return tree_model_gpu, cp.cuda.get_elapsed_time(start_time, end_time)


# Parallel execution of the models (although we are now using the GPU)
svm_model, svm_time_gpu = train_svm_gpu(X_train_gpu, y_train_gpu)
knn_model, knn_time_gpu = train_knn_gpu(X_train_gpu, y_train_gpu)
tree_model, tree_time_gpu = train_decision_tree_gpu(X_train_gpu, y_train_gpu)

# Print training time for each model
print(f"SVM Training Time on GPU: {svm_time_gpu:.4f} seconds")
print(f"KNN Training Time on GPU: {knn_time_gpu:.4f} seconds")
print(f"Decision Tree Training Time on GPU: {tree_time_gpu:.4f} seconds")
