Machine Learning with Dask

In [None]:
# !pip install scikeras>=0.1.8
# !pip install tensorflow>=2.3.0
# !pip install -U skorch
# !pip install torch
# !pip install torchvision
# !pip install pytorch-cpu #not sure if i need to fix this
!pip install s3fs

In [None]:
!pip install cloudpickle==2.1.0
!pip install dask==2022.05.0
!pip install distributed==2022.5.0
!pip install lz4==4.0.0
!pip install msgpack==1.0.3
!pip install toolz==0.11.2


setup cluster

In [None]:
import dask
# Dask multithreading is only suited for mostly non-Python code (like pandas, numpy, etc.)
#tag::threads[]
dask.config.set(scheduler='threads')
#end::threads[]
#tag::process[]
dask.config.set(scheduler='processes')
#end::process[]
#tag::dask_use_forkserver[]
dask.config.set({"multiprocessing.context": "forkserver", "scheduler": "processes"})
#end::dask_use_forkserver[]

In [None]:
!export

In [None]:
#tag::make_dask_k8s_client[]
import dask
from dask.distributed import Client
from dask_kubernetes import KubeCluster, make_pod_spec
# Use load balancer to make it externally available, for purely internal
# the default of "ClusterIP" is better.
dask.config.set({"kubernetes.scheduler-service-type": "LoadBalancer"})
worker_template = make_pod_spec(image='holdenk/dask:latest',
                         memory_limit='8G', memory_request='8G',
                         cpu_limit=1, cpu_request=1)
scheduler_template = make_pod_spec(image='holdenk/dask:latest',
                         memory_limit='4G', memory_request='4G',
                         cpu_limit=1, cpu_request=1)
cluster = KubeCluster(pod_template = worker_template, scheduler_pod_template = scheduler_template)
cluster.adapt()    # or create and destroy workers dynamically based on workload
from dask.distributed import Client
client = Client(cluster)
#end::make_dask_k8s_client[]

In [None]:
client.close()

In [2]:
from dask.distributed import Client
# when working with clusters, specify cluster config, n_workers and worker_size
client = Client()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 51567 instead


In [3]:
import pandas as pd
import glob
import toolz
import dask
import dask.array as da
import torch
from torchvision import transforms
from PIL import Image
import numpy as np
import torch.nn as nn
import torch.optim as optim # optimization algo (eg SGD, Adam)
import torch.nn.functional as F # non-linear activation fn (e.g. relu, softmin, softamx, logsigmoid)
from torchvision import datasets, transforms # convenience wrapper for datasets and model architectures, common image transformations
from torch.utils.data.sampler import SubsetRandomSampler #validation test split
import urllib.request
import zipfile

1. Extract: get fashion-mnist

In [133]:
# we use dask.delayed so that load and transform is lazily done in cluster
@dask.delayed
def transform(img):
    trn = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,),)
    ]) #normalize mean / std. given as tuples
    #we convert PIL image or numpy.ndarray [0,255] to torch.FloatTensor, (C,H,W) [0.0,1.0]
    return trn(img)

def transform_nonlazy(img):
    trn = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,),)
    ]) #normalize mean / std. given as tuples
    #we convert PIL image or numpy.ndarray [0,255] to torch.FloatTensor, (C,H,W) [0.0,1.0]
    return trn(img)

# @dask.delayed
# def transform_pd(img):
# #     ndarr = torch.from_numpy(img).long()
#     trn = transforms.Compose([
# #             transforms.to_numpy(),
#             transforms.ToTensor(),
#             transforms.Normalize((0.5,), (0.5,),)
#     ]) #normalize mean / std. given as tuples
#     #we convert PIL image or numpy.ndarray [0,255] to torch.FloatTensor, (C,H,W) [0.0,1.0]
#     return trn(ndarr)
@dask.delayed
def transform_pd(df):
    numpy_arr = df.to_numpy()
#     tensor = torch.from_numpy(img).long()
    trn = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,),)
    ]) #normalize mean / std. given as tuples
    #we convert PIL image or numpy.ndarray [0,255] to torch.FloatTensor, (C,H,W) [0.0,1.0]
    return trn(numpy_arr)

In [6]:
def load(path, fs=__builtins__):
    with fs.open(path, 'rb') as f:
        img = Image.open(f).convert("RGB")
        return img
    
def load_csv(filename):
    return pd.read_csv(filename)

In [134]:
tensors = []
for f in ["fashion_mnist/dataset/fashion-mnist_train.csv"]:
    img_pd = client.submit(load_csv, f)
#     tensors.append(img_pd)
    tensor = client.submit(transform_pd, img_pd)
    tensors.append(tensor)
# tensors[:5]

In [135]:
tensors

[<Future: finished, type: dask.delayed.Delayed, key: transform_pd-722e999486202d9e5681c1d72a576509>]

In [109]:
batches = []
for b in toolz.partition_all(10, tensors):
    batch = client.submit(torch.stack, b)
    batches.append(batch)

Key:       stack-7c03d3becc2397790c37652b0642043f
Function:  stack
args:      ((Delayed('transform_pd-51403590-793c-4507-9b23-e01591c1ca39'),))
kwargs:    {}
Exception: "TypeError('expected Tensor as element 0 in argument 0, but got Delayed')"



In [136]:
tensors[0].status

'finished'

In [137]:
x = await tensors[0]

In [138]:
x

Delayed('transform_pd-1431b49b-53e6-4e30-93c5-8f7087cfe06e')

In [139]:
dask.compute(x)

Key:       transform_pd-1431b49b-53e6-4e30-93c5-8f7087cfe06e
Function:  transform_pd
args:      (       label  pixel1  pixel2  pixel3  ...  pixel781  pixel782  pixel783  pixel784
0          2       0       0       0  ...         0         0         0         0
1          9       0       0       0  ...         0         0         0         0
2          6       0       0       0  ...         0         0         0         0
3          0       0       0       0  ...         0         0         0         0
4          3       0       0       0  ...         0         0         0         0
...      ...     ...     ...     ...  ...       ...       ...       ...       ...
59995      9       0       0       0  ...         0         0         0         0
59996      1       0       0       0  ...         0         0         0         0
59997      8       0       0       0  ...         0         0         0         0
59998      8       0       0       0  ...         0         0         0         0
5

TypeError: Input tensor should be a float tensor. Got torch.int64.

In [100]:
img = load_csv("fashion_mnist/dataset/fashion-mnist_train.csv")

In [61]:
img = pd.read_csv("fashion_mnist/dataset/fashion-mnist_train.csv")

In [169]:
type(img)
img_df = img

In [170]:
img_df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,1,0,0,0,0,0,0,0,0,0,...,73,0,0,0,0,0,0,0,0,0
59997,8,0,0,0,0,0,0,0,0,0,...,160,162,163,135,94,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [141]:
numpy_arr = img_df.to_numpy()

In [175]:
numpy_arr[0]

array([  2,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   4,   0,
         0,   0,   0,   0,  62,  61,  21,  29,  23,  51, 136,  61,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,  88, 201, 228, 225, 255, 115,  62, 137, 255, 235,
       222, 255, 135,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,  47, 252, 234, 238, 224, 215, 215, 229, 108, 180,
       207, 214, 224, 231, 249, 254,  45,   0,   0,   0,   0,   0,   0,
         0,   0,   1,   0,   0, 214, 222, 210, 213, 224, 225, 21

In [176]:
trn = transforms.Compose([
        transforms.ToTensor()
#         transforms.Normalize(mean=0.5, std=0.5)
]) #normalize mean / std. given as tuples
#we convert PIL image or numpy.ndarray [0,255] to torch.FloatTensor, (C,H,W) [0.0,1.0]


In [182]:
xxx = trn(numpy_arr)

In [180]:
type(xxx)

torch.Tensor

In [178]:
xxx[0][0]

tensor([  2,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   4,   0,   0,   0,   0,   0,  62,  61,  21,
         29,  23,  51, 136,  61,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,  88, 201, 228, 225, 255, 115,
         62, 137, 255, 235, 222, 255, 135,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,  47, 252, 234, 238, 224, 215, 215, 229,
        108, 180, 207, 214, 224, 231, 249, 254,  45,   0,   0,   0,   0,   0,
          0,   0,   0,   1,   0,   0, 214, 222, 210, 213, 224, 2

In [10]:
objs = datasets.FashionMNIST('~/.pytorch/F_MNIST_data', download=True, train=True, transform = transform)

In [11]:
objs

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: /Users/mk/.pytorch/F_MNIST_data
    Split: Train
    StandardTransform
Transform: Delayed('transform-8f6eab51-32e0-4f8a-81a9-b0f0ea6a4cef')

In [24]:
objs_nonlazy = datasets.FashionMNIST('~/.pytorch/F_MNIST_data', download=True, train=True, transform = transform_nonlazy)

In [26]:
type(objs_nonlazy)

torchvision.datasets.mnist.FashionMNIST

In [21]:
objs_nonlazy

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: /Users/mk/.pytorch/F_MNIST_data
    Split: Train
    StandardTransform
Transform: <function transform_nonlazy at 0x7fc59ae28700>

In [16]:
trainset_len = 6000 #cheating a bit - we already know length
indices = list(range(trainset_len))
split = int(np.floor(0.2 * trainset_len))
batch_size = 64
num_workers = 4 #would get it from distributed client
train_sampler = SubsetRandomSampler(indices[:split])



In [19]:
train_loader = torch.utils.data.DataLoader(
    objs, sampler=train_sampler, batch_size=batch_size, num_workers=num_workers
)
train_loader_nonlazy = torch.utils.data.DataLoader(
    objs_nonlazy, sampler=train_sampler, batch_size=batch_size, num_workers=num_workers
)


In [30]:
tensors_nonlazy = objs_nonlazy.train_data()

TypeError: 'Tensor' object is not callable

In [20]:
for image, labels in train_loader_nonlazy:
    pass

Traceback (most recent call last):
  File "<string>", line 1, in <module>
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/mk/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/mk/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/mk/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
  File "/Users/mk/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/mk/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/mk/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
At

RuntimeError: DataLoader worker (pid(s) 40149, 40151, 40152) exited unexpectedly