In [1]:
import copy
import torch
import torch.nn as nn
import numpy as np
import os
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.preprocessing import label_binarize
from sklearn import metrics

from flcore.pflniid_utils.data_utils import read_client_data
from utils.custom_loss_class import CPHSLoss

# Toy Example of DataLoader
- https://www.youtube.com/watch?v=3GVUzwXXihs
- https://pytorch.org/docs/stable/_modules/torch/utils/data/sampler.html#BatchSampler

In [2]:
tp = torch.zeros((10,11))
tp[:,0] = torch.arange(10)
tp

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [4., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [7., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [9., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [3]:
dl = DataLoader(
    dataset=tp,
    batch_size=10, 
    drop_last=False) 

it = iter(dl)

In [4]:
it.__next__()

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [4., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [7., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [9., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

# Validating read_client_data()

In [5]:
train_data = read_client_data('cphs', 0, is_train=True)


In [6]:
dl1 = DataLoader(
                dataset=train_data,
                batch_sampler=torch.utils.data.BatchSampler(
                    torch.utils.data.SequentialSampler(train_data), 
                    batch_size=1200, 
                    drop_last=False) 
        )
it1 = iter(dl1)

dl2 = DataLoader(
                dataset=train_data,
                batch_size=1200,
                drop_last=False) 
it2 = iter(dl2)


In [7]:
s1 = it1.__next__()
print(s1[0].size())
print(s1[0][:, :4])

torch.Size([1200, 64])
tensor([[  0.0000,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000],
        ...,
        [128.8972,  71.8650,  34.2512,  10.2227],
        [135.5390,  80.6596,  33.9306,  10.2820],
        [135.5390,  80.6596,  33.9306,  10.2820]])


In [8]:
s2 = it2.__next__()
print(s2[0].size())
print(s2[0][:4, :4])

torch.Size([1200, 64])
tensor([[ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [16.4556, 18.0038,  2.8854,  5.8259]])


# Deconstructed load_train_data() From Clientbase.py

In [9]:
#self.model = copy.deepcopy(args.model)
algorithm = 'FedAvg'
dataset = 'cphs'
device = 'cpu'
ID = 0  # integer
#self.save_folder_name = args.save_folder_name

#self.num_classes = args.num_classes
#train_samples = train_samples
#test_samples = test_samples
batch_size = 1200
learning_rate = 0.005
local_epochs = 1

# My additional parameters
pca_channels = 64
lambdas = [0, 1e-3, 1e-4]
lambdaF = lambdas[0]
lambdaD = lambdas[1]
lambdaE = lambdas[2]
current_update = 0
local_round = 0
last_global_round = 0
local_round_threshold = 50
update_ix=[0,  1200,  2402,  3604,  4806,  6008,  7210,  8412,  9614, 10816, 12018, 13220, 14422, 15624, 16826, 18028, 19230, 20432, 20769]

In [10]:
local_round += 1
if local_round%local_round_threshold==0:
    current_update += 1

if batch_size == None:
    batch_size = batch_size
train_data = read_client_data(dataset, ID, is_train=True)


In [11]:
loader = DataLoader(
        dataset=train_data,
        batch_sampler=torch.utils.data.BatchSampler(
            torch.utils.data.SequentialSampler(dataset), 
            batch_size=batch_size, 
            drop_last=False) 
)
sit1 = iter(loader)
s1 = sit1.__next__()
print(s1[0].size())
print(s1[0][:, :10])

torch.Size([4, 64])
tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000],
        [16.4556, 18.0038,  2.8854,  5.8259,  8.1008,  2.2209,  9.7316, 19.7165,
         11.9520,  0.5433]])


In [18]:
# I think it is doing Sequential Sampler by default, if shuffle is False (which is true by default)

loader2 = DataLoader(
        dataset=train_data,
        batch_size=batch_size, 
        drop_last=False) 
sit2 = iter(loader2)
s2 = sit2.__next__()
print(s2[0].size())
print(s2[0][:4, :10])

torch.Size([1200, 64])
tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000],
        [16.4556, 18.0038,  2.8854,  5.8259,  8.1008,  2.2209,  9.7316, 19.7165,
         11.9520,  0.5433]])


In [19]:
print(s2[0][:10, :5])

tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [16.4556, 18.0038,  2.8854,  5.8259,  8.1008],
        [16.4556, 18.0038,  2.8854,  5.8259,  8.1008],
        [16.4556, 18.0038,  2.8854,  5.8259,  8.1008],
        [40.5837, 41.7818, 13.8757, 12.2603, 15.2425],
        [58.4589, 56.5549, 24.6047, 24.2789, 20.7511],
        [58.4589, 56.5549, 24.6047, 24.2789, 20.7511],
        [45.4502, 43.0116, 24.5893, 24.8776, 16.5744]])


In [20]:
print(f"s2 type: {type(s2)}")
print(f"s2 len: {len(s2)}")
print(f"s2[0] (training data) size: {s2[0].size()}")
print(f"s2[1] (training data) size: {s2[1].size()}")

s2 type: <class 'list'>
s2 len: 2
s2[0] (training data) size: torch.Size([1200, 64])
s2[1] (training data) size: torch.Size([1200, 2])


In [21]:
s2[0]

tensor([[  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        ...,
        [128.8972,  71.8650,  34.2512,  ...,   8.3115,  10.0432,   9.2224],
        [135.5390,  80.6596,  33.9306,  ...,  10.5396,  11.5099,   9.2469],
        [135.5390,  80.6596,  33.9306,  ...,  10.5396,  11.5099,   9.2469]])

## Integrating With CPHS Processing Code

In [None]:
s_temp = s2[0][0:update_ix[1]]

In [25]:
normalize_EMG = False
PCA_comps = 64

# First, normalize the entire s matrix
if normalize_EMG:
    s_normed = s_temp/torch.max(s_temp)
else:
    s_normed = s_temp
# Now do PCA unless it is set to 64 (AKA the default num channels i.e. no reduction)
# Also probably ought to find a global transform if possible so I don't recompute it every time...
if PCA_comps!=64:  
    pca = PCA(n_components=PCA_comps)
    s_normed = pca.fit_transform(s_normed)
#s = np.transpose(s_normed)

In [27]:
s_normed.size()

torch.Size([1200, 64])

In [32]:
s = torch.transpose(s_normed, 0, 1)
s.size()

torch.Size([64, 1200])

In [None]:
# Can't run this since I don't have the model weights

#F = s[:,:-1] # note: truncate F for estimate_decoder
#v_actual = self.w@s
#p_actual = np.cumsum(v_actual, axis=1)*self.dt  # Numerical integration of v_actual to get p_actual
#p_reference = np.transpose(self.labels[lower_bound:upper_bound,:])
#self.V = (p_reference - p_actual)*self.dt
#
#self.loss = CPHSLoss(self.F, self.model.weight, self.V, torch.view(self.F)[0], lambdaF=self.lambdaF, lambdaD=self.lambdaD, lambdaE=self.lambdaE, Nd=2, Ne=self.pca_channels, return_cost_func_comps=False)
#
#self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)
#self.learning_rate_scheduler = torch.optim.lr_scheduler.ExponentialLR(
#    optimizer=self.optimizer, 
#    gamma=args.learning_rate_decay_gamma
#)
#self.learning_rate_decay = args.learning_rate_decay

## Checking How Many Iterations The Iter Object Has

In [None]:
try:
    s2 = sit2.__next__()
    print("We can run multiple times!")
except StopIteration:
    print("StopIteration Error: Can only call next once!")

Why can we run multiple times... can we run 18 or 19 total times (once for each update?)?

In [None]:
for i in range(19):
    print(i+2)  # +2 since we have already called __next__() twice in the code above
    
    try:
        s2 = sit2.__next__()
    except StopIteration:
        print("StopIteration Error: Can only call next once!")