In [1]:
import numpy as np
import os
import glob
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
import collections
from sklearn.model_selection import train_test_split
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow_federated as tff
import random

In [2]:
# Root folder of the dataset; the loop below reads <root>/<client>/<class>/*.jpg.
data_dir_parent = "C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset"
# image_names[c] / labels[c] collect the file paths and the integer labels of
# class c, pooled over all client folders.
image_names = [[] for _ in range(11)]
labels = [[] for _ in range(11)]
for client_num in range(6):
    print(client_num)
    data_dir = data_dir_parent+"/"+str(client_num)
    for i in range(11):
        data_dir2 = data_dir+"/"+str(i)+"/*.jpg"
        # Scan each folder once (it used to be globbed twice) and sort the
        # result: glob returns files in arbitrary order, and the seeded
        # train/test split further down depends on the order of this list.
        temp_names = sorted(glob.glob(data_dir2))
        if temp_names:
            image_names[i].extend(temp_names)
            labels[i].extend(np.ones(len(temp_names),dtype=int)*i)

0
1
2
3
4
5


In [3]:
def roundup(nom, denom):
    """Divide nom by denom, bumping the floor quotient by one when the remainder is positive.

    For a positive denom this is the ceiling of nom / denom.
    """
    quotient, remainder = divmod(nom, denom)
    # A positive remainder means the floor quotient falls one short.
    if remainder > 0:
        return quotient + 1
    return quotient

In [21]:
def preprocess(dataset, shuffle_buffer=10000, batch_size=128, seed=42):
    """Shuffle, batch and re-key a client dataset.

    Args:
        dataset: object exposing chained ``shuffle``/``batch``/``map`` calls
            whose elements can be indexed with ``'image'`` and ``'label'``.
        shuffle_buffer: buffer size handed to ``dataset.shuffle``.
        batch_size: number of elements per batch.
        seed: seed handed to ``dataset.shuffle``.

    Returns:
        The result of ``shuffle -> batch -> map``. Each mapped batch is an
        OrderedDict with ``x`` (the batch's ``'image'`` entry) and ``y``
        (the ``'label'`` entry reshaped to ``[-1, 1]``).

    The defaults reproduce the previously hard-coded values (10000, 128, 42),
    so existing ``preprocess(dataset)`` calls behave as before.
    """

    def batch_format_fn(element):
        # Rename the keys to x / y and turn the labels into a column.
        return collections.OrderedDict(
                x=element['image'],
                y=tf.reshape(element['label'], [-1, 1]))

    shuffled = dataset.shuffle(shuffle_buffer, seed=seed)
    return shuffled.batch(batch_size).map(batch_format_fn)

In [12]:
# Number of training images kept per class after balancing.
spacecount = 240
train_image_names = []
labels_train = []
test_image_names = []
labels_test = []
# Dedicated seeded generator: the global `random` module was never seeded, so
# the selected training files used to change on every run.
balance_rng = random.Random(42)
for i in range(11):
    # Per-class 80/20 split; the test part is kept as-is (not balanced).
    train, test = train_test_split(image_names[i], test_size=0.2, random_state=42)
    test_image_names.extend(test)
    labels_test.extend(np.ones(len(test),dtype=int)*i)
    # Repeat the class's training list often enough to cover `spacecount`
    # entries. roundup(...) is 1 whenever len(train) >= spacecount, so large
    # classes are used once while small classes are oversampled; either way
    # the list is shuffled and truncated to exactly `spacecount` files.
    new_num = roundup(spacecount, len(train))
    temp_names = train*new_num
    balance_rng.shuffle(temp_names)
    train_image_names.extend(temp_names[:spacecount])
    labels_train.extend(np.ones(spacecount,dtype=int)*i)

In [91]:
# Partition the balanced training set across `num_clients` simulated clients,
# giving every client its own class mix drawn from a Dirichlet distribution,
# then wrap the train and test data as TFF client datasets.

# Size passed to PIL's Image.resize for the test images below.
size = (32,32)
# Scale applied to the all-ones vector handed to np.random.dirichlet.
dirichlet_parameter = 0.2
num_clients = 6
NUM_CLASSES = 11
train_labels = np.array(labels_train)
test_labels = np.array(labels_test)
# Total number of training examples, counting `spacecount` per class.
TRAIN_EXAMPLES = spacecount*NUM_CLASSES
TRAIN_EXAMPLES_PER_LABEL = spacecount
# Shallow copy of the list of training file paths.
train_images = train_image_names.copy()

# Seed TensorFlow and NumPy; every random draw below goes through np.random.
tf.random.set_seed(42)
np.random.seed(42)

train_clients = collections.OrderedDict()
test_clients = collections.OrderedDict()
train_multinomial_vals = []
# Each client has a multinomial distribution over classes drawn from a
# Dirichlet.
for i in range(num_clients):
    proportion = np.random.dirichlet(dirichlet_parameter *
                                     np.ones(NUM_CLASSES,))
    train_multinomial_vals.append(proportion)

# One row per client, one column per class.
train_multinomial_vals = np.array(train_multinomial_vals)

# For every class: a shuffled array of the positions in train_labels that
# carry that class.
train_example_indices = []
for k in range(NUM_CLASSES):
    train_label_k = np.where(train_labels == k)[0]
    np.random.shuffle(train_label_k)
    train_example_indices.append(train_label_k)

# NOTE(review): this is indexed below as [label, position], which needs a 2-D
# array, i.e. the same number of examples for every class -- confirm that the
# upstream balancing guarantees this.
train_example_indices = np.array(train_example_indices)

# train_client_samples[c]: indices into the training set assigned to client c.
# train_count[k]: how many examples of class k have been handed out so far.
train_client_samples = [[] for _ in range(num_clients)]
train_count = np.zeros(NUM_CLASSES).astype(int)

train_examples_per_client = int(TRAIN_EXAMPLES / num_clients)
for k in range(num_clients):

    for i in range(train_examples_per_client):
      # Draw one class label from client k's current class distribution.
      sampled_label = np.argwhere(
          np.random.multinomial(1, train_multinomial_vals[k, :]) == 1)[0][0]
      # Hand the client the next not-yet-used example of that class.
      train_client_samples[k].append(
          train_example_indices[sampled_label, train_count[sampled_label]])
      train_count[sampled_label] += 1
      # Once a class is used up, zero its column for every client and
      # renormalise each row so it sums to 1 again.
      # NOTE(review): a row whose remaining entries are all 0 is divided by 0
      # here -- verify this cannot happen before sampling has finished.
      if train_count[sampled_label] == TRAIN_EXAMPLES_PER_LABEL:
        train_multinomial_vals[:, sampled_label] = 0
        train_multinomial_vals = (
            train_multinomial_vals /
            train_multinomial_vals.sum(axis=1)[:, None])

# Build one {'image', 'label'} dict per client, keyed by the integer client index.
for i in range(num_clients):
    client_name = i
    #x_train = train_images[np.array(train_client_samples[i])]
    # 'image' holds the entries of train_images (file paths), not decoded pixels.
    # NOTE(review): the test set below stores decoded, resized arrays instead --
    # confirm where the training images are meant to be loaded.
    x_train = [train_images[x] for x in np.array(train_client_samples[i])]
    #x_train = [tf.keras.preprocessing.image.img_to_array(PIL.Image.open(train_images[x]).resize(size))/255 for x in np.array(train_client_samples[i])]
    y_train = train_labels[np.array(
        train_client_samples[i])].astype('int64').squeeze()
    train_data = collections.OrderedDict(
        (('image', x_train), ('label', y_train)))
    train_clients[client_name] = train_data
    
# Every test image is opened, resized to `size`, converted to an array and
# divided by 255.
x_test = [tf.keras.preprocessing.image.img_to_array(PIL.Image.open(test_image_names[x]).resize(size))/255
           for x in range(len(labels_test))]
# All test data is stored under a single client id, 0.
test_data = collections.OrderedDict((('image', x_test), ('label', test_labels)))
test_clients[0] = test_data

train_dataset = tff.simulation.FromTensorSlicesClientData(train_clients)
test_dataset = tff.simulation.FromTensorSlicesClientData(test_clients)



In [92]:
# Show the client ids exposed by the federated training dataset.
train_dataset.client_ids

[0, 1, 2, 3, 4, 5]

In [93]:
# One preprocessed dataset per training client, in the order of client_ids.
ts = []
for client_id in train_dataset.client_ids:
    client_ds = train_dataset.create_tf_dataset_for_client(client_id)
    ts.append(preprocess(client_ds))

In [94]:
# class_count[c, k] = number of training examples of class k held by client c.
# NUM_CLASSES replaces the repeated literal 11, and each batch is tallied with
# one np.bincount call instead of a Python loop over individual labels.
class_count = np.zeros((num_clients, NUM_CLASSES), dtype=int)
for client_num in range(num_clients):
    print(client_num)
    for batch in ts[client_num]:
        # batch['y'] is a column of labels; flatten it for bincount.
        batch_labels = np.asarray(batch['y']).ravel()
        class_count[client_num] += np.bincount(batch_labels, minlength=NUM_CLASSES)

0
1
2
3
4
5


In [95]:
# Per-client (rows) by per-class (columns) training example counts.
class_count

array([[ 30,   0,   0,   2, 227,   3,   0, 157,  11,   8,   2],
       [  0, 100,  15,   0,   0,  56,  47,   4, 109,  74,  35],
       [140,   4,   0, 173,   0,   0,   1,   0,   0, 122,   0],
       [ 44, 126, 225,  21,   0,   0,   0,  19,   1,   4,   0],
       [ 26,   0,   0,   0,  13,  52,  51,   0, 119,   0, 179],
       [  0,  10,   0,  44,   0, 129, 141,  60,   0,  32,  24]])

In [90]:
# Repeated display of class_count.
# NOTE(review): the recorded output carries an older execution count than the
# cell that computes class_count, so it presumably stems from an earlier run.
class_count

array([[  6, 112,   0,   0,  41,   0, 217,   0,   0,   7,  57],
       [  2,  11,   1,  83,   0, 240,   2, 100,   0,   0,   1],
       [  1,   6, 239, 135,  15,   0,   0,   2,  42,   0,   0],
       [  0,   0,   0,   3, 160,   0,   0,  51, 166,   0,  60],
       [131,  27,   0,   0,   5,   0,   3,   0,   1, 233,  40],
       [100,  84,   0,  19,  19,   0,  18,  87,  31,   0,  82]])

In [192]:
# Repeated display of class_count.
# NOTE(review): the recorded output has a much later execution count and far
# larger totals than the other displays -- presumably a run with a different
# per-class sample count; confirm which configuration it belongs to.
class_count

array([[  53, 1409,    1,    0,  532,    0, 2794,    1,   18,  101,  591],
       [  14,  173,    3,  827,    0, 3000,   27, 1443,    0,    0,   13],
       [   2,   77, 2996, 1755,  105,    0,    0,   14,  549,    2,    0],
       [   0,    0,    0,   19, 1762,    0,    8,  663, 2195,    0,  853],
       [1643,  252,    0,    0,   24,    0,   66,    1,   15, 2897,  602],
       [1288, 1089,    0,  399,  577,    0,  105,  878,  223,    0,  941]])

In [195]:
# Build the preprocessed test datasets in this cell so it runs top-to-bottom
# on a fresh kernel; `tes` is otherwise only assigned in a cell further down
# the notebook.
tes = [preprocess(test_dataset.create_tf_dataset_for_client(x)) for x in test_dataset.client_ids]
# class_count2[0, k] = number of test examples of class k (one test client).
class_count2 = np.zeros((1,11))
for client_num in range(1):
    print(client_num)
    ds = tes[client_num]
    for batch in ds:
        # Each y is one label taken from the batch's label column.
        for y in batch['y']:
            class_count2[client_num,y] += 1
class_count2 = class_count2.astype(int)

0


In [196]:
# Per-class example counts of the test set.
class_count2

array([[5830,  770, 1275,  470,  185, 4476,   48,   84,  175,  356,  433]])

In [209]:
# Print the number of collected labels for each of the 11 classes.
# `labels` (filled by the loading cell) is used because `nplabels` is not
# assigned in any visible cell, which raises NameError on a fresh kernel; the
# recorded per-class lengths of `labels` are identical to this cell's output.
for i in range(11):
    print(len(labels[i]))

29150
3850
6371
2349
924
22378
240
418
874
1777
2165


In [190]:
# Preprocessed dataset for every test client.
# Note: the cell that counts test labels from `tes` sits above this cell in
# the notebook.
tes = []
for client_id in test_dataset.client_ids:
    client_ds = test_dataset.create_tf_dataset_for_client(client_id)
    tes.append(preprocess(client_ds))

In [26]:
# Scratch check: a tuple compares equal to another tuple with the same items.
a = 112, 112
if (112, 112) == a:
    print("hi")

hi


In [27]:
# Number of labels collected for class 0.
len(labels[0])

29150

In [28]:
# Print how many labels were collected for each class, in class order.
for i, class_labels in enumerate(labels):
    print(len(class_labels))

29150
3850
6371
2349
924
22378
240
418
874
1777
2165


In [39]:
# Scratch arithmetic: 80% of 240 -- presumably the expected training share of
# a 240-image sample under a test_size=0.2 split.
240*0.8

192.0

In [41]:
# Inspect the file paths collected for class 0.
# NOTE(review): this displays the whole list; slicing it (for example
# image_names[0][:10]) would keep the output short.
image_names[0]

['C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\0.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\10.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\100.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1000.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1001.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1002.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1003.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1004.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1005.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1006.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1007.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1008.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/0\\1009.jpg',
 

In [42]:
# Split the first 240 images of one class 80/20 to inspect the result.
# The class index is written out explicitly (5, the class folder seen in the
# recorded output paths) instead of relying on whatever value a previously
# executed loop left in the global `i`.
# Note: this rebinds the notebook-level names `train` and `test`.
train, test = train_test_split(image_names[5][:240], test_size=0.2, random_state=42)

In [43]:
# Inspect the training part of the split produced by the previous cell.
train

['C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1122.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1051.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1181.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1165.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1184.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1171.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\12.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1186.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1100.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1195.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1063.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\112.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1083.jpg',
 'C:/Users/temmuz/Desktop/2022-2/proje/stanford/dataset/0/5\\1110.j

In [45]:
# Open the first path of `train` as a PIL image (rebinds the scratch name `a`).
a = PIL.Image.open(train[0])

In [47]:
# Convert the opened image to an array of pixel values and display it.
tf.keras.preprocessing.image.img_to_array(a)

array([[[162., 161., 143.],
        [165., 164., 146.],
        [171., 170., 150.],
        ...,
        [173., 170., 161.],
        [175., 173., 161.],
        [179., 177., 165.]],

       [[147., 146., 128.],
        [147., 146., 128.],
        [147., 146., 126.],
        ...,
        [177., 174., 165.],
        [179., 177., 165.],
        [183., 181., 169.]],

       [[176., 175., 157.],
        [171., 170., 152.],
        [162., 161., 141.],
        ...,
        [181., 178., 169.],
        [184., 182., 170.],
        [187., 185., 173.]],

       ...,

       [[174., 180., 152.],
        [174., 180., 152.],
        [174., 180., 152.],
        ...,
        [ 65.,  65.,  65.],
        [ 64.,  64.,  64.],
        [ 64.,  64.,  64.]],

       [[174., 180., 152.],
        [174., 180., 152.],
        [174., 180., 152.],
        ...,
        [ 65.,  65.,  65.],
        [ 65.,  65.,  65.],
        [ 65.,  65.,  65.]],

       [[174., 180., 152.],
        [174., 180., 152.],
        [174., 1

In [51]:
# Same conversion, applied after resizing the image to (112, 112).
tf.keras.preprocessing.image.img_to_array(a.resize((112,112)))

array([[[162., 161., 143.],
        [165., 164., 146.],
        [171., 170., 150.],
        ...,
        [173., 170., 161.],
        [175., 173., 161.],
        [179., 177., 165.]],

       [[147., 146., 128.],
        [147., 146., 128.],
        [147., 146., 126.],
        ...,
        [177., 174., 165.],
        [179., 177., 165.],
        [183., 181., 169.]],

       [[176., 175., 157.],
        [171., 170., 152.],
        [162., 161., 141.],
        ...,
        [181., 178., 169.],
        [184., 182., 170.],
        [187., 185., 173.]],

       ...,

       [[174., 180., 152.],
        [174., 180., 152.],
        [174., 180., 152.],
        ...,
        [ 65.,  65.,  65.],
        [ 64.,  64.,  64.],
        [ 64.,  64.,  64.]],

       [[174., 180., 152.],
        [174., 180., 152.],
        [174., 180., 152.],
        ...,
        [ 65.,  65.,  65.],
        [ 65.,  65.,  65.],
        [ 65.,  65.,  65.]],

       [[174., 180., 152.],
        [174., 180., 152.],
        [174., 1

In [77]:
# Scratch 2-tuple used by the indexing experiments in the following cells.
a = (10, 11)


In [78]:
# First item of the scratch tuple.
a[0]

10

In [79]:
# Extend the 2-tuple with a trailing 3 -- presumably a channel count appended
# to an image size; confirm intent.
b = (a[0],a[1],3)

In [80]:
# Display the resulting 3-tuple.
b

(10, 11, 3)