In [295]:
from __future__ import absolute_import, division, print_function

import collections
import numpy as np
from six.moves import range
import tensorflow as tf
import tensorflow_federated as tff
import tempfile
import os
import six

import json
import pandas as pd
import _pickle as pickle
from tensorflow_federated.python.simulation import file_per_user_client_data
import functools
from random import shuffle
from sklearn.model_selection import train_test_split

tf.enable_resource_variables()

### Data preprocessing

In [21]:
##1. read raw data
bg1_path = 'data/20396154_entries.json'
bg2_path = 'data/99296581_entries.json'
df_bg1 = pd.read_json(bg1_path)
df_bg2 = pd.read_json(bg2_path)

In [115]:
##2. first step cleaning, remove unrelated information
def prefed_process(df_bg, client_id, window):
    df_bg = df_bg[['date', 'sgv']]    
    ret = df_bg.set_index('date').sort_index()
    in_start = 0
    fed = []
    for i in range(len(df_bg)):
        in_end = in_start + window
        if in_end < len(df_bg):
            fed.append((client_id, df_bg['sgv'][in_start:in_end].values.tolist()))
        in_start += 1 
    
    return fed

In [131]:
fed1 = prefed_process(df_bg1,'20396154',18)
fed2 = prefed_process(df_bg2,'99296581',18)

In [290]:
fed = fed1+fed2
shuffle(fed)

In [293]:
len(fed)

87847

In [185]:
# Create an instance of the client
def _create_example(features):
  """Convert a tuple of features to a tf.Example."""
  output_features = collections.OrderedDict()
  for i, feature in enumerate(features):
    if isinstance(feature, int):
      output_features[str(i)] = tf.train.Feature(
          int64_list=tf.train.Int64List(value=[feature]))
    elif isinstance(feature, float):
      output_features[str(i)] = tf.train.Feature(
          float_list=tf.train.FloatList(value=[feature]))
    elif isinstance(feature, list):
      output_features[str(i)] = tf.train.Feature(
          float_list=tf.train.FloatList(value=feature))
    else:
      # This is hit if the unittest is updated with unknown types, not an error
      # in the object under test. Extend the unittest capabilities to fix.
      raise NotImplementedError('Cannot handle feature type [%s]' %
                                type(feature))
  return tf.train.Example(features=tf.train.Features(
      feature=output_features)).SerializeToString()

In [186]:
class FakeUserData(object):
  """Container object that creates fake per-user data.
  Using the fake test data, create temporary per-user TFRecord files used for
  the test. Convert each feature-tuple to a `tf.Example` protocol buffer message
  and serialize it to the per-user file.
  """

  def __init__(self, test_data, temp_dir):
    """Construct a FakePerUseData object.
    Args:
      test_data: A list of tuples whose first element is the client ID and all
        subsequent elements are training example features.
      temp_dir: The path to the directory to store temporary per-user files.
    Returns:
      A dict of client IDs to string file paths to TFRecord files.
    """
    writers = {}
    client_file_dict = {}
    for example in test_data:
      client_id, features = example[0], example[1:]
      writer = writers.get(client_id)
      if writer is None:
        fd, path = tempfile.mkstemp(suffix=client_id, dir=temp_dir)
        # close the pre-opened file descriptor immediately to avoid leaking.
        os.close(fd)
        client_file_dict[client_id] = path
        writer = tf.python_io.TFRecordWriter(path=path)
        writers[client_id] = writer
      writer.write(_create_example(features))
    for writer in six.itervalues(writers):
      writer.close()
    self._client_data_file_dict = client_file_dict

  def create_test_dataset_fn(self, client_id):
    client_path = self._client_data_file_dict[client_id]
    features = {
#         '0': tf.FixedLenFeature(shape=[], dtype=tf.int64),
        '0': tf.FixedLenFeature(shape=[], dtype=tf.float32),
    }

    def parse_example(e):
      feature_dict = tf.parse_single_example(serialized=e, features=features)
      return tuple(feature_dict[k] for k in sorted(six.iterkeys(feature_dict)))

    return tf.data.TFRecordDataset(client_path).map(parse_example)

  @property
  def client_ids(self):
    return list(self._client_data_file_dict.keys())


In [230]:
temp_dir = tempfile.mkdtemp()
fakeUserData = FakeUserData(fed, temp_dir)

In [297]:
temp_dir_train = tempfile.mkdtemp()
temp_dir_test = tempfile.mkdtemp()
train_data = FakeUserData(fed[:-int(len(fed)/3)], temp_dir_train)
test_data = FakeUserData(fed[int(len(fed)/3):], temp_dir_test)

In [260]:
client1_data_raw = fakeUserData.create_test_dataset_fn('20396154')

In [None]:
BATCH_SIZE = 128
SEQ_LENGTH = 18
BUFFER_SIZE = 10000

BatchType = collections.namedtuple('BatchType', ['x', 'y'])

def to_ids(x):
    s = tf.reshape(x[0], shape=[1])
    return s  

def split_input_target(chunk):
    input_text = tf.reshape(tf.map_fn(lambda x: x[:12], chunk), shape=[BATCH_SIZE,2,1,6,1])
    target_text = tf.map_fn(lambda x: x[12:], chunk)
    return BatchType(input_text, target_text)


def preprocess(dataset):  
    return (dataset
            .map(to_ids)
            .apply(tf.data.experimental.unbatch())# Form example sequences of SEQ_LENGTH +1
            .batch(SEQ_LENGTH,  drop_remainder=True)# Shuffle and form minibatches
            .shuffle(BUFFER_SIZE)
            .batch(BATCH_SIZE, drop_remainder=True)
            .map(split_input_target))
client1_data = preprocess(client1_data_raw)
for c in client1_data.take(1):
    print(c)

### Load Model

In [127]:
keras_conv_lstm = tf.keras.models.load_model('models/conv_lstm_128_2_6.kerasmodel', compile=False)

In [148]:
keras_conv_lstm.predict(np.array([137,143,148,149,147,149,146,138,131,130,127,121]).reshape(((1, 2, 1, 6, 1))))

array([[114.02403, 112.042  , 109.5221 , 109.03717, 108.37824, 107.89183]],
      dtype=float32)

In [135]:
def compile(keras_model):
  keras_model.compile(
      optimizer=tf.keras.optimizers.SGD(lr=0.5),loss='mse')
  return keras_model

In [280]:
def create_tff_model():
  # TFF uses a `dummy_batch` so it knows the types and shapes
  # that your model expects.
  x = tf.constant(np.random.random_sample((BATCH_SIZE, 2, 1, 6, 1))*200)
  x = tf.cast(x, 'float32')
  y = tf.constant(np.random.random_sample((BATCH_SIZE, 6))*200)
  y = tf.cast(y, 'float32')
  dummy_batch = collections.OrderedDict([('x', x), ('y', y)]) 
  keras_model_clone = compile(tf.keras.models.clone_model(keras_model))
  return tff.learning.from_compiled_keras_model(
      keras_model_clone, dummy_batch=dummy_batch)

In [281]:
fed_avg = tff.learning.build_federated_averaging_process(model_fn=create_tff_model)

In [282]:
state = fed_avg.initialize()
state, metrics = fed_avg.next(state, [client1_data.take(1)])
print(metrics)

<loss=18893.996>


In [298]:
def data(client, source=train_data):
  return preprocess(
      source.create_test_dataset_fn(client)).take(2)

clients = ['20396154',
           '99296581']

train_datasets = [data(client) for client in clients]

# We concatenate the test datasets for evaluation with Keras.
test_dataset = functools.reduce(
    lambda d1, d2: d1.concatenate(d2),
    [data(client, test_data) for client in clients])

In [299]:
#@test {"skip": true}
NUM_ROUNDS = 3

# The state of the FL server, containing the model and optimization state.
state = fed_avg.initialize()

state = tff.learning.state_with_new_model_weights(
    state,
    trainable_weights=[v.numpy() for v in keras_model.trainable_weights],
    non_trainable_weights=[
        v.numpy() for v in keras_model.non_trainable_weights
    ])


def keras_evaluate(state, round_num):
  tff.learning.assign_weights_to_keras_model(keras_model, state.model)
  print('Evaluating before training round', round_num)
  keras_model.evaluate(example_dataset, steps=2)


for round_num in range(NUM_ROUNDS):
  keras_evaluate(state, round_num)
  # N.B. The TFF runtime is currently fairly slow,
  # expect this to get significantly faster in future releases.
  state, metrics = fed_avg.next(state, train_datasets)
  print('Training metrics: ', metrics)

keras_evaluate(state, NUM_ROUNDS + 1)

Evaluating before training round 0


RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.