# Federated Fraud Demo with logistic regression


In [None]:
# Libraries
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from xain_sdk import ParticipantABC, run_participant, configure_logging


import pathlib
import os
from io import BytesIO

# Root directory of the input datasets, read from the environment.
# This is None when NEVERMINED_INPUTS_PATH is not set.
INPUTS_PATH = os.getenv("NEVERMINED_INPUTS_PATH")

# URL handed to run_participant() to reach the coordinator.
COORDINATOR_URL = "http://172.17.0.2:8081"

# Fix the seed of NumPy's global random generator for repeatable runs.
np.random.seed(123)

In [None]:
# Locate the credit-card CSV below the inputs directory.
# Fail early with a clear message instead of an opaque TypeError from
# pathlib.Path(None) when the environment variable is missing.
if INPUTS_PATH is None:
    raise RuntimeError(
        "NEVERMINED_INPUTS_PATH is not set; cannot locate the input dataset"
    )

# The leading "*/" means only files at least one directory below
# INPUTS_PATH are matched. The first match is used.
csv_candidates = pathlib.Path(INPUTS_PATH).rglob("*/creditcard*.csv")
first_csv = next(csv_candidates, None)
if first_csv is None:
    # Replaces the bare StopIteration the unguarded next() would raise.
    raise FileNotFoundError(
        f"No file matching '*/creditcard*.csv' found under {INPUTS_PATH}"
    )
input_file_path = first_csv.as_posix()

# Load target dataset
df = pd.read_csv(input_file_path)

# Columns 1..29 are used as features; every column from position 30 on is
# treated as the target. Column 0 is not used.
feature_names = df.columns[1:30]
target = df.columns[30:]

data_features = df[feature_names]
# Kept as a DataFrame (not a Series) because the target is selected with a
# column Index; downstream code indexes it by row label.
data_target = df[target]

# 70/30 train/test split with a fixed random_state for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    data_features,
    data_target,
    train_size=0.70,
    test_size=0.30,
    random_state=1,
)

In [None]:

class Participant(ParticipantABC):
    """Federated-learning participant that trains a logistic regression locally.

    Each round the participant optionally installs the global coefficients it
    received, fits the model on a random 10% sample of its training data,
    prints the accuracy on its test set and returns the updated coefficients
    together with the number of samples used.

    Only the coefficient vector (``coef_``) is exchanged; the intercept stays
    local to this participant.
    """

    def __init__(self, x_train, x_test, y_train, y_test):
        """Store the local data split and create the model.

        Args:
            x_train: Training features (pandas DataFrame; ``sample`` and
                ``index`` are used on it).
            x_test: Test features used for the per-round accuracy report.
            y_train: Training labels, indexed like ``x_train``.
            y_test: Test labels matching ``x_test``.
        """
        super().__init__()
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test
        self.round = 1
        # warm_start=True makes fit() continue from the current
        # coef_/intercept_ instead of re-initialising them. Without it the
        # weights installed by set_weights() are discarded by the next fit()
        # and the global model never influences local training.
        self.model = LogisticRegression(max_iter=1000, warm_start=True)

    def get_weights(self):
        """Return the model's coefficient matrix (``coef_``).

        Raises:
            AttributeError: If the model has not been fitted or given
                weights yet.
        """
        return self.model.coef_

    def set_weights(self, weights):
        """Install ``weights`` as the model coefficients.

        Args:
            weights: Array holding one coefficient per feature column of
                ``self.x_train``; any shape with that many elements.

        Raises:
            ValueError: If the number of elements does not match the number
                of feature columns.
        """
        # Derive the expected size from the data instead of hard-coding it;
        # reshape still rejects vectors of the wrong length.
        n_features = self.x_train.shape[1]
        self.model.coef_ = np.asarray(weights).reshape(1, n_features)

        # A warm-started fit() reads intercept_ alongside coef_. If weights
        # arrive before the first local fit, no intercept exists yet, so
        # start it at zero.
        if not hasattr(self.model, "intercept_"):
            self.model.intercept_ = np.zeros(1)

    def train_round(self, training_input):
        """Run one local training round.

        Args:
            training_input: Global coefficients to start from, or ``None``
                to train from the model's current state.

        Returns:
            Tuple ``(weights, number_of_samples)`` with the fitted
            coefficients and the size of the sample trained on.
        """
        # set global model weights
        if training_input is not None:
            self.set_weights(training_input)

        # select a subset of the data to train
        x_batch = self.x_train.sample(frac=0.1)
        y_batch = self.y_train.loc[x_batch.index]

        # train; labels are flattened because fit() expects a 1-d target
        self.model.fit(x_batch, np.ravel(y_batch))

        # check accuracy
        pred = self.model.predict(self.x_test)
        print(f"[{self.round}] Accuracy: {accuracy_score(self.y_test, pred)}")
        self.round += 1

        return (self.get_weights(), len(x_batch))

    def serialize_training_result(self, training_result):
        """Encode a training result as bytes.

        Layout: a 4-byte big-endian sample count followed by the flattened
        weights in NumPy ``.npy`` format.

        Args:
            training_result: Tuple ``(weights, number_of_samples)`` as
                returned by :meth:`train_round`.

        Returns:
            A memoryview over the encoded buffer.

        Raises:
            OverflowError: If the sample count does not fit in 4 bytes.
        """
        weights, number_of_samples = training_result

        # flatten to a 1-d vector regardless of the number of features
        flat_weights = np.asarray(weights).reshape(-1)

        writer = BytesIO()
        # int() accepts NumPy integers too, which lack to_bytes().
        writer.write(int(number_of_samples).to_bytes(4, byteorder="big"))
        np.save(writer, flat_weights, allow_pickle=False)
        return writer.getbuffer()[:]

    def deserialize_training_input(self, data):
        """Decode the bytes received as training input.

        Args:
            data: Raw bytes in ``.npy`` format, or an empty/falsy value.

        Returns:
            The decoded array, or ``None`` when ``data`` is empty.
        """
        if not data:
            return None

        # NOTE(review): no 4-byte count prefix is expected here, unlike in
        # serialize_training_result; confirm against the coordinator's format.
        reader = BytesIO(data)
        return np.load(reader, allow_pickle=False)

In [None]:
# Enable SDK logging with HTTP request logging switched on.
configure_logging(log_http_requests=True)

# Build the participant from the train/test split and hand it to the SDK
# together with the coordinator URL.
participant = Participant(
    x_train=x_train,
    x_test=x_test,
    y_train=y_train,
    y_test=y_test,
)
run_participant(participant, COORDINATOR_URL)