# Local SyMPC implementation
PySyft's SyMPC library is still under development and therefore contains several bugs. In particular, the `.reconstruct()` function does not currently work for the remote implementation of Secure Multiparty Computation. This notebook therefore demonstrates, in a local version (i.e., all agents in one notebook), how the City agent can reconstruct the encrypted MPCTensors obtained from the Manufacturers.

---

#### 0.1 - Imports

In [1]:
import warnings

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import syft as sy
import torch
from branca.colormap import linear
from folium.plugins import TimeSliderChoropleth
from sympc.session import Session
from sympc.session import SessionManager

warnings.simplefilter(action='ignore', category=FutureWarning)

#### 0.2 – Functions

In [2]:
# Define a function that creates the same tensors that the City agent
# would have received through the Duet stores
def get_tensor(n: int) -> torch.Tensor:
    """
    Build the (zipcode x hour) CO2 tensor from the dataset stored in data/anonymous{n}/

    The ``co2_grams`` values of data/anonymous{n}/data.csv are summed per zipcode and hour
    of day and added onto a zero-filled table that has one row per entry of
    data/geo/berlin_zipcodes.csv and one column per hour (0-23).
    Args:
        n: number of anonymous agent in whose directory to look

    Returns: tensor with the summed CO2 grams per zipcode (rows) and hour (columns)

    """
    hours = list(range(24))

    # Zero-filled (zipcode x hour) frame (zipcode data from https://daten.odis-berlin.de/de/dataset/plz/)
    df_zipcode = pd.read_csv("data/geo/berlin_zipcodes.csv").rename(columns={"plz": "zipcode"})
    df_zipcode = df_zipcode.set_index("zipcode").reindex(columns=hours, fill_value=0.0)

    # Load the agent's records. The previous code passed ``index_col=0`` to ``str.format``,
    # where it was silently ignored; the CSV has therefore always been read without an
    # index column, and that behaviour is kept here.
    df = pd.read_csv(f"data/anonymous{n}/data.csv")

    # The hour of day is taken from characters 11-12 of the timestamp string
    # NOTE(review): assumes timestamps look like "YYYY-MM-DD HH:MM:SS" - confirm against the data
    df["hour"] = df["timestamp"].str[11:13].astype(int)

    # Sum emissions per (zipcode, hour) and spread the hours over the columns
    # (this includes only the zipcodes within the dataset)
    df_co2 = df.groupby(["zipcode", "hour"])["co2_grams"].sum().unstack("hour", fill_value=0)

    # Merge the dataframes: cells missing on either side count as 0
    df_hourly_co2 = df_zipcode.add(df_co2, fill_value=0)

    # Create tensor and return
    return torch.Tensor(df_hourly_co2.values)


def reconstruct_dataset(tensor: torch.Tensor) -> pd.DataFrame:
    """
    Creates dataframe from reconstructed tensor with ZIP information and 24-hour rhythm

    Rows of the tensor are labelled by position with the zipcodes listed in
    data/geo/berlin_zipcodes.csv, so the tensor must have exactly one row per entry
    of that file, in the same order. Rows sharing a zipcode are summed.
    Args:
        tensor: tensor shape (194 x 24) with CO2 emission information per zip code per hour

    Returns: pandas dataframe indexed by zipcode with one column per hour, labelled "0", "1", ...

    """
    df_zipcode = pd.read_csv("data/geo/berlin_zipcodes.csv").rename(columns={"plz": "zipcode"})

    data = pd.DataFrame(tensor).astype("float")

    # Positional labelling: row i of the tensor belongs to the i-th zipcode of the CSV
    data["zipcode"] = df_zipcode["zipcode"].tolist()
    data = data.groupby("zipcode").sum()

    # Turn the integer hour labels into strings ("0", "1", ...); done generically so the
    # function does not depend on a hand-written 24-entry mapping
    return data.rename(columns=str)


def get_styledict(geo_data) -> dict:
    """
    Create styledict as input for the TimeSliderChoropleth visualization

    For every zipcode the hourly values are accumulated over the day (cumulative sum).
    The accumulated value drives both the colour (via a BuPu colormap scaled to the
    overall min/max) and the opacity (min-max normalised over all zipcodes).
    Args:
        geo_data: geopandas dataframe indexed by zipcode, with one column per hour
            labelled "0" to "23"

    Returns: dictionary of the form
        {str(zipcode): {epoch-seconds string: {"color": ..., "opacity": ...}}}

    """
    # Get time index: one epoch-seconds label (as string) per hour of the day.
    # The timedelta arithmetic avoids the deprecated "H" frequency alias and the
    # platform-dependent ``astype(int)`` conversion.
    datetime_index = pd.date_range("2021-08-19 00:00:00", periods=24, freq=pd.Timedelta(hours=1))
    epoch_seconds = (datetime_index - pd.Timestamp("1970-01-01")) // pd.Timedelta(seconds=1)
    dt_index = [str(seconds) for seconds in epoch_seconds]
    hour_columns = [str(hour) for hour in range(24)]

    # Create necessary styledata dict: per zipcode a (time x [color, opacity]) frame
    styledata = {}
    for zipcode, item in geo_data.iterrows():
        hourly_values = [item[column] for column in hour_columns]
        df = pd.DataFrame(
            {"color": hourly_values, "opacity": hourly_values},
            index=pd.Index(dt_index, name="hour"),
        )
        styledata[zipcode] = df.cumsum().sort_index()

    # Get the true min and max values over all zipcodes. (Fixed sentinels such as
    # "min starts at 100" give a wrong scale as soon as every value exceeds them.)
    frames = list(styledata.values())
    min_color = min((frame["color"].min() for frame in frames), default=0)
    max_color = max((frame["color"].max() for frame in frames), default=0)
    min_opacity = min((frame["opacity"].min() for frame in frames), default=0)
    max_opacity = max((frame["opacity"].max() for frame in frames), default=0)

    # Normalize values
    cmap = linear.BuPu_09.scale(min_color, max_color)
    opacity_span = max_opacity - min_opacity

    for info in frames:
        # Frames without any positive value keep their opacity untouched; a zero span
        # is skipped as well so that no 0/0 = NaN ends up in the style dictionary
        if opacity_span > 0 and (info["opacity"] > 0).any():
            info["opacity"] = (info["opacity"] - min_opacity) / opacity_span
        info["color"] = info["color"].apply(cmap)

    # Finalize styledict
    styledict = {str(zipcode): data.to_dict(orient="index") for zipcode, data in styledata.items()}
    return styledict

### 1 – Setup Agents: Manufacturer1 🚗, Manufacturer2 🚛, Manufacturer3 🛵, and City 🏙️

#### 1.1 – Init virtual machines for agents

In [3]:
# Create one virtual machine per participant of the computation
# @todo: should the city be part of this???
c_vm, a1_vm, a2_vm, a3_vm = (
    sy.VirtualMachine(name=vm_name)
    for vm_name in ("city", "anonymousagent1", "anonymousagent2", "anonymousagent3")
)

#### 1.2 – Get root clients

In [4]:
# Fetch the root client of every VM (same order as the VMs were created)
city, anonymous1, anonymous2, anonymous3 = (
    vm.get_root_client() for vm in (c_vm, a1_vm, a2_vm, a3_vm)
)

#### 1.3 – Setup session for the computation

In [5]:
# Set up a session for the computation.
# The city is added to the parties as well, because the city is also part of the Duets.
session = Session(parties=[city, anonymous1, anonymous2, anonymous3])
SessionManager.setup_mpc(session)

### 2 – Get data

#### 2.1 – Get secret tensors 🔒🚗, 🔒🚛, 🔒🛵

In [10]:
# Load the private values that each anonymous agent (i.e., manufacturer) is going to share
x_secret, y_secret, z_secret = (get_tensor(agent_number) for agent_number in (1, 2, 3))

# Show the raw secret of the first agent (what any data owner would see)
print(x_secret)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


#### 2.2 – Share secrets in session

In [11]:
# Distribute every secret between all members of the session
x, y, z = (secret.share(session=session) for secret in (x_secret, y_secret, z_secret))

# Show the shared form of x (what non-data owners would see)
print(x)

[MPCTensor]
Shape: torch.Size([194, 24])
Requires Grad: False
	| <VirtualMachineClient: city Client> -> ShareTensorPointer
	| <VirtualMachineClient: anonymousagent1 Client> -> ShareTensorPointer
	| <VirtualMachineClient: anonymousagent2 Client> -> ShareTensorPointer
	| <VirtualMachineClient: anonymousagent3 Client> -> ShareTensorPointer


#### 2.3 – Reconstruct data 🔐(🚗 + 🚛 + 🛵)

In [12]:
# Addition on secret shared values: the sum is itself still a shared value
# (the print shows an MPCTensor, not the plain numbers)
result = x + y + z
print(result)

[MPCTensor]
Shape: torch.Size([194, 24])
Requires Grad: False
	| <VirtualMachineClient: city Client> -> ShareTensorPointer
	| <VirtualMachineClient: anonymousagent1 Client> -> ShareTensorPointer
	| <VirtualMachineClient: anonymousagent2 Client> -> ShareTensorPointer
	| <VirtualMachineClient: anonymousagent3 Client> -> ShareTensorPointer


In [13]:
# Reconstruct the plain tensor of the summed emissions from the shared result
result_reconstructed = result.reconstruct()
print(result_reconstructed)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


### 3 – Data Analytics
#### 3.1 – Get datasets

In [14]:
# Reconstructed emissions as a (zipcode x hour) dataframe
df = reconstruct_dataset(result_reconstructed)

# Geodata of Berlin, indexed by integer zipcode so it lines up with df
gdf = gpd.read_file("data/geo/berlin_zipcodes.geojson")
gdf = gdf.rename(columns={"plz": "zipcode"})
gdf["zipcode"] = gdf["zipcode"].astype(int)
gdf = gdf.set_index("zipcode")

# Join geometry and emissions on the zipcode index
geo_data = gdf.merge(df, left_index=True, right_index=True)
geo_data.head()

Unnamed: 0_level_0,geometry,0,1,2,3,4,5,6,7,8,...,14,15,16,17,18,19,20,21,22,23
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10115,"POLYGON ((13.36586 52.53566, 13.36829 52.53329...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10117,"POLYGON ((13.37374 52.52780, 13.37382 52.52770...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10119,"POLYGON ((13.39902 52.52701, 13.40134 52.52631...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10178,"POLYGON ((13.39902 52.52701, 13.39877 52.52679...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10179,"POLYGON ((13.40305 52.51217, 13.40261 52.51186...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### 3.2 – Plot Choropleth

In [15]:
# Get styledict for plotting (per-zipcode, per-timestamp color and opacity)
styledict = get_styledict(geo_data)

In [16]:
# Base map with dark Stadia tiles, positioned on the given coordinates
m = folium.Map(
    location=[52.52, 13.41],
    zoom_start=12,
    tiles="https://tiles.stadiamaps.com/tiles/alidade_smooth_dark/{z}/{x}/{y}{r}.png",
    attr="Stadia.AlidadeSmoothDark",
)

# Time-slider choropleth layer styled by the precomputed styledict
g = TimeSliderChoropleth(geo_data.to_json(), styledict=styledict, overlay=True).add_to(m)

# Display the map as the cell output
m