# Privacy-Safe NetworkRetriever Demo

In [None]:
import os
import tqdm

from llama_index.core.llama_dataset.simple import LabelledSimpleDataset
from llama_index.networks.network.retriever import NetworkRetriever
from llama_index.networks.contributor.retriever import ContributorRetrieverClient

In [None]:
import nest_asyncio

# Patch asyncio so event loops can be nested — presumably required because the
# notebook kernel already runs a loop while later cells `await` retriever calls.
nest_asyncio.apply()

### Load Symptom2Disease Test Data

In [None]:
# Load the labelled test examples from the local JSON file.
test_dataset = LabelledSimpleDataset.from_json("./symptom_2_disease_test.json")

In [None]:
# Preview the first five examples in tabular form.
test_dataset.to_pandas()[:5]

Unnamed: 0,reference_label,text,text_by
0,Common Cold,I'm constantly sneezing and my body is shaking...,human
1,Bronchial Asthma,"I have a cough that has continued for days, an...",human
2,Bronchial Asthma,"I've got a cough that won't go away, and I'm e...",human
3,Fungal infection,"My body has been itching terribly all over, an...",human
4,Acne,"When I awoke this morning, I saw a severe rash...",human


### NetworkRetriever

In [None]:
# Create one ContributorRetrieverClient per contributor (1 and 2) so the
# notebook can connect to the ContributorServices; each client is configured
# from its own env file.
contributor_env_files = [
    f"./client-env-files/.env.contributor_{ix}.client" for ix in (1, 2)
]
contributors = [
    ContributorRetrieverClient.from_config_file(env_file=env_file)
    for env_file in contributor_env_files
]

#### Optional Reranker

In [None]:
from llama_index.postprocessor.cohere_rerank import CohereRerank

# Reranker handed to NetworkRetriever below; top_n=5 presumably keeps the five
# best-scoring nodes — confirm against the CohereRerank documentation.
reranker = CohereRerank(top_n=5)

In [None]:
# Retriever spanning every contributor client, with reranking switched on
# (rerank=True) and the reranker defined above supplied.
network_retriever = NetworkRetriever(
    reranker=reranker,
    rerank=True,
    contributors=contributors,
)

### NetworkRetriever Results

In [None]:
network_retrievals = []
for example in tqdm.tqdm(test_dataset[:]):
    network_retrievals.append(await network_retriever.aretrieve(example.text))

100%|███████████████████████████████████████████████████████████████████████████████████| 240/240 [01:39<00:00,  2.42it/s]


### Contributor1 Retriever Results

In [None]:
contributor1_retriever = NetworkRetriever(contributors=contributors[:1])
contributor1_retrievals = []
for example in tqdm.tqdm(test_dataset[:]):
    contributor1_retrievals.append(await contributor1_retriever.aretrieve(example.text))

100%|███████████████████████████████████████████████████████████████████████████████████| 240/240 [00:56<00:00,  4.26it/s]


### Contributor2 Retriever Results

In [None]:
contributor2_retriever = NetworkRetriever(contributors=contributors[1:2])
contributor2_retrievals = []
for example in tqdm.tqdm(test_dataset[:]):
    contributor2_retrievals.append(await contributor2_retriever.aretrieve(example.text))

100%|███████████████████████████████████████████████████████████████████████████████████| 240/240 [00:55<00:00,  4.33it/s]


### Evaluations

In [None]:
from typing import List
from llama_index.core.schema import NodeWithScore


def compute_hit(retrieved_nodes: List[NodeWithScore], expected_label: str):
    """Report whether any retrieved node carries the expected label.

    Args:
        retrieved_nodes: Nodes returned for a single query. Each node's
            ``metadata`` must contain a ``"reference_label"`` key.
        expected_label: Ground-truth label for the query.

    Returns:
        ``True`` if at least one node's ``"reference_label"`` equals
        ``expected_label``; ``False`` otherwise, including for an empty list.

    Raises:
        KeyError: If an inspected node has no ``"reference_label"`` metadata.
    """
    for node in retrieved_nodes:
        if node.metadata["reference_label"] == expected_label:
            return True
    return False


def compute_reciprocal_rank(
    retrieved_nodes: List[NodeWithScore], expected_label: str
) -> float:
    """Return the reciprocal rank of the first node with the expected label.

    Args:
        retrieved_nodes: Nodes returned for a single query, in ranked order
            (best first). Each node's ``metadata`` must contain a
            ``"reference_label"`` key.
        expected_label: Ground-truth label for the query.

    Returns:
        ``1 / rank`` of the first matching node (rank is 1-based), or ``0.0``
        when no node matches or the list is empty.

    Raises:
        KeyError: If an inspected node has no ``"reference_label"`` metadata.
    """
    for rank, node in enumerate(retrieved_nodes, start=1):
        if node.metadata["reference_label"] == expected_label:
            return 1.0 / rank
    # Return a float (not the int 0) so every code path yields the same type.
    return 0.0

#### NetworkRetriever

In [None]:
# Score each query's network retrievals against its ground-truth label.
# The "retriver" spelling is kept because the summary cell reads these names.
network_retriver_hits = list(
    map(
        compute_hit,
        network_retrievals,
        (sample.reference_label for sample in test_dataset.examples),
    )
)
network_retriver_reciprocal_ranks = list(
    map(
        compute_reciprocal_rank,
        network_retrievals,
        (sample.reference_label for sample in test_dataset.examples),
    )
)

#### Contributor 1

In [None]:
# Score each query's contributor-1 retrievals against its ground-truth label.
contributor1_retriever_hits = list(
    map(
        compute_hit,
        contributor1_retrievals,
        (sample.reference_label for sample in test_dataset.examples),
    )
)
contributor1_retriever_reciprocal_ranks = list(
    map(
        compute_reciprocal_rank,
        contributor1_retrievals,
        (sample.reference_label for sample in test_dataset.examples),
    )
)

#### Contributor 2

In [None]:
# Score each query's contributor-2 retrievals against its ground-truth label.
contributor2_retriever_hits = list(
    map(
        compute_hit,
        contributor2_retrievals,
        (sample.reference_label for sample in test_dataset.examples),
    )
)
contributor2_retriever_reciprocal_ranks = list(
    map(
        compute_reciprocal_rank,
        contributor2_retrievals,
        (sample.reference_label for sample in test_dataset.examples),
    )
)

#### Summary Report

In [None]:
import pandas as pd

# Average the per-example hits and reciprocal ranks for each retriever and
# tabulate them side by side. The three lists below share the same order as
# the "retriever" column.
num_examples = len(test_dataset.examples)
hits_by_retriever = (
    network_retriver_hits,
    contributor1_retriever_hits,
    contributor2_retriever_hits,
)
reciprocal_ranks_by_retriever = (
    network_retriver_reciprocal_ranks,
    contributor1_retriever_reciprocal_ranks,
    contributor2_retriever_reciprocal_ranks,
)
summary_data = {
    "retriever": ["network", "contributor1", "contributor2"],
    "hit rate": [sum(hits) / num_examples for hits in hits_by_retriever],
    "mean reciprocal rank": [
        sum(ranks) / num_examples for ranks in reciprocal_ranks_by_retriever
    ],
}
summary_df = pd.DataFrame(summary_data)
summary_df

Unnamed: 0,retriever,hit rate,mean reciprocal rank
0,network,0.829167,0.709097
1,contributor1,0.470833,0.422917
2,contributor2,0.416667,0.368333
