# IPFS: Bitswap monitoring

## Requirements
1. Install the Python dependencies with `pip install -r requirements.txt`
2. Have an IPFS daemon running (`ipfs daemon`)

In [1]:
import time
import subprocess
import sys
import humanfriendly
import re
import pathlib

import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

# Custom libraries
import utils
from ipfs import Ipfs

# ---------------------------------------------------------------------------
# Choose the object to download by setting `object_CID` below.
#
#   Dataset        Size     CID
#   XKCD           107 MB   QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm
#   Old internet   210 MB   QmbsZEvJE8EU51HCUHQg2aem9JNFmFHdva3tGVYutdCXHp
#
# More datasets are listed at https://awesome.ipfs.io/datasets/
# ---------------------------------------------------------------------------

object_CID = "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm"

# Client object from the local `ipfs` module, used for the daemon queries.
ipfs = Ipfs()

## Download file

In [None]:
# Minimum number of seconds between two samples of the swarm / Bitswap
# partners while the download is running. Sampling on every progress refresh
# would query the daemon far more often than the data changes.
SAMPLE_INTERVAL_S = 1.0

# Progress line printed by `ipfs get`:
#   "<downloaded> <unit> / <total> <unit> <percent>% <remaining time>"
# Raw string so that `\s`, `\d`, ... are regex escapes, not (invalid) string escapes.
PROGRESS_PATTERN = re.compile(r"([\s\d\.]*)([\sa-zA-Z]*)/([\s\d\.]*)([\sa-zA-Z]*)([\d\.]*\%)(.*)")


def _sample_swarm(history):
    """Query the daemon once and append the current partners to `history`.

    Returns a `(peers, partners, updated_history)` tuple, where `updated_history`
    is whatever `utils.partners_to_df` returns for the fresh partner snapshot.
    """
    current_peers = ipfs.get_swarm_peers()
    current_partners = ipfs.get_bitswap_partners()
    return current_peers, current_partners, utils.partners_to_df(current_partners, history)


# Notebook-level state read by the following cells.
maximum_peers = 0
partners_df = pd.DataFrame()

if not ipfs.is_daemon_active():
    print("Be sure to have the IPFS daemon running.")
else:
    print("THIS MAY TAKE A WHILE...\n")
    object_stat = ipfs.get_object_stat(object_CID)

    print("\nFILE STATS")
    print("Size: " + humanfriendly.format_size(object_stat["CumulativeSize"], binary=True))
    print(f"Block size: {object_stat['BlockSize']} byte")
    print(f"Links: {object_stat['NumLinks']}")

    # Download
    print("\nDOWNLOAD PROGRESS")
    path = "downloads/" + object_CID

    # First sample before the transfer starts, so `peers` / `partners` are
    # always defined for the progress line.
    peers, partners, partners_df = _sample_swarm(partners_df)
    maximum_peers = max(maximum_peers, len(peers))
    last_sample = time.monotonic()

    # Lines that are not progress updates (typically error messages, because
    # stderr is merged into stdout). They are shown once the process ends.
    other_output = []

    # The context manager closes the pipe and reaps the child even if the
    # cell is interrupted.
    with subprocess.Popen(['ipfs', 'get', object_CID, '-o', path],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          universal_newlines=True) as process:

        # universal_newlines=True also turns the "\r" of the progress bar into
        # "\n", so every progress refresh arrives as its own line.
        for line in iter(process.stdout.readline, ''):
            # Re-sample the swarm while the download is in progress; doing it
            # only once before the loop would record a single stale snapshot.
            now = time.monotonic()
            if now - last_sample >= SAMPLE_INTERVAL_S:
                peers, partners, partners_df = _sample_swarm(partners_df)
                maximum_peers = max(maximum_peers, len(peers))
                last_sample = now

            m = PROGRESS_PATTERN.search(line)
            if not m:
                other_output.append(line)
                continue

            try:
                downloaded_bytes = humanfriendly.parse_size(m.group(1).strip() + m.group(2).strip())
                total_bytes = humanfriendly.parse_size(m.group(3).strip() + m.group(4).strip())
            except humanfriendly.InvalidSize:
                # The pattern is permissive; a line that merely contains "/"
                # and "%" is not a progress update.
                other_output.append(line)
                continue

            remaining_time = m.group(6).strip()
            current_state = f"\tState: {m.group(1).strip()} {m.group(2).strip()} / {m.group(3).strip()} {m.group(4).strip()}"
            utils.printProgressBar(float(downloaded_bytes), float(total_bytes), prefix = 'Progress:', suffix=(current_state + "\t" + remaining_time + "\tPeers " + str(len(partners)) + "/" + str(len(peers))) + '\t')

        # stdout reached EOF: block until the exit status is available
        # instead of polling in a loop.
        return_code = process.wait()

    # Final sample so the last recorded rows describe the end of the transfer.
    peers, partners, partners_df = _sample_swarm(partners_df)
    maximum_peers = max(maximum_peers, len(peers))

    if return_code != 0:
        print("\nAn error occurred with return code: " + str(return_code))
    else:
        # Save stats
        pathlib.Path("dataset/").mkdir(parents=True, exist_ok=True)
        file_path = pathlib.Path("dataset/" + object_CID + ".csv")
        partners_df.to_csv(file_path, index=False)

    # Show everything the command printed besides the progress bar.
    for output in other_output:
        if output.strip():
            print(output.strip())

THIS MAY TAKE A WHILE...


FILE STATS
Size: 106.9 MiB
Block size: 116233 byte
Links: 1864

DOWNLOAD PROGRESS
Progress: |------------------------------| 2.7% 	State: 2.87 MiB / 106.90 MiB	08m11s	Peers 6/20	0	

## After download Stats

In [25]:
def _percentage(part, whole):
    """Return `part / whole` as a percentage, or 0.0 when `whole` is zero."""
    return part / whole * 100 if whole else 0.0


# Location the download cell passes to `ipfs get -o` for this object.
file_path = pathlib.Path("downloads/") / object_CID

if not ipfs.is_daemon_active():
    print("Be sure to have the IPFS daemon running.")
elif not file_path.exists():
    print("Be sure to download the file before.")
else:
    print("Be aware that if a GC of the repo was disabled, this information may not be reliable.\n")
    # Not used by the statistics below; kept so the notebook-level `peers`
    # variable is refreshed exactly as before.
    peers = ipfs.get_swarm_peers()

    # Fetch the partners before they are used, so the contributor ratio does
    # not depend on a stale value left behind by the download cell.
    partners = ipfs.get_bitswap_partners()

    # Bitswap stats
    bitswap_stat = ipfs.get_bitswap_stat(False)
    # `_percentage` guards the three ratios against a zero denominator
    # (nothing received yet, or no peer ever observed).
    duplicates_block = _percentage(bitswap_stat['DupBlksReceived'], bitswap_stat['BlocksReceived'])
    duplicates_data = _percentage(bitswap_stat['DupDataReceived'], bitswap_stat['DataReceived'])
    # `maximum_peers` is set by the download cell, which must have been run
    # in this kernel.
    contributors = _percentage(len(partners.keys()), maximum_peers)

    humanfriendly_data_received = humanfriendly.format_size(bitswap_stat["DataReceived"], binary=True)
    print(f"{bitswap_stat['BlocksReceived']} blocks received, {duplicates_block:.2f}% were duplicates")
    print(f"{humanfriendly_data_received} data received, {duplicates_data:.2f}% was duplicate")
    print(f"{maximum_peers} peers were in the swarm, {contributors:.2f}% contributed")

    # Ledger stats
    print(f"\nPeer partners ({len(partners.keys())}):")
    utils.show_peers_table(partners)

Be aware that if a GC of the repo was disabled, this information may not be reliable.

15008 blocks received, 40.98% were duplicates
169.74 MiB data received, 36.25% was duplicate
8 peers were in the swarm, 75.00% contributed

Peer partners (6):


In [26]:
# Prepare data
# Load the per-sample partner records written by the download cell.
df = pd.read_csv(pathlib.Path("dataset/" + object_CID + ".csv"))

# Number of recorded rows per (peer, location) combination.
x = pd.DataFrame({
    'Count': df.groupby(["Peer", "Country", "Lat", "Lon"]).size()
})

# Keep only the rows whose timestamp starts with the timestamp of the last row.
last_records = df[df['Timestamp'].str.startswith(df['Timestamp'].iloc[-1])]

# Per-peer maximum of `Recv` and mean of `Latency` in that last snapshot.
peers = last_records.groupby(['Peer', 'Country', 'Lat', 'Lon']).agg({'Recv': ['max'], 'Latency': ['mean']}).reset_index()

country_latency = last_records.groupby(['Country']).agg({'Latency': ['mean']}).reset_index()

# Select `Latency` before averaging: calling `.mean()` on the whole group
# also tries to average the string columns (`Peer`, `Timestamp`), which raises
# a TypeError on pandas >= 2.0.
records_by_country = last_records.groupby(["Country"])
country_contributors = pd.DataFrame({
    'Count': records_by_country.size(),
    'Latency': records_by_country["Latency"].mean()
}).sort_values(by=["Count", "Country"], ascending=[False, True]).reset_index()

# Show graphs
# World map with one marker per country, sized by its number of records.
fig = px.scatter_geo(country_contributors,
                     locationmode="country names",
                     locations="Country",
                     color="Country",
                     hover_name="Country",
                     projection="natural earth",
                     size="Count",
                     labels={"Country": "Country", "Count":"Count"},
                     title="Peer for countries")
fig.show()

# Same counts as a bar chart.
fig = px.bar(country_contributors, x="Country", y="Count", color="Country", title="Peer for countries", labels={"Count":"Peer"})
fig.show()

# Mean latency per country.
fig = px.bar(country_contributors, x="Country", y="Latency", color="Country", title="Average Latency for countries", labels={"Latency":"Latency (ms)"})
fig.show()