[![Binder](https://mybinder.org/badge_logo.svg)](https://lab.mlpack.org/v2/gh/mlpack/examples/master?urlpath=lab%2Ftree%2Fcontact_tracing_clustering_with_dbscan%2Fcontact-tracing-dbscan-cpp.ipynb)

In [1]:
/**
 * @file contact-tracing-dbscan-cpp.ipynb
 *
 * A simple contact tracing method using DBSCAN.
 *
 * Once a person has tested positive for the virus,
 * it is very important to identify others who may
 * have been infected by the diagnosed patient.
 * To identify those people, a process called
 * contact tracing is often used. In this example, we
 * apply DBSCAN to perform pseudo location-based
 * contact tracing using GPS data.
 */

In [2]:
!wget -q https://lab.mlpack.org/data/contact-tracing.csv

In [3]:
#include <mlpack/core.hpp>
#include <mlpack/methods/dbscan/dbscan.hpp>

#include <sstream>

In [4]:
// Header files to create and show the plot.
#define WITHOUT_NUMPY 1
#include "matplotlibcpp.h"
#include "xwidgets/ximage.hpp"

namespace plt = matplotlibcpp;

In [5]:
using namespace mlpack;

In [6]:
using namespace mlpack::dbscan;

In [7]:
using namespace mlpack::data;

In [8]:
// Load the pseudo location-based dataset for the contact tracing.
// The dataset has 4 columns: timestamp, latitude, longitude, id.
// The following cells address them as rows 0 to 3 of `input`, and use `info`
// to map between the numeric ids in row 3 and the person names.
arma::mat input;
DatasetInfo info;
// Request a fatal error on failure: without it the result of Load() would be
// ignored and the following cells would run on whatever `input` holds after a
// failed load (e.g. when the download above did not succeed).
data::Load("contact-tracing.csv", input, info, true /* fatal */);

In [9]:
// Plot ids with their latitudes and longitudes across the x-axis and y-axis respectively.
plt::figure_size(800, 800);

for (size_t i = 0; i < info.NumMappings(3); ++i)
{
    // Get the indices for the current label.
    arma::mat dataset = input.cols(arma::find(input.row(3) == (double) i));
    
    // Get the data for the indices.
    std::vector<double> x = arma::conv_to<std::vector<double>>::from(dataset.row(1));
    std::vector<double> y = arma::conv_to<std::vector<double>>::from(dataset.row(2));
    
    // Set the label for the legend.
    std::map<std::string, std::string> m;
    m.insert(std::pair<std::string, std::string>("label", info.UnmapString(i, 3)));
    
    plt::scatter(x, y, 10, m);
}

plt::xlabel("latitude");
plt::ylabel("longitude");
plt::title("ids with their latitudes and longitudes");
plt::legend();

plt::save("./plot.png");
auto im = xw::image_from_file("plot.png").finalize();
im

A Jupyter widget

In [10]:
// Cluster the recorded positions with DBSCAN; the cells below filter the
// resulting clusters to identify possible infections.

// Radial distance of 6 feet in kilometers (6 ft = 1.8288 m).
// NOTE(review): the clustered values are the latitude/longitude entries of the
// dataset - confirm that this radius is expressed in the unit the distance
// computation actually sees.
const double epsilon = 0.0018288;

// Only the latitude and longitude attributes are clustered: every row except
// the first (timestamp) and the last (id) is kept.
const arma::mat points = input.rows(1, input.n_rows - 2);

// Density-Based Spatial Clustering of Applications with Noise (DBSCAN).
//
// For more information checkout https://mlpack.org/doc/mlpack-git/doxygen/classmlpack_1_1dbscan_1_1DBSCAN.html
// or uncomment the line below.
// ?DBSCAN<>
DBSCAN<> model(epsilon, 2 /* Minimum number of points for each cluster. */);

// Perform the clustering; `assignments` receives the cluster assignments and
// the call returns the number of clusters.
arma::Row<size_t> assignments;
const size_t numCluster = model.Cluster(points, assignments);

In [11]:
// Print the number of clusters found by DBSCAN. For this dataset the model
// generated 29 clusters (see the output below); clusters 0 to 28 represent
// data points with neighboring nodes.
std::cout << "cluster: " << numCluster << std::endl;

cluster: 29


In [12]:
// Plot the clusters with their latitudes and longitudes across the x-axis and
// y-axis respectively. Only samples assigned to one of the clusters
// 0 .. numCluster - 1 are drawn.
plt::figure_size(800, 800);

for (size_t i = 0; i < numCluster; ++i)
{
    // Get the samples that were assigned to the current cluster.
    arma::mat dataset = input.cols(arma::find(assignments == i));

    // Latitude is stored in row 1 and longitude in row 2.
    std::vector<double> x = arma::conv_to<std::vector<double>>::from(dataset.row(1));
    std::vector<double> y = arma::conv_to<std::vector<double>>::from(dataset.row(2));

    // Use the cluster index as the label for the legend.
    std::map<std::string, std::string> m;
    m.insert(std::pair<std::string, std::string>("label", std::to_string(i)));

    plt::scatter(x, y, 10, m);
}

plt::xlabel("latitude");
plt::ylabel("longitude");
// This figure shows clusters, not ids, so the title says so.
plt::title("clusters with their latitudes and longitudes");
plt::legend();

// Write the figure to disk and show the saved image as a widget.
plt::save("./plot.png");
auto im = xw::image_from_file("plot.png").finalize();
im

A Jupyter widget

In [13]:
/**
 * Check for people who had been in contact with the infected patient.
 *
 * For every cluster that contains at least one sample of the infected person,
 * one line is printed to std::cout listing the other people found in that
 * cluster. Besides its arguments, the function reads the notebook-level
 * `input` matrix, whose row 3 holds the mapped id of each sample.
 *
 * @param infected Name of the infected person, e.g. "Judy".
 * @param info The dataset info object to map between ids and names.
 * @param assignments The cluster index generated for each sample.
 * @param numCluster The number of clusters found; only indices below this
 *     value are treated as clusters, everything else as noise.
 */
void PrintInfected(const std::string& infected,
                   DatasetInfo& info,
                   const arma::Row<size_t>& assignments,
                   const size_t numCluster)
{
    // Get id from name.
    // NOTE(review): confirm how MapString() behaves for a name that does not
    // occur in the dataset.
    const double infectedId = info.MapString<double>(infected, 3);

    // Cluster index of every sample that belongs to the infected person. The
    // assignments are copied into a matrix before the columns are selected by
    // index, as in the original code.
    arma::Mat<size_t> assignmentsTemp = assignments;
    arma::Mat<size_t> cluster = assignmentsTemp.cols(
        arma::find(input.row(3) == infectedId));

    // Filter out noise: valid cluster indices are 0 .. numCluster - 1, so the
    // comparison has to be strict.
    cluster = cluster.cols(arma::find(cluster < numCluster));

    std::cout << "Infected: " << infected << std::endl;

    // Find all names that are in the same infected cluster.
    for (size_t c = 0; c < cluster.n_elem; ++c)
    {
        // Several samples of the infected person can fall into the same
        // cluster; report every cluster only once.
        bool clusterSeen = false;
        for (size_t p = 0; p < c && !clusterSeen; ++p)
            clusterSeen = (cluster(p) == cluster(c));
        if (clusterSeen)
            continue;

        const arma::mat infectedIdsFromCluster = input.cols(
            arma::find(assignments == cluster(c)));

        // Collect the distinct names of the other people in this cluster, in
        // order of first appearance.
        std::string others;
        size_t g = 0;
        for (size_t n = 0; n < infectedIdsFromCluster.n_cols; ++n)
        {
            const double mappedId = infectedIdsFromCluster(3, n);

            // Skip ids that an earlier sample of this cluster already covered.
            bool idSeen = false;
            for (size_t m = 0; m < n && !idSeen; ++m)
                idSeen = (infectedIdsFromCluster(3, m) == mappedId);
            if (idSeen)
                continue;

            const size_t id = static_cast<size_t>(mappedId);
            const std::string name = info.UnmapString(id, 3);

            // Skip the name if it's the same as the infected person.
            if (name == infected)
                continue;

            if (g > 0)
                others += ",";
            others += name;
            ++g;
        }

        // The cluster always contains the infected person's own sample, so
        // "nobody else" has to be decided by the number of other names.
        if (g == 0)
            std::cout << "No people in the same cluster." << std::endl;
        else
            std::cout << "Maybe infected others in the cluster: " << others
                      << std::endl;
    }
}

In [14]:
// Check for the people who might be potentially infected from the patient,
// i.e. everyone who shares a cluster with Heidi.
PrintInfected("Heidi", info, assignments, numCluster)

Infected: Heidi
Maybe infected others in the cluster: David
Maybe infected others in the cluster: Judy


In [15]:
// Check for the people who might be potentially infected from the patient,
// i.e. everyone who shares a cluster with Alice.
PrintInfected("Alice", info, assignments, numCluster)

Infected: Alice
Maybe infected others in the cluster: Judy


In [16]:
// Check for the people who might be potentially infected from the patient,
// i.e. everyone who shares a cluster with David.
PrintInfected("David", info, assignments, numCluster)

Infected: David
Maybe infected others in the cluster: Heidi


In [17]:
// Check for the people who might be potentially infected from the patient,
// i.e. everyone who shares a cluster with Judy.
PrintInfected("Judy", info, assignments, numCluster)

Infected: Judy
Maybe infected others in the cluster: Heidi
Maybe infected others in the cluster: Alice


In [18]:
// Check for the people who might be potentially infected from the patient,
// i.e. everyone who shares a cluster with Carol.
PrintInfected("Carol", info, assignments, numCluster)

Infected: Carol
Maybe infected others in the cluster: Frank,Grace
