In [1]:
%%script bash
# Print the version line of the g++ found on PATH.
g++ --version | grep g++
# Optional: download the cxx.cxx helper source (left disabled here).
#wget https://raw.githubusercontent.com/lhprojects/blog/master/JupyterNotebooks/cxx.cxx -O cxx.cxx
# Compile cxx.cxx into an executable named ./cxx; the confirmation message is
# echoed only when compilation succeeds.
g++ -std=c++03 cxx.cxx -o cxx && echo cxx installed to current directory

g++ (Ubuntu 10.2.0-5ubuntu1~20.04) 10.2.0
cxx installed to current directory


In [231]:
%%bash

#wget https://raw.githubusercontent.com/orlp/pdqsort/master/pdqsort.h
#wget https://raw.githubusercontent.com/Morwenn/vergesort/master/vergesort.h
#wget https://raw.githubusercontent.com/skarupke/ska_sort/master/ska_sort.hpp

In [234]:
%%script ./cxx -std=c++17 -O3 -L/home/liangh/projects/abseil-cpp/install/lib -labsl_hash -labsl_raw_hash_set -labsl_wyhash -labsl_hashtablez_sampler -labsl_base -labsl_city -I/home/liangh/projects/sparsehash/install/include -I/home/liangh/projects/abseil-cpp/install/include

#include <random>
#include <unordered_map>
#include <unordered_set>
#include <map>
#include <set>
#include <algorithm>
#include <chrono>
#include <stdio.h>
#include <mutex>
#include <list>
#include <sparsehash/dense_hash_set>
#include <sparsehash/sparse_hash_set>
#include <absl/container/flat_hash_set.h>
#include "ska_sort.hpp"
#include "pdqsort.h"
#include "vergesort.h"
#include <climits>

// Produces the benchmark input: `n` pseudo-random ints.
// Every element is drawn from a uniform distribution over the closed range
// [0, n], using a Mersenne Twister engine seeded with the fixed value 0 so
// repeated calls with the same `n` return the same data.
std::vector<int> get_vector(int n) {
    std::mt19937 engine(0);
    std::uniform_int_distribution<> pick(0, n);
    std::vector<int> values(n);
    // Fill front to back so the engine is consumed in element order.
    std::generate(values.begin(), values.end(),
                  [&engine, &pick]() { return pick(engine); });
    return values;
}



// Builds a sorted, duplicate-free vector from `v`, ordering it with ska_sort
// (a radix sort). The input is left untouched; all work happens on a copy.
std::vector<int> make_vector_skasort(std::vector<int> const &v) {
    std::vector<int> sorted(v);
    ska_sort(sorted.begin(), sorted.end());
    // std::unique moves the distinct values to the front; drop the leftover tail.
    sorted.erase(std::unique(sorted.begin(), sorted.end()), sorted.end());
    // Ask the vector to give back the capacity freed by removing duplicates.
    sorted.shrink_to_fit();
    return sorted;
}

// Builds a sorted, duplicate-free vector from `v` using std::sort.
// The input is left untouched; all work happens on a copy.
// Alternative sorters that were tried in place of std::sort:
//   pdqsort(first, last), vergesort::vergesort(first, last).
std::vector<int> make_vector_stdsort(std::vector<int> const &v) {
    std::vector<int> sorted(v);
    std::sort(sorted.begin(), sorted.end());
    // std::unique moves the distinct values to the front; drop the leftover tail.
    sorted.erase(std::unique(sorted.begin(), sorted.end()), sorted.end());
    // Ask the vector to give back the capacity freed by removing duplicates.
    sorted.shrink_to_fit();
    return sorted;
}

// Builds a std::list holding the distinct values of `v` in ascending order.
// Sorting and de-duplication are done on a temporary vector copy; the list is
// then constructed from the de-duplicated prefix of that vector.
std::list<int> make_list(std::vector<int> const &v) {
    std::vector<int> scratch(v);
    std::sort(scratch.begin(), scratch.end());
    std::vector<int>::iterator const unique_end =
        std::unique(scratch.begin(), scratch.end());
    return std::list<int>(scratch.begin(), unique_end);
}

// Constructs a container of type C from the elements of `v` by handing C's
// iterator-range constructor the whole vector.
template<class C>
C make_set(std::vector<int> const &v) {
    std::vector<int>::const_iterator const first = v.begin();
    std::vector<int>::const_iterator const last = v.end();
    return C(first, last);
}

// Builds a container of type C that needs an "empty key" registered before
// any insertion (the benchmark uses it with google::dense_hash_set).
// INT_MIN is registered as that key, then every element of `v` is inserted
// in order.
template<class C>
C make_densehashset(std::vector<int> const &v) {
    C table;
    table.set_empty_key(INT_MIN);
    std::for_each(v.begin(), v.end(),
                  [&table](int value) { table.insert(value); });
    return table;
}

// Builds a map-like container of type C whose keys are the elements of `v`,
// each mapped to 0. Entries are created/overwritten through operator[].
template<class C>
C make_map(std::vector<int> const &v) {
    C table;
    for (std::vector<int>::const_iterator it = v.begin(); it != v.end(); ++it) {
        table[*it] = 0;
    }
    return table;
}

// Benchmark functor: probes a container for the keys 0, 1, ..., size()-1 and
// returns how many of them were found.
struct Lookup {

    // Generic version for containers with a member find(): a key counts as
    // present when find() does not return end().
    template<class C>
    int operator()(C const &v) {
        int const limit = static_cast<int>(v.size());
        int hits = 0;
        for (int key = 0; key < limit; ++key) {
            if (v.find(key) != v.end()) {
                ++hits;
            }
        }
        return hits;
    }

    // Vector version: membership is tested with std::binary_search over the
    // whole vector, so the vector is expected to be sorted already.
    int operator()(std::vector<int> const &v) {
        int const limit = static_cast<int>(v.size());
        int hits = 0;
        for (int key = 0; key < limit; ++key) {
            if (std::binary_search(v.begin(), v.end(), key)) {
                ++hits;
            }
        }
        return hits;
    }

    // List version: no lookup is performed; the result is always 0.
    int operator()(std::list<int> const &v) {
        return 0;
    }

};


// Benchmark functor: iterates over every element of a container and returns
// a checksum of the values (their sum, reduced modulo 2^N where N is the
// width of unsigned int).
struct Visitor {

    // Sums all elements of `v`.
    // The running total is kept in an unsigned integer because the benchmark
    // input is large enough to exceed INT_MAX; signed overflow is undefined
    // behaviour, whereas unsigned arithmetic wraps by definition. The total is
    // converted back to int once, on return.
    template<class C>
    int operator()(C const &v) {
        unsigned int sum = 0;
        for(auto e : v) {
            sum += static_cast<unsigned int>(e);
        }
        return static_cast<int>(sum);
    }

};


// Benchmark functor for map-like containers: iterates over every entry and
// returns a checksum of the keys (their sum, reduced modulo 2^N where N is
// the width of unsigned int). Mapped values are ignored.
struct VisitorForMap {

    // Sums the `.first` member of every entry of `v`.
    // Entries are visited by const reference so the timed loop does not copy
    // each key/value pair. The running total is unsigned because the
    // benchmark input is large enough to exceed INT_MAX; signed overflow is
    // undefined behaviour, whereas unsigned arithmetic wraps by definition.
    template<class C>
    int operator()(C const &v) {
        unsigned int sum = 0;
        for(auto const &entry : v) {
            sum += static_cast<unsigned int>(entry.first);
        }
        return static_cast<int>(sum);
    }
};

// Becomes true after the table header has been printed, so the header is
// emitted only by the first timeit() call.
static bool once_flag;

// Runs one benchmark row and prints it.
//
//   title  - label printed in the first column.
//   make   - callable taking no arguments; returns the container under test.
//   visit  - callable taking the container; returns an int checksum from
//            iterating over it.
//   lookup - callable taking the container; returns an int count from
//            probing it.
//
// Three phases are timed separately, in this order: make(), lookup(), visit().
// Each row shows the three durations in milliseconds plus the two int results.
//
// std::chrono::steady_clock is used because it is monotonic; a clock that can
// be adjusted while the benchmark runs would corrupt the measured intervals.
template<class Make, class Visit, class Lookup>
void timeit(std::string const &title, Make make, Visit visit, Lookup lookup) {
    using bench_clock = std::chrono::steady_clock;
    using millis = std::chrono::duration<double, std::milli>;

    auto const t0 = bench_clock::now();
    auto container = make();
    auto const t1 = bench_clock::now();
    int const c1 = lookup(container);
    auto const t2 = bench_clock::now();
    int const c2 = visit(container);
    auto const t3 = bench_clock::now();

    millis const d1 = t1 - t0;  // construction
    millis const d2 = t2 - t1;  // lookup
    millis const d3 = t3 - t2;  // iteration

    if(!once_flag) {
        once_flag = true;
        printf("%20s %10s %15s %10s %15s %10s\n", "title", "make",  "lookup-res.", "lookup", "iterate-res.", "iterate");        
    }
    printf("%20s %10.1f %15d %10.1f %15d %10.1f\n", title.c_str(), d1.count(), c1, d2.count(), c2, d3.count());
}

int main() {    
    
    auto v = get_vector(2000000);    
    timeit("vector_stdsort", [&v](){ return make_vector_stdsort(v); }, Visitor(), Lookup());
    timeit("vector_skasort", [&v](){ return make_vector_skasort(v); }, Visitor(), Lookup());
    timeit("set", [&v](){ return make_set<std::set<int> >(v); }, Visitor(), Lookup());    
    timeit("map", [&v](){ return make_map<std::map<int,int> >(v); }, VisitorForMap(), Lookup());
    timeit("unordered_set", [&v](){ return make_set<std::unordered_set<int> >(v); }, Visitor(), Lookup());
    timeit("unordered_map", [&v](){ return make_map<std::unordered_map<int,int> >(v); }, VisitorForMap(), Lookup());
    timeit("vector-list", [&v](){ return make_list(v); }, Visitor(), Lookup());
    timeit("dense_has_set", [&v](){ return make_densehashset<google::dense_hash_set<int> >(v); }, Visitor(), Lookup());
    timeit("sparse_has_set", [&v](){ return make_set<google::sparse_hash_set<int> >(v); }, Visitor(), Lookup());
    timeit("flat_has_set", [&v](){ return make_set<absl::flat_hash_set<int> >(v);    }, Visitor(), Lookup());
    
}

               title       make     lookup-res.     lookup    iterate-res.    iterate
      vector_stdsort      121.9          799947       40.8     -1497054351        0.2
      vector_skasort       33.0          799947       40.8     -1497054351        0.1
                 set      809.8          799947       86.9     -1497054351      105.5
                 map     1217.3          799947       87.3     -1497054351      104.8
       unordered_set      218.1          799947       12.7     -1497054351        3.2
       unordered_map      203.1          799947       26.8     -1497054351       32.9
         vector-list      137.3               0        0.0     -1497054351        3.8
       dense_has_set       43.4          799947        5.0     -1497054351        6.7
      sparse_has_set       98.5          799947       25.8     -1497054351        3.7
        flat_has_set       75.6          799947       12.6     -1497054351        4.5
