<a href="https://colab.research.google.com/github/polis-community/red-dwarf/blob/main/docs/notebooks/polis-implementation-demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install red-dwarf (with its `all` extra) directly from the `main` branch on GitHub.
# NOTE(review): `@main` is unpinned, so a fresh run may install a newer library version
# than the one that produced the saved outputs -- consider pinning a tag or commit.
%pip install --quiet red-dwarf[all]@git+https://github.com/polis-community/red-dwarf.git@main

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.5/647.5 kB[0m [31m145.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.1/116.1 kB[0m [31m176.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m154.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/61.4 kB[0m [31m138.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.4/69.4 kB[0m [31m162.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for red-dwarf (pyproject.toml) ... [?25l[?25hdone
  Building wheel for annoy (setup.py) ... [?25l[?25hdone


In [2]:
from reddwarf.data_loader import Loader
import json
import numpy as np

# Topic: What were the most significant developments in tech and politics in 2018?
# 5 groups, 65 participants (56 grouped), 43 comments (open)
REPORT_ID="r2dfw8eambusb8buvecjt"
# REPORT_ID="r6ipxzfudddppwesbmtmn" # Alternative Polis convo

print(f"Loading data from https://pol.is/report/{REPORT_ID}")

# We'll use the data_loader utility to load the data for this report,
# starting with the raw votes.
loader = Loader(polis_id=REPORT_ID)
votes = loader.votes_data

# Show what a single raw vote record looks like:
print(json.dumps(votes[0], indent=2))

# Show what a single raw statement record looks like:
statements = loader.comments_data
print(json.dumps(statements[0], indent=2))

# The math data is needed by later cells (moderated-out list, centroid
# guesses, and the participant ids to keep).
math_data = loader.math_data

Loading data from https://pol.is/report/r2dfw8eambusb8buvecjt
{
  "participant_id": 0,
  "statement_id": 0,
  "vote": 1,
  "weight_x_32767": 0,
  "modified": 1544544810393.0,
  "conversation_id": "6jrufhr6dp",
  "datetime": null
}
{
  "txt": "I feel Blockchain technologies have been over-hyped.",
  "statement_id": 1,
  "created": "2018-12-11T16:13:43.605000Z",
  "tweet_id": null,
  "quote_src_url": null,
  "is_seed": false,
  "is_meta": false,
  "lang": "en",
  "participant_id": 0,
  "velocity": 1,
  "moderated": -1,
  "active": true,
  "agree_count": 12,
  "disagree_count": 2,
  "pass_count": 1,
  "count": 15,
  "conversation_id": "6jrufhr6dp",
  "datetime": null
}


In [3]:
# Now we use our custom polis implementation to recreate the Polis calculations.
from reddwarf.implementations.polis import run_clustering
from reddwarf.utils.statements import process_statements
from reddwarf.utils.polismath import get_corrected_centroid_guesses

# In this conversation, any -1 is moderated out. Matches upstream behavior.
# TODO: Investigate why is_strict_moderation doesn't affect this.
# Only the last two returned values are used here; the first two are discarded.
_, _, mod_out_statement_ids, meta_statement_ids = process_statements(statements)
# Print both lists so they can be compared by eye.
print(f"{math_data['mod-out']=}")
print(f"{mod_out_statement_ids=}")

# We can run this from scratch, but kmeans is non-deterministic and might find slightly different clusters,
# or even a different k-value (number of groups) if the silhouette scores it finds are better.
# To show how to reproduce Polis results, we'll set the initial guess coordinates that we know the Polis platform got:
init_cluster_center_guesses = get_corrected_centroid_guesses(math_data)
print(f"{init_cluster_center_guesses=}")

math_data['mod-out']=[0, 1, 31]
mod_out_statement_ids=[0, 1, 31]
init_cluster_center_guesses=[[2.3249928691116573, -0.660737300795657], [-0.2555363550500295, 0.23787153431450953], [-1.2703643531444606, -1.913074376569441], [1.3341169243032838, 1.4244873357833063], [-3.2582815788575115, 1.4124353685383775]]


In [13]:
# Now, with those guessed cluster centers seeded, we'll run the whole calculation from raw vote data.
result = run_clustering(
    votes=votes,
    mod_out_statement_ids=mod_out_statement_ids,
    meta_statement_ids=meta_statement_ids,
    # If clustering is getting ready to find a new k, you may need to uncomment
    # this to properly reproduce the Polis visualization.
    #
    # force_group_count=len(init_cluster_center_guesses),
    init_centers=init_cluster_center_guesses,
    # Polis has some edge-case logic that keeps arbitrary [early] participants in
    # the clustering algorithm for reasons that are hard to reproduce, so we
    # borrow the list of kept participants from the math data instead.
    # See: https://github.com/compdemocracy/polis/pull/1893#issuecomment-2654666421
    keep_participant_ids=math_data["in-conv"],
)

from itertools import islice
from pprint import pprint

def take(n, iterable):
    """Return the first ``n`` key/value pairs of ``iterable`` as a dict.

    Args:
        n: Maximum number of pairs to keep. Fewer are returned when the
            input holds fewer than ``n`` pairs.
        iterable: Either a mapping-like object (pairs are read through its
            ``.items()`` method) or any iterable of ``(key, value)`` pairs.

    Returns:
        A new ``dict`` with at most ``n`` entries, in iteration order.

    Raises:
        ValueError: If ``n`` is negative (raised by ``itertools.islice``).
    """
    # Mapping-like inputs expose their pairs via .items(); anything else is
    # assumed to already yield (key, value) pairs.
    pairs = iterable.items() if hasattr(iterable, "items") else iterable
    return dict(islice(pairs, n))

# Preview only the first five entries of each projection mapping
# (keyed by participant id and statement id respectively) to keep the output short.
pprint(take(5, result.participant_projections))
pprint(take(5, result.statement_projections))

{0: array([1.97302193, 1.26845666]),
 1: array([ 3.43041183, -1.16720882]),
 2: array([1.01692331, 0.47138823]),
 3: array([1.12613927, 0.08307809]),
 4: array([1.26019023, 0.80821615])}
{0: array([0., 0.]),
 1: array([0., 0.]),
 2: array([0.93125323, 0.26783686]),
 3: array([ 0.87136986, -1.24063318]),
 4: array([ 1.25036248, -0.63584169])}


In [14]:
from reddwarf.data_presenter import generate_figure_polis

# flip_y is sometimes needed to make the plot look like the Polis interface.
generate_figure_polis(result, show_guesses=True, flip_y=False)
# Note: the red points (group '-1') are the cluster center guesses that we fed into the KMeans algorithm.


AttributeError: 'PolisClusteringResult' object has no attribute 'kmeans'

You can see that this looks exactly like the Polis visualization!

![screenshot of the polis report](https://imgur.com/blkIEtW.png)

In [15]:
from reddwarf.data_presenter import print_selected_statements

# Print a text report of the selected statements; the output below lists
# consensus statements first, then group-representative statements.
print_selected_statements(result=result, statements_data=statements)


# CONSENSUS STATEMENTS

## FOR AGREEMENT

* Authoritarian populist parties worldwide figured out how to weaponize trust and social media, winning elections.
    86% of everyone who voted on statement 28 agreed.

* We realized that information warfare is occurring by nonstate actors in destabilizing the international order
    80% of everyone who voted on statement 20 agreed.

* 2018 has been marked by the troubling rise of authoritarian leaders around the world.
    88% of everyone who voted on statement 39 agreed.

* The conversation about ethical uses of technology has reached a tipping point. Citizens, businesses and governments are on it, but baffled.
    77% of everyone who voted on statement 27 agreed.

* 2018 was the year Americans stopped thinking Silicon Valley was “different” or distinct from Wall St or the military industrial complex
    74% of everyone who voted on statement 23 agreed.

## FOR DISAGREEMENT

None.


# GROUP-REPRESENTATIVE STATEMENTS

## GROUP A

* Major regu