<a href="https://colab.research.google.com/github/patcon/valency-anndata/blob/main/example-usage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
# Install the Polis client and valency-anndata straight from GitHub.
# NOTE(review): neither URL pins a tag or commit, so every run installs whatever
# is currently on the default branch. Consider appending `@<tag-or-sha>` to each
# URL so the notebook stays reproducible.
%pip install --quiet \
  git+https://github.com/patcon/polis-client \
  git+https://github.com/patcon/valency-anndata

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [28]:
# Notebook display settings: truncate DataFrame previews once they exceed 6 rows.
import pandas as pd
pd.set_option("display.max_rows", 6)

In [29]:
import valency_anndata as val

# Load a Polis conversation, identified here by its report URL.
adata = val.datasets.polis(source="https://pol.is/report/r2dfw8eambusb8buvecjt")
# These sources work similarly to the one above:
#   - r2dfw8eambusb8buvecjt         (bare report_id)
#   - https://pol.is/6jrufhr6dp     (conversation URL)
#   - 6jrufhr6dp                    (bare conversation_id)
#
# NOTE: Technically, there are some differences between
# 1. data fetched from CSV exports (best source when we have a report_id) and
# 2. data fetched from API endpoints (best we can do when we only have a conversation_id)

adata

AnnData object with n_obs × n_vars = 0 × 0
    uns: 'votes_csv', 'statements'

In [30]:
# Build the vote matrix on `adata` in place (the return value is not used).
# The recorded output shows the shape going from 0 × 0 to 65 × 43.
# NOTE(review): presumably rows are participants and columns are statements —
# confirm against the rebuild_vote_matrix documentation.
val.preprocessing.rebuild_vote_matrix(adata)

adata

AnnData object with n_obs × n_vars = 65 × 43
    uns: 'votes_csv', 'statements'

In [31]:
# Rebuild the matrix again, this time with trim_rule=0.8. The recorded output
# shows a smaller 52 × 40 matrix, and it replaces the untrimmed one on `adata`.
# NOTE(review): what 0.8 means (which rows/columns get dropped) is not visible
# in this notebook — confirm against the rebuild_vote_matrix documentation.
val.preprocessing.rebuild_vote_matrix(adata, trim_rule=0.8)

adata

AnnData object with n_obs × n_vars = 52 × 40
    uns: 'votes_csv', 'statements'

In [32]:
# Inspect the raw vote table kept under `uns` (timestamp, comment-id, voter-id, vote).
adata.uns["votes_csv"]

Unnamed: 0,timestamp,datetime,comment-id,voter-id,vote
0,1544544810,Tue Dec 11 2018 16:13:30 GMT+0000 (Coordinated...,0,0,1
1,1544544823,Tue Dec 11 2018 16:13:43 GMT+0000 (Coordinated...,1,0,1
2,1544557247,Tue Dec 11 2018 19:40:47 GMT+0000 (Coordinated...,1,1,1
...,...,...,...,...,...
985,1548327013,Thu Jan 24 2019 10:50:13 GMT+0000 (Coordinated...,23,63,1
1499,1665662071,Thu Oct 13 2022 11:54:31 GMT+0000 (Coordinated...,42,64,1
1476,1665662074,Thu Oct 13 2022 11:54:34 GMT+0000 (Coordinated...,40,64,-1


In [33]:
# Inspect the statement table kept under `uns` (text, ids, moderation flags, vote counts).
adata.uns["statements"]

Unnamed: 0,conversation_id,txt,tid,created,quote_src_url,is_seed,is_meta,lang,pid,velocity,mod,active,agree_count,disagree_count,pass_count,count
0,6jrufhr6dp,I feel Blockchain technologies have been over-...,1,1544544823605,,False,False,en,0,1,-1,True,12,2,1,15
1,6jrufhr6dp,"""Big data"" is no longer a hot topic.",3,1544557393305,,False,False,en,1,1,0,True,13,20,12,45
2,6jrufhr6dp,It became clear that the business model preval...,26,1544594128774,,False,False,en,17,1,0,True,21,5,9,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40,6jrufhr6dp,For-profit tech companies will always be incen...,41,1544824910554,,False,False,en,52,1,0,True,9,0,2,11
41,6jrufhr6dp,"Social media facilitated the growth of ""illibe...",29,1544594443493,,False,False,en,17,1,1,True,25,7,4,36
42,6jrufhr6dp,I believe that the abuse of Facebook in the 20...,10,1544563901372,,False,False,en,4,1,0,True,13,16,10,39


# Dreamcode

In [34]:
from IPython.display import Image, display

def dreamcode_exists(placeholder_image=None):
  """Gate for prospective ("dreamcode") functionality that isn't written yet.

  Wrap calls to not-yet-implemented APIs in `if dreamcode_exists(...):` so the
  notebook keeps running; in the meantime a placeholder/mockup image is shown.

  Thanks to Gregor Martynus (gr2m) for the concept:
  https://github.com/gr2m/dreamcode.io

  Args:
    placeholder_image: Optional URL of a mockup image. When truthy, the image
      is displayed inline with width=500.

  Returns:
    Always False — dreamcode never exists yet, so the guarded block is skipped.
  """
  # Nothing to show: just report that the functionality is not there.
  if not placeholder_image:
    return False

  mockup = Image(url=placeholder_image, width=500)
  display(mockup)
  return False

## PCA

In [35]:
# Proposed API (not implemented yet), modelled on scanpy's dimensionality-reduction workflow.
# See: https://scanpy.readthedocs.io/en/stable/tutorials/basics/clustering.html#dimensionality-reduction

if dreamcode_exists("https://imgur.com/N7K3cgf.png"):
  # See: https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.pca.html
  val.tools.pca(adata)
  # See: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pl.pca_variance_ratio.html
  # NOTE(review): n_pcs=50 mirrors the scanpy tutorial, but the matrix built
  # earlier in this notebook is 52 × 40, so fewer than 50 components would be
  # available — confirm the intended value once this is implemented.
  val.plotting.pca_variance_ratio(adata, n_pcs=50, log=True)


In [36]:
# Proposed API (not implemented yet): a 2-column grid of PCA scatter plots.
if dreamcode_exists("https://imgur.com/VAI8zlg.png"):
  # See: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pl.pca.html
  val.plotting.pca(
      adata,
      # NOTE(review): "sample" and "pct_counts_mt" look copied from the scanpy
      # single-cell tutorial; the `adata` shown earlier in this notebook lists no
      # such fields — replace with Polis-relevant keys when implementing.
      color=["sample", "sample", "pct_counts_mt", "pct_counts_mt"],
      # One (x, y) pair of principal-component indices per panel.
      dimensions=[(0, 1), (2, 3), (0, 1), (2, 3)],
      ncols=2,
      size=2,
  )

## KMeans

In [37]:
# Proposed API (not implemented yet): KMeans clustering on `adata`.
# No mockup image exists for this step, so nothing is displayed.

if dreamcode_exists():
  # Closest scanpy analogue (Leiden clustering):
  # See: https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.leiden.html
  # NOTE(review): later cells pass color="kmeans", so the result is presumably
  # meant to be stored under a "kmeans" key — confirm when implementing.
  val.tools.kmeans(adata)

## UMAP

In [38]:
# Proposed API (not implemented yet): neighbor graph -> UMAP embedding -> plot.
if dreamcode_exists("https://imgur.com/t7G45jo.png"):
  # See: https://scanpy.readthedocs.io/en/stable/tutorials/basics/clustering.html#nearest-neighbor-graph-construction-and-visualization
  # See: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.neighbors.html
  val.preprocessing.neighbors(adata)
  # See: https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html
  val.tools.umap(adata)
  # See: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pl.umap.html
  val.plotting.umap(
      adata,
      color="kmeans",
      # Use a smaller point size to prevent overlap.
      size=2,
  )


## Langevitour

In [39]:
# Proposed API (not implemented yet): interactive Langevitour view of the data.
if dreamcode_exists("https://imgur.com/OmmUxMo.png"):
  # See: https://logarithmic.net/langevitour/
  # See: https://colab.research.google.com/github/pfh/langevitour/blob/main/py/examples/langevitour.ipynb
  val.viz.langevitour(
      adata,
      color="kmeans",
      # NOTE(review): these strings presumably select slices of the UMAP and PCA
      # embeddings as tour dimensions — the selector syntax is still to be defined.
      dimensions=["X_umap[0:2]", "X_pca[0:10]"],
  )


## DataMapPlot

In [40]:
# Proposed API (not implemented yet): static DataMapPlot of participants.
if dreamcode_exists("https://imgur.com/IfrR6vp.png"):
  # See: https://github.com/TutteInstitute/datamapplot/blob/main/doc/basic_usage.ipynb
  val.viz.datamapplot(
      adata,
      map_of="participants",
      basis="umap",
      color="kmeans",
  )

In [41]:
# Proposed API (not implemented yet): interactive DataMapPlot of statements.
# NOTE(review): this reuses the same placeholder image as the participants map
# in the previous cell — confirm whether a separate statements mockup was intended.
if dreamcode_exists("https://imgur.com/IfrR6vp.png"):
  # See: https://github.com/TutteInstitute/datamapplot/blob/main/doc/interactive_intro.ipynb
  val.viz.datamapplot(
      adata,
      map_of="statements",
      basis="umap",
      color="kmeans",
      interactive=True,
  )

## Perspective Explorer

In [42]:
# Proposed API (not implemented yet): "Perspective Explorer" visualization.
if dreamcode_exists("https://imgur.com/yVOG22g.png"):
  val.viz.perspective_explorer(adata) # TODO: arguments and behavior still to be specified