<a href="https://colab.research.google.com/github/patcon/valency-anndata/blob/main/example-usage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
# Install polis-client and valency-anndata directly from GitHub. No branch, tag
# or commit is given in the URLs, so the installed code is not pinned to a version.
%pip install --quiet \
  git+https://github.com/patcon/polis-client \
  git+https://github.com/patcon/valency-anndata

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [24]:
# Notebook display settings: cap DataFrame previews at 6 rows so the tables
# rendered further down stay compact.
import pandas as pd
pd.set_option("display.max_rows", 6)

In [25]:
import valency_anndata as val

# Load a Polis conversation, identified here by its report URL.
adata = val.datasets.polis.load("https://pol.is/report/r29kkytnipymd3exbynkd")
# These work similarly to the above source:
#   - r29kkytnipymd3exbynkd
#   - https://pol.is/3hfmicmybc
#   - 3hfmicmybc
#
# NOTE: Technically, there are some differences between
# 1. data fetched from CSV exports (best source when we have report_id) and
# 2. data fetched from API endpoints (best we can do when we only have conversation_id)

adata

AnnData object with n_obs × n_vars = 2737 × 1045
    var: 'content', 'participant_id_authored', 'created_date', 'is_seed', 'is_meta', 'moderation_state', 'language_original', 'language_current', 'is_translated'
    uns: 'votes', 'votes_meta', 'statements', 'statements_meta', 'source', 'schema'

In [26]:
# Rebuild the vote matrix on `adata`. The return value is not assigned, so this
# presumably updates `adata` in place (its repr shows a smaller shape afterwards).
# NOTE(review): the meaning of trim_rule=0.8 is not visible from this notebook;
# confirm against the valency-anndata documentation.
val.preprocessing.rebuild_vote_matrix(adata, trim_rule=0.8)

adata

AnnData object with n_obs × n_vars = 2105 × 813
    var: 'content', 'participant_id_authored', 'created_date', 'is_seed', 'is_meta', 'moderation_state', 'language_original', 'language_current', 'is_translated'
    uns: 'votes', 'votes_meta', 'statements', 'statements_meta', 'source', 'schema'

In [27]:
# Statements before translation: still in their original language (Spanish
# here), with `is_translated` False. The next cell auto-translates them to English.
adata.var

Unnamed: 0_level_0,content,participant_id_authored,created_date,is_seed,is_meta,moderation_state,language_original,language_current,is_translated
comment-id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,"No se trata de guillotinar a los ricos, quemar...",0,1572705212934,True,False,-1,es,es,False
1,"Prioridades del Gobierno,recogiendo demandas c...",1,1572705358753,True,True,-1,es,es,False
2,Este #CambiodeGabinete significa el inicio de ...,1,1572705410503,True,True,-1,es,es,False
...,...,...,...,...,...,...,...,...,...
810,Elaborar una nueva constitucion de forma const...,2116,1573559302877,False,False,0,es,es,False
811,Destinar porcebtaje de fondos de FFAA a educac...,2116,1573559327627,False,False,0,es,es,False
812,Que se acabe la institución de carabineros y s...,2116,1573559368060,False,False,0,es,es,False


In [28]:
# Translate all statements into a common language. Let's try English.
#
# Done using unofficial Google Translate APIs, so no auth required!
# See: https://github.com/ssut/py-googletrans
#
# The return value is not assigned; the updated `content`, `language_current`
# and `is_translated` columns show up on `adata.var` afterwards.
val.datasets.polis.translate_statements(adata, translate_to="en")

adata.var

Unnamed: 0_level_0,content,participant_id_authored,created_date,is_seed,is_meta,moderation_state,language_original,language_current,is_translated
comment-id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,"It is not about guillotining the rich, burning...",0,1572705212934,True,False,-1,es,en,True
1,"Government priorities, collecting citizen dema...",1,1572705358753,True,True,-1,es,en,True
2,This #CabinetChange means the beginning of a n...,1,1572705410503,True,True,-1,es,en,True
...,...,...,...,...,...,...,...,...,...
810,Prepare a new constitution in a constitutive w...,2116,1573559302877,False,False,0,es,en,True
811,Allocate percentage of Armed Forces funds to e...,2116,1573559327627,False,False,0,es,en,True
812,End the institution of police and establish a ...,2116,1573559368060,False,False,0,es,en,True


# Dreamcode

In [29]:
from IPython.display import Image, display

# Gate for "dreamcode": prospective functionality that is sketched in this
# notebook before it has been written. Each sketch sits behind
# `if dreamcode_exists(...)`, and a placeholder/mockup is shown in the meantime.
#
# Thanks to Gregor Martynus (gr2m) for concept: https://github.com/gr2m/dreamcode.io
def dreamcode_exists(placeholder_image=None):
  """Report whether the guarded dreamcode is implemented (currently never).

  Args:
    placeholder_image: Optional URL of a mockup image. When truthy, the image
      is displayed inline at a width of 500.

  Returns:
    Always False, so the guarded block is skipped.
  """
  show_mockup = bool(placeholder_image)
  if show_mockup:
    mockup = Image(url=placeholder_image, width=500)
    display(mockup)

  # Nothing behind this gate has been implemented yet.
  return False

## Loading Datasets

In [30]:
# Dreamcode (never runs): sketch of loading a conversation and enriching it
# with participant data from Typeform.
if dreamcode_exists():
  # See: https://github.com/patcon/universal-polis-wrapper/
  adata = val.datasets.polis.load(source="https://pol.is/2demo")
  # This would augment Polis conversation data by importing
  # participant data from a Typeform into anndata's adata.obs DataFrame,
  # opportunistically joining the data via any matched xids.
  val.datasets.import_typeform(adata, typeform_id="wFXxYRdJ")

In [31]:
# Inspiration: https://scanpy.readthedocs.io/en/stable/api/datasets.html
#
# Dreamcode (never runs): sketches of prospective dataset loaders. Each call
# shows the intended spelling of an API that does not exist yet.

if dreamcode_exists():
  # This would be a variant where likert scale data is loaded.
  #     - this would use `val.preprocessing.convert_likert` (see below)
  #     - this could either augment polis conversation data, or be used in lieu
  #       of Polis data, processing all data from Typeform responses.
  #
  # NOTE(review): the Preprocessing section calls the non-liberal conversion
  # scheme "strict"; reconcile that name with "conservative" used here.
  #
  # See: https://github.com/polis-community/red-dwarf/issues/89
  val.datasets.import_typeform(adata, typeform_id="wFXxYRdJ", likert_conversion=True)
  val.datasets.import_typeform(adata, typeform_id="wFXxYRdJ", likert_conversion="liberal")
  val.datasets.import_typeform(adata, typeform_id="wFXxYRdJ", likert_conversion="conservative")

  # Extract polislike valence data from CIP Global Dialogues on AI.
  #     - citizens of many countries surveyed over time.
  #     - pre-generated synthetic agree/disagree/pass data.
  #
  # See: https://globaldialogues.ai/about
  # See: https://globaldialogues.ai/download-data
  adata = val.datasets.load_cip_global_dialogues()

  # Extract polislike valence data from UTokyo Asahi Survey (UTAS).
  #     - pre-election survey of both voters and electoral candidates.
  #     - long history, running most years since 2003.
  #     - media collaboration gives rare high rate of politician responses.
  #     - repeats a substantial number of questions across survey waves.
  #
  # See: https://www.masaki.j.u-tokyo.ac.jp/utas/utasindex_en.html
  # See: https://github.com/nishio/UTAS-UMAP
  adata = val.datasets.load_utokyo_asahi_survey()

  # Extract polislike valence data from World Values Survey (WVS).
  #     - well-known repeated survey of Schwartz human values.
  #     - can specify data subset in various ways for convenience.
  #
  # See: https://www.worldvaluessurvey.org/WVSContents.jsp
  adata = val.datasets.load_world_values_survey(wave=8)
  adata = val.datasets.load_world_values_survey(years=1997)

  # Extract polislike valence data from European Social Survey (ESS).
  #     - well-known repeated survey of Schwartz human values.
  #     - has sections also focussed on current events.
  #     - can specify data subset in various ways for convenience.
  #
  # See: https://ess.sikt.no/en/series/321b06ad-1b98-4b7d-93ad-ca8a24e8788a
  # See: https://github.com/ropensci/essurvey/issues/57#issuecomment-3643483042
  adata = val.datasets.load_european_social_survey(round=11)

## Preprocessing

In [32]:
if dreamcode_exists():
  # Process likert scale data into polislike/valence data. e.g., -1/0/+1
  #     - there are different ways to convert likert scales:
  #       - liberal: 12345 => DDPAA
  #       - strict: 12345 => DPPPA
  #       (presumably D = disagree, P = pass, A = agree; confirm)
  #     - there might be clever, per-participant ways to deduce thresholds
  #     - this is used as basis of processing many other datasets
  #
  # NOTE(review): `vote_data` is not defined anywhere in the visible notebook;
  # it stands in for whatever likert-scale input this function would accept.
  val.preprocessing.convert_likert(vote_data, conversion_scheme="DDPAA")

In [33]:
if dreamcode_exists():
  # Inspiration: https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.calculate_qc_metrics.html
  #
  # NOTE(review): the lines below look like scanpy example code kept as a
  # placeholder. `pbmc`, `sc` and `sns` are not defined anywhere in the visible
  # notebook, and no `val.*` equivalent is sketched yet.
  pbmc.var["mito"] = pbmc.var_names.str.startswith("MT-")
  sc.pp.calculate_qc_metrics(pbmc, qc_vars=["mito"], inplace=True)
  sns.jointplot(
      data=pbmc.obs,
      x="log1p_total_counts",
      y="log1p_n_genes_by_counts",
      kind="hex",
  )
  sc.pp.calculate_qc_metrics(adata, qc_vars=["mt", "ribo", "hb"], inplace=True, log1p=True)

## PCA

In [34]:
# Inspiration: https://scanpy.readthedocs.io/en/stable/tutorials/basics/clustering.html#dimensionality-reduction
#
# Dreamcode (never runs): shows a mockup image, then sketches running PCA and
# plotting the variance ratio.

if dreamcode_exists("https://imgur.com/N7K3cgf.png"):
  # Inspiration: https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.pca.html
  val.tools.pca(adata)
  # Inspiration: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pl.pca_variance_ratio.html
  val.plotting.pca_variance_ratio(adata, n_pcs=50, log=True)


In [35]:
# Dreamcode (never runs): shows a mockup image, then sketches a grid of PCA
# scatter plots.
if dreamcode_exists("https://imgur.com/VAI8zlg.png"):
  # Inspiration: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pl.pca.html
  #
  # NOTE(review): the `color` keys "sample" and "pct_counts_mt" look carried
  # over from the scanpy tutorial; nothing visible in this notebook creates
  # them on `adata`.
  val.plotting.pca(
      adata,
      color=["sample", "sample", "pct_counts_mt", "pct_counts_mt"],
      dimensions=[(0, 1), (2, 3), (0, 1), (2, 3)],
      ncols=2,
      size=2,
  )

## KMeans

In [36]:
# Dreamcode (never runs): sketch of k-means clustering on `adata`.
# Later cells color plots by "kmeans", so the result is presumably stored
# under that key; confirm once implemented.

if dreamcode_exists():
  # Inspiration: https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.leiden.html
  val.tools.kmeans(adata)

## UMAP

In [37]:
# Dreamcode (never runs): shows a mockup image, then sketches the
# neighbors -> UMAP -> plot sequence.
if dreamcode_exists("https://imgur.com/t7G45jo.png"):
  # Inspiration: https://scanpy.readthedocs.io/en/stable/tutorials/basics/clustering.html#nearest-neighbor-graph-construction-and-visualization
  # Inspiration: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.neighbors.html
  val.preprocessing.neighbors(adata)
  # Inspiration: https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html
  val.tools.umap(adata)
  # Inspiration: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pl.umap.html
  val.plotting.umap(
      adata,
      color="kmeans",
      # Setting a smaller point size to prevent overlap
      size=2,
  )


## Langevitour

In [38]:
# Dreamcode (never runs): shows a mockup image, then sketches a langevitour
# visualization.
if dreamcode_exists("https://imgur.com/OmmUxMo.png"):
  # See: https://logarithmic.net/langevitour/
  # See: https://colab.research.google.com/github/pfh/langevitour/blob/main/py/examples/langevitour.ipynb
  val.viz.langevitour(
      adata,
      color="kmeans",
      # NOTE(review): these strings look like slices of stored embeddings
      # (UMAP and PCA components); the exact syntax is not defined here.
      dimensions=["X_umap[0:2]", "X_pca[0:10]"],
  )


## DataMapPlot

In [39]:
# Dreamcode (never runs): shows a mockup image, then sketches a static
# DataMapPlot of participants.
if dreamcode_exists("https://imgur.com/IfrR6vp.png"):
  # See: https://github.com/TutteInstitute/datamapplot/blob/main/doc/basic_usage.ipynb
  val.viz.datamapplot(
      adata,
      map_of="participants",
      basis="umap",
      color="kmeans",
  )

In [40]:
# Dreamcode (never runs): sketches an interactive DataMapPlot of statements.
# NOTE(review): this reuses the same placeholder image URL as the previous
# (participants) cell; confirm whether a separate mockup was intended.
if dreamcode_exists("https://imgur.com/IfrR6vp.png"):
  # See: https://github.com/TutteInstitute/datamapplot/blob/main/doc/interactive_intro.ipynb
  val.viz.datamapplot(
      adata,
      map_of="statements",
      basis="umap",
      color="kmeans",
      interactive=True,
  )

## Talk to the City

In [41]:
# Dreamcode (never runs): shows a mockup image, then sketches the
# Talk to the City integration in two calls.
if dreamcode_exists("https://imgur.com/daYtomz.png"):
  # See: https://github.com/AIObjectives/tttc-light-js/blob/dd6e11f9bb62215a9c5c6caeaf799d1c2f97745a/pyserver/config.py
  val.tools.talktothecity.generate_synthetic_reactions(adata)
  # Examples: https://takahiroanno2024.github.io/tokyoai-analysis/
  val.tools.talktothecity.generate_report(adata)

## Perspective Explorer

In [42]:
# Dreamcode (never runs): shows a mockup image for a prospective
# "perspective explorer" visualization.
if dreamcode_exists("https://imgur.com/yVOG22g.png"):
  val.viz.perspective_explorer(adata) # TODO: no reference link or arguments sketched yet