# Prerequisites

## AWS credentials

This notebook needs AWS credentials with which to make S3 API calls.  
In the **Secrets** section on the left sidebar (a key-shaped icon), add three variables:

* `aws_access_key_id`
* `aws_secret_access_key`
* `aws_session_token` (only if you are using short-term AWS credentials)

See [AWS documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/security-creds-programmatic-access.html) for details.  The access key ID often starts with `AKI` or `ASI`; the other two fields are random base64-encoded strings (made of alphanumeric letters/digits, `/` and/or `+`).

## Google Cloud project

This notebook sources the GitHub event data by running BigQuery, for which it needs a Google Cloud project.  See [Google Cloud documentation](https://cloud.google.com/resource-manager/docs/creating-managing-projects) for details.  Once you created a project, enable BigQuery API on it, and substitute its project name for `ek-oso-test` in the `%%bigquery` cell below, e.g. if your project name is `my-gcloud-project`:

```
%%bigquery dataframe --project my-gcloud-project

-- the rest of the query comes here...
```

In [1]:
!pip install 'openrank-sdk>=0.4.0' boto3

Collecting openrank-sdk>=0.4.0
  Downloading openrank_sdk-0.4.0-py3-none-any.whl.metadata (14 kB)
Collecting boto3
  Downloading boto3-1.35.10-py3-none-any.whl.metadata (6.6 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from openrank-sdk>=0.4.0)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx>=0.6.7 (from openrank-sdk>=0.4.0)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting sphinx>=7.1.0 (from openrank-sdk>=0.4.0)
  Downloading sphinx-8.0.2-py3-none-any.whl.metadata (6.2 kB)
Collecting botocore<1.36.0,>=1.35.10 (from boto3)
  Downloading botocore-1.35.10-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)
  Downloading s3transfer-0.10.2-py3-none-any.whl.metadata (1.7 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->openrank-sdk>=0.4.0)
  Downloading

In [2]:
import os
from tempfile import NamedTemporaryFile

import boto3
from google.colab import auth, userdata
from google.cloud import bigquery
from openrank_sdk import EigenTrust, trust
import pandas as pd

In [3]:
# gcloud auth
auth.authenticate_user()

In [6]:
# aws auth
for var in ('aws_access_key_id', 'aws_secret_access_key', 'aws_session_token'):
    try:
        val = userdata.get(var)
    except userdata.SecretNotFoundError:
        print(f"{var} not found")
        os.environ.pop(var.upper(), None)
    else:
        print(f"{var} found")
        os.environ[var.upper()] = val
        del val
del var
boto3.setup_default_session()
resp = boto3.DEFAULT_SESSION.client('sts').get_caller_identity()
del resp['ResponseMetadata']
resp

aws_access_key_id found
aws_secret_access_key found
aws_session_token found


{'UserId': 'AROAV3MYZKCCOPSV5CPZV:ek@karma3labs.com',
 'Account': '402436739204',
 'Arn': 'arn:aws:sts::402436739204:assumed-role/AWSReservedSSO_AWSPowerUserAccess_5ed6bf1acfebf823/ek@karma3labs.com'}

# Create I&C Local Trust

In [9]:
%%bigquery localtrust --project ek-oso-test

WITH
  -- see https://gov.optimism.io/t/8393
  timebox AS (
    SELECT
      TIMESTAMP '1980-01-01 00:00:00 UTC' AS start,
      TIMESTAMP '2024-06-27 00:00:00 UTC' AS `end`
  ),
  bot_users AS (SELECT * FROM UNNEST([
    'safe-infra'
  ]) AS username),
  repo_event_types AS (
    SELECT DISTINCT event_type
    FROM `opensource-observer.oso.int_events`
    WHERE from_artifact_type = 'GIT_USER' AND to_artifact_type = 'REPOSITORY'
  ),
  -- user-to-repo trust components, signals interest/trust
  trust_event_weights AS (
    SELECT * FROM UNNEST(ARRAY<STRUCT<event_type STRING, weight INT64>>[
      ('FORKED', 1),
      ('STARRED', 5),
      ('ISSUE_OPENED', 10),
      ('PULL_REQUEST_OPENED', 20),
      -- added EK 8/3
      ('PULL_REQUEST_MERGED', 10),
      ('COMMIT_CODE', 5)
    ])
  ),
  -- repo-to-user trust components
  credit_event_weights AS (
    SELECT * FROM UNNEST(ARRAY<STRUCT<event_type STRING, weight INT64>>[
      ('PULL_REQUEST_OPENED', 5), -- TODO(ek): gameable, only count merged ones
      ('PULL_REQUEST_MERGED', 1), -- to the one who merged PR, not the author
      ('COMMIT_CODE', 3)
    ])
  ),
  repo_events AS (
    SELECT
      `time`,
      -- events' importance exponentially decay, with the half life of 1 year
      POW(0.5, TIMESTAMP_DIFF(TIMESTAMP '2024-08-08 10:15:39 UTC', time, SECOND) / 31536000) AS age_factor,
      event_type,
      from_artifact_name AS `from`,
      to_artifact_namespace || '/' || to_artifact_name AS `to`,
      amount
    FROM `opensource-observer.oso.int_events`
    JOIN timebox ON `opensource-observer.oso.int_events`.time BETWEEN timebox.start AND timebox.`end`
    JOIN repo_event_types USING (event_type)
    LEFT OUTER JOIN bot_users ON from_artifact_name = bot_users.username
    WHERE bot_users.username IS NULL
  )
SELECT
  `from` AS i,
  `to` AS j,
  SUM(amount * age_factor * weight) AS v
FROM repo_events
JOIN trust_event_weights USING (event_type)
GROUP BY i, j
HAVING i NOT LIKE '%[bot]' AND i NOT LIKE '%-bot'
UNION ALL
SELECT
  `to` AS i,
  `from` AS j,
  SUM(amount * age_factor * weight) AS v
FROM repo_events
JOIN credit_event_weights USING (event_type)
GROUP BY i, j
HAVING j NOT LIKE '%[bot]' AND j NOT LIKE '%-bot'
ORDER BY v DESC


Query is running:   0%|          |

Downloading:   0%|          |

In [10]:
localtrust

Unnamed: 0,i,j,v
0,samczsun,metamask/eth-phishing-detect,190884.215234
1,code423n4,code-423n4/code423n4.com,170533.441437
2,metamask/eth-phishing-detect,samczsun,114390.660669
3,matrixbot,element-hq/synapse,113642.377667
4,coreycaplan3,dolomite-exchange/liquidity-mining-data,76907.536424
...,...,...,...
3300610,magiclu,clowwindy/chinadns-c,0.001389
3300611,yhfolive,internetarchive/heritrix3,0.001389
3300612,hynnet,shadowsocks/shadowsocks-android,0.001388
3300613,harryert,clowwindy/chinadns,0.001388


In [11]:
pretrust = pd.DataFrame([
    dict(i='testinprod-io/op-erigon', v=2),
    dict(i='a16z/magi', v=2),
    dict(i='ethereum-optimism/optimism', v=2),
    dict(i='ethereum-optimism/op-geth', v=2),
    dict(i='ethereum-optimism/asterisc', v=2),
    dict(i='ethereum-optimism/kona', v=2),
    dict(i='ethereum-optimism/superchain-ops', v=1),
    dict(i='ethereum-optimism/op-analytics', v=1),
    dict(i='ethereum-optimism/design-docs', v=2),
    dict(i='ethereum-optimism/specs', v=2),
])
pretrust

Unnamed: 0,i,v
0,testinprod-io/op-erigon,2
1,a16z/magi,2
2,ethereum-optimism/optimism,2
3,ethereum-optimism/op-geth,2
4,ethereum-optimism/asterisc,2
5,ethereum-optimism/kona,2
6,ethereum-optimism/superchain-ops,1
7,ethereum-optimism/op-analytics,1
8,ethereum-optimism/design-docs,2
9,ethereum-optimism/specs,2


In [77]:
with (
    NamedTemporaryFile(suffix='.csv') as lt_csvfile,
    NamedTemporaryFile(suffix='.csv') as pt_csvfile,
):
    localtrust.to_csv(lt_csvfile, index=False)
    lt_csvfile.flush()
    pretrust.to_csv(pt_csvfile, index=False)
    pt_csvfile.flush()
    et = EigenTrust(host_url='https://ek-go-eigentrust.k3l.io')
    scores2 = et.run_eigentrust_from_s3(lt_csvfile.name, pt_csvfile.name, alpha=0.2)
    scores5 = et.run_eigentrust_from_s3(lt_csvfile.name, pt_csvfile.name, alpha=0.5)


  scores2 = et.run_eigentrust_from_s3(lt_csvfile.name, pt_csvfile.name, alpha=0.2, check_freq=2)
  scores5 = et.run_eigentrust_from_s3(lt_csvfile.name, pt_csvfile.name, alpha=0.5, check_freq=2)


In [78]:
user_scores = pd.DataFrame(sorted((iv for iv in scores2 if '/' not in iv['i']), key=lambda iv: -iv['v']))
user_scores

Unnamed: 0,i,v
0,imtei,5.044543e-02
1,pcw109550,3.593820e-02
2,tynes,3.351187e-02
3,refcell,2.092164e-02
4,optimismbot,1.863467e-02
...,...,...
74386,yongebridge,1.751401e-15
74387,nu11u5,1.580139e-15
74388,weichaoxiao,1.171928e-15
74389,mason-hz,8.168998e-16


In [79]:
repo_scores = pd.DataFrame(sorted((iv for iv in scores5 if '/' in iv['i']), key=lambda iv: -iv['v']))
repo_scores

Unnamed: 0,i,v
0,ethereum-optimism/optimism,1.103424e-01
1,testinprod-io/op-erigon,6.588763e-02
2,a16z/magi,6.287980e-02
3,ethereum-optimism/kona,6.179217e-02
4,ethereum-optimism/op-geth,6.031764e-02
...,...,...
42412,ebridgecrosschain/ebridge-server-indexer,1.556417e-18
42413,ccpgames/esky,1.439670e-18
42414,ethelo/bonmin,1.324496e-18
42415,ebridgecrosschain/ebridge-oracle-indexer,1.251326e-18
