Skip to content

Commit

Permalink
fix: removes artifact_type from artifacts, moves to a separate table (#…
Browse files Browse the repository at this point in the history
…1543)

* Previously, we de-duplicated with DISTINCT on source, namespace, name, AND type.
* This meant that the same address was shown multiple times, once per
  artifact_type
* This refactor creates an intermediate model called int_all_artifacts
  that we can use
* artifact_type is removed from artifacts_v1 and artifacts_by_project_v1
* there's a new int_artifact_types table that we can use to get types
  • Loading branch information
ryscheng committed May 27, 2024
1 parent 297cb8f commit 74e51d5
Show file tree
Hide file tree
Showing 13 changed files with 251 additions and 236 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ with blockchain_artifacts as (
when artifact_type = 'EOA' then 1
else 0
end as artifact_rank
from {{ ref('int_artifacts_by_project') }}
from {{ ref('int_all_artifacts') }}
where artifact_source = "{{ upper_network_name }}"
)
group by artifact_source_id
Expand Down
2 changes: 1 addition & 1 deletion warehouse/dbt/macros/models/filtered_blockchain_events.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{% macro filtered_blockchain_events(artifact_source, source_name, source_table) %}
with known_addresses as (
select distinct `artifact_source_id` as `address`
from {{ ref("int_artifacts_by_project") }}
from {{ ref("int_all_artifacts") }}
where LOWER(artifact_source) = LOWER('{{ artifact_source }}')
), known_to as (
select events.*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ with blockchain_artifacts as (
when artifact_type = 'EOA' then 1
else 0
end as artifact_rank
from {{ ref('int_artifacts_by_project') }}
from {{ ref('int_all_artifacts') }}
where LOWER(artifact_source) = LOWER('OPTIMISM')
)
group by artifact_source_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
}}
with known_addresses as (
select distinct `artifact_source_id` as `address`
from {{ ref("int_artifacts_by_project") }}
from {{ ref("int_all_artifacts") }}
where `artifact_source` = 'OPTIMISM'
),
{% if target.name == 'production' %}
Expand Down
218 changes: 218 additions & 0 deletions warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
{#
This model is responsible for generating a list of all artifacts associated with a project.
This includes repositories, npm packages, blockchain addresses, and contracts.

Note: This will create a separate row for each artifact_type, which is de-duplicated
in int_artifacts_by_project
Note: Currently, the source and namespace for blockchain artifacts are the same. This may change
in the future.
#}

with all_repos as (
  {#
    Repositories attached to each project.

    Currently this is just Github.
    oss-directory needs some refactoring to support multiple repository
    providers.

    A project's github entry is matched (case-insensitively) against a
    repository either by the repository's own URL or by the URL of the
    repository's owner (https://github.com/<owner>).
  #}
  select
    "GITHUB" as artifact_source,
    "REPOSITORY" as artifact_type,
    projects.project_id,
    repos.owner as artifact_namespace,
    repos.name as artifact_name,
    repos.url as artifact_url,
    CAST(repos.id as STRING) as artifact_source_id
  from {{ ref('stg_ossd__current_projects') }} as projects
  cross join UNNEST(JSON_QUERY_ARRAY(projects.github)) as github_entry
  inner join {{ ref('stg_ossd__current_repositories') }} as repos
    on
      LOWER(JSON_VALUE(github_entry.url)) = LOWER(repos.url)
      or LOWER(JSON_VALUE(github_entry.url))
      = LOWER(CONCAT("https://github.com/", repos.owner))
),

all_npm_raw as (
  {#
    One row per npm URL listed on a project.

    artifact_name is the part of the URL that follows the npmjs.com
    "/package/" prefix. SUBSTR is 1-based, so the name starts at
    LENGTH(prefix) + 1. The offset is derived from the prefix itself instead
    of being hard-coded: the previous literal 28 for the 26-character
    "https://npmjs.com/package/" prefix dropped the first character of the
    package name.

    URLs that match neither prefix get a NULL artifact_name because the CASE
    has no ELSE branch.
  #}
  select
    "NPM" as artifact_source,
    "PACKAGE" as artifact_type,
    projects.project_id,
    JSON_VALUE(npm.url) as artifact_source_id,
    case
      when
        JSON_VALUE(npm.url) like "https://npmjs.com/package/%"
        then
          SUBSTR(
            JSON_VALUE(npm.url),
            LENGTH("https://npmjs.com/package/") + 1
          )
      when
        JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%"
        then
          SUBSTR(
            JSON_VALUE(npm.url),
            LENGTH("https://www.npmjs.com/package/") + 1
          )
    end as artifact_name,
    JSON_VALUE(npm.url) as artifact_url
  from
    {{ ref('stg_ossd__current_projects') }} as projects
  cross join
    UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm
),

all_npm as (
  {#
    npm packages with a derived namespace: every "@" is removed from the
    package name and the first "/"-separated segment is kept. SAFE_OFFSET
    returns NULL instead of raising when that segment does not exist.
  #}
  select
    project_id,
    artifact_source_id,
    artifact_source,
    artifact_type,
    SPLIT(
      REPLACE(artifact_name, "@", ""),
      "/"
    )[SAFE_OFFSET(0)] as artifact_namespace,
    artifact_name,
    artifact_url
  from all_npm_raw
),

ossd_blockchain as (
  {#
    Blockchain addresses declared on a project, expanded to one row per
    (address, network, tag) combination. The tag becomes artifact_type; the
    network is used for both artifact_source and artifact_namespace. The
    address is used as source id, name and url alike.
  #}
  select
    projects.project_id,
    tag_name as artifact_type,
    network_name as artifact_namespace,
    network_name as artifact_source,
    JSON_VALUE(chain_entry.address) as artifact_source_id,
    JSON_VALUE(chain_entry.address) as artifact_name,
    JSON_VALUE(chain_entry.address) as artifact_url
  from {{ ref('stg_ossd__current_projects') }} as projects
  cross join UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as chain_entry
  cross join UNNEST(JSON_VALUE_ARRAY(chain_entry.networks)) as network_name
  cross join UNNEST(JSON_VALUE_ARRAY(chain_entry.tags)) as tag_name
),

all_deployers as (
  {#
    Deployer/contract pairs gathered from several upstream models, each
    labelled with an artifact_namespace and artifact_source.

    NOTE(review): the first two branches use `select *` and UNION ALL lines
    columns up by position, so the staging models presumably expose exactly
    block_timestamp, transaction_hash, deployer_address, contract_address in
    that order -- confirm against the staging models.
  #}

  {# Ethereum deployers #}
  select
    *,
    "MAINNET" as artifact_namespace,
    "ETHEREUM" as artifact_source
  from {{ ref("stg_ethereum__deployers") }}

  union all

  {# Arbitrum One deployers #}
  select
    *,
    "ARBITRUM_ONE" as artifact_namespace,
    "ARBITRUM_ONE" as artifact_source
  from {{ ref("stg_arbitrum__deployers") }}

  union all

  {# Includes all deployers of a contract #}
  select
    block_timestamp,
    transaction_hash,
    deployer_address,
    contract_address,
    UPPER(network) as artifact_namespace,
    UPPER(network) as artifact_source
  from {{ ref("int_derived_contracts") }}

  union all

  {# Includes all factory deployers of a contract #}
  select
    block_timestamp,
    transaction_hash,
    factory_deployer_address as deployer_address,
    contract_address,
    UPPER(network) as artifact_namespace,
    UPPER(network) as artifact_source
  from {{ ref("int_derived_contracts") }}
),

discovered_contracts as (
  {#
    Contracts attributed to a project because one of the project's declared
    addresses (tagged EOA, DEPLOYER or FACTORY) appears as their deployer.

    NOTE(review): artifact_source and artifact_namespace are taken from the
    declared address rather than from the deployment, so a match made through
    "ANY_EVM" carries "ANY_EVM" into the output -- confirm this is intended.
  #}
  select
    "CONTRACT" as artifact_type,
    declared.project_id,
    deployed.contract_address as artifact_source_id,
    declared.artifact_source,
    declared.artifact_namespace,
    deployed.contract_address as artifact_name,
    deployed.contract_address as artifact_url
  from ossd_blockchain as declared
  inner join all_deployers as deployed
    on
      UPPER(declared.artifact_type) in ("EOA", "DEPLOYER", "FACTORY")
      and declared.artifact_source_id = deployed.deployer_address
      {#
        We currently do not really have a notion of namespace in
        oss-directory. We may need to change this when that time comes
      #}
      and UPPER(declared.artifact_source) in (
        UPPER(deployed.artifact_source), "ANY_EVM"
      )
      and UPPER(declared.artifact_namespace) in (
        UPPER(deployed.artifact_namespace), "ANY_EVM"
      )
),

all_artifacts as (
  {#
    Stacks the per-source CTEs into a single relation. Every branch projects
    the same seven columns, so the projection is written once and rendered
    for each CTE, joined with UNION ALL.
  #}
  {% set artifact_ctes = [
    "all_repos",
    "ossd_blockchain",
    "discovered_contracts",
    "all_npm"
  ] %}
  {% for artifact_cte in artifact_ctes %}
    select
      project_id,
      artifact_source_id,
      artifact_source,
      artifact_type,
      artifact_namespace,
      artifact_name,
      artifact_url
    from {{ artifact_cte }}
    {% if not loop.last %}
      union all
    {% endif %}
  {% endfor %}
),

all_normalized_artifacts as (
  {#
    Case-normalizes every text column and then removes exact duplicate rows.

    artifact_source and artifact_type are considered internal constants,
    hence the UPPER transform; the remaining text columns are lower-cased.
  #}
  select distinct
    project_id,
    UPPER(artifact_source) as artifact_source,
    UPPER(artifact_type) as artifact_type,
    LOWER(artifact_source_id) as artifact_source_id,
    LOWER(artifact_namespace) as artifact_namespace,
    LOWER(artifact_name) as artifact_name,
    LOWER(artifact_url) as artifact_url
  from all_artifacts
)

{#
  Final projection: every normalized row plus an artifact_id that the oso_id
  macro builds from artifact_source and artifact_source_id.
#}
select
  a.project_id,
  {{ oso_id("a.artifact_source", "a.artifact_source_id") }} as `artifact_id`,
  a.artifact_source_id,
  a.artifact_source,
  a.artifact_namespace,
  a.artifact_name,
  a.artifact_url,
  a.artifact_type
from all_normalized_artifacts as a
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{#
  Unique combinations of artifact identity columns and artifact_type, read
  from int_all_artifacts. Grouping by every projected column removes
  duplicate rows.
#}
select
  artifact_id,
  artifact_source_id,
  artifact_source,
  artifact_namespace,
  artifact_name,
  artifact_type
from {{ ref('int_all_artifacts') }}
group by
  artifact_id,
  artifact_source_id,
  artifact_source,
  artifact_namespace,
  artifact_name,
  artifact_type
4 changes: 0 additions & 4 deletions warehouse/dbt/models/intermediate/directory/int_artifacts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ with all_artifacts as (
select
artifact_source_id,
artifact_source,
artifact_type,
artifact_namespace,
artifact_url,
artifact_name
Expand All @@ -16,15 +15,13 @@ with all_artifacts as (
select
artifact_source_id,
artifact_source,
artifact_type,
artifact_namespace,
artifact_url,
MAX_BY(artifact_name, last_used) as artifact_name
from {{ ref('int_artifacts_history') }}
group by
artifact_source_id,
artifact_source,
artifact_type,
artifact_namespace,
artifact_url
)
Expand All @@ -33,7 +30,6 @@ select distinct
{{ oso_id("artifact_source", "artifact_source_id") }} as artifact_id,
artifact_source_id,
artifact_source,
artifact_type,
artifact_namespace,
artifact_name,
artifact_url
Expand Down
Loading

0 comments on commit 74e51d5

Please sign in to comment.