diff --git a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml index b6c8116aa..39a872a55 100644 --- a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml @@ -389,33 +389,43 @@ subsampling: exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m - # 4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 5125 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_1m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 1M" + exclude: "--exclude-where 'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 1M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=South America'" oceania_early: @@ -432,12 +442,22 @@ subsampling: group_by: "country week" max_sequences: 600 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: 
"--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 1M" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: "country week" max_sequences: 600 min_date: "--min-date 1M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 1M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division week" max_sequences: 600 @@ -455,33 +475,43 @@ subsampling: exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over 2m - # 4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 5125 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_2m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 2M" + exclude: "--exclude-where 'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 2M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 
max_date: "--max-date 2M" exclude: "--exclude-where 'region!=South America'" oceania_early: @@ -491,128 +521,166 @@ subsampling: exclude: "--exclude-where 'region!=Oceania'" africa_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=Africa'" asia_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 + min_date: "--min-date 2M" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division week" + max_sequences: 500 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 2M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=North America'" south_america_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=South America'" oceania_recent: group_by: "division week" - max_sequences: 200 + max_sequences: 100 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over 6m - # 4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 4500 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_6m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125
max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 6M" + exclude: "--exclude-where 'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 6M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=South America'" oceania_early: group_by: "division year month" - max_sequences: 50 + max_sequences: 25 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Oceania'" africa_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Africa'" asia_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division year month" + max_sequences: 500 + min_date: "--min-date 6M" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division year month" + max_sequences: 500 + min_date: "--min-date 6M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division year month" - 
max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=North America'" south_america_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=South America'" oceania_recent: group_by: "division year month" - max_sequences: 200 + max_sequences: 100 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over all-time - # 4000 total - # all regions equal except Oceania at 33% + # 4320 total (expect ~3200) + # all eight regions equal except Oceania at 20% nextstrain_global_all_time: africa: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=Africa'" asia: group_by: "country year month" - max_sequences: 750 - exclude: "--exclude-where 'region!=Asia'" + max_sequences: 600 + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china: + group_by: "division year month" + max_sequences: 600 + exclude: "--exclude-where 'country!=China'" europe: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=Europe'" + india: + group_by: "division year month" + max_sequences: 600 + exclude: "--exclude-where 'country!=India'" north_america: group_by: "division year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=North America'" south_america: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=South America'" oceania: group_by: "division year month" - max_sequences: 250 + max_sequences: 120 exclude: "--exclude-where 'region!=Oceania'" # Root to clade 21L diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml index 751b0bc4a..deb8de43e 100644 --- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml +++ 
b/nextstrain_profiles/nextstrain-gisaid/builds.yaml @@ -381,33 +381,43 @@ subsampling: exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m - # 4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 5125 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_1m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 1M" + exclude: "--exclude-where 'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 1M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=South America'" oceania_early: @@ -424,12 +434,22 @@ subsampling: group_by: "country week" max_sequences: 600 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 1M" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: 
"country week" max_sequences: 600 min_date: "--min-date 1M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 1M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division week" max_sequences: 600 @@ -447,33 +467,43 @@ subsampling: exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over 2m - # 4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 5125 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_2m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 2M" + exclude: "--exclude-where 'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 2M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=South America'" oceania_early: @@ -483,128 +513,166 @@ subsampling: exclude: "--exclude-where 'region!=Oceania'" africa_recent: group_by: "country week" - 
max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=Africa'" asia_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 + min_date: "--min-date 2M" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division week" + max_sequences: 500 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 2M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=North America'" south_america_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=South America'" oceania_recent: group_by: "division week" - max_sequences: 200 + max_sequences: 100 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over 6m - # 4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 4500 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_6m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 +
max_date: "--max-date 6M" + exclude: "--exclude-where 'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 6M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=South America'" oceania_early: group_by: "division year month" - max_sequences: 50 + max_sequences: 25 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Oceania'" africa_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Africa'" asia_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division year month" + max_sequences: 500 + min_date: "--min-date 6M" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division year month" + max_sequences: 500 + min_date: "--min-date 6M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=North America'" south_america_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: 
"--min-date 6M" exclude: "--exclude-where 'region!=South America'" oceania_recent: group_by: "division year month" - max_sequences: 200 + max_sequences: 100 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over all-time - # 4000 total - # all regions equal except Oceania at 33% + # 4320 total (expect ~3200) + # all eight regions equal except Oceania at 20% nextstrain_global_all_time: africa: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=Africa'" asia: group_by: "country year month" - max_sequences: 750 - exclude: "--exclude-where 'region!=Asia'" + max_sequences: 600 + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china: + group_by: "division year month" + max_sequences: 600 + exclude: "--exclude-where 'country!=China'" europe: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=Europe'" + india: + group_by: "division year month" + max_sequences: 600 + exclude: "--exclude-where 'country!=India'" north_america: group_by: "division year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=North America'" south_america: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=South America'" oceania: group_by: "division year month" - max_sequences: 250 + max_sequences: 120 exclude: "--exclude-where 'region!=Oceania'" # if different traits should be reconstructed for some builds, specify here diff --git a/nextstrain_profiles/nextstrain-open/builds.yaml b/nextstrain_profiles/nextstrain-open/builds.yaml index 25b32ea57..2d0eabeab 100644 --- a/nextstrain_profiles/nextstrain-open/builds.yaml +++ b/nextstrain_profiles/nextstrain-open/builds.yaml @@ -381,33 +381,43 @@ subsampling: exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m - # 
4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 5125 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_1m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 + max_date: "--max-date 1M" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 1M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 1M" exclude: "--exclude-where 'region!=South America'" oceania_early: @@ -424,15 +434,25 @@ subsampling: group_by: "country week" max_sequences: 600 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 1M" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: "country week" - max_sequences: 1500 + max_sequences: 600 min_date: "--min-date 1M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division week" + max_sequences: 500 + 
min_date: "--min-date 1M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division week" - max_sequences: 1500 + max_sequences: 600 min_date: "--min-date 1M" exclude: "--exclude-where 'region!=North America'" south_america_recent: @@ -447,33 +467,43 @@ subsampling: exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over 2m - # 4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 5125 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_2m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 + max_date: "--max-date 2M" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 2M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 2M" exclude: "--exclude-where 'region!=South America'" oceania_early: @@ -483,128 +513,166 @@ subsampling: exclude: "--exclude-where 'region!=Oceania'" africa_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 
2M" exclude: "--exclude-where 'region!=Africa'" asia_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 2M" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division week" + max_sequences: 500 + min_date: "--min-date 2M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=North America'" south_america_recent: group_by: "country week" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=South America'" oceania_recent: group_by: "division week" - max_sequences: 200 + max_sequences: 100 min_date: "--min-date 2M" exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over 6m - # 4000 total - # 4:1 ratio of focal to context - # all regions equal except Oceania at 33% + # 4500 total (expect ~3400) + # 4:1 ratio of recent to early + # all eight regions equal except Oceania at 20% nextstrain_global_6m: africa_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Africa'" asia_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 6M" + exclude: "--exclude-where
'country!=China'" europe_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Europe'" + india_early: + group_by: "division year month" + max_sequences: 125 + max_date: "--max-date 6M" + exclude: "--exclude-where 'country!=India'" north_america_early: group_by: "division year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=North America'" south_america_early: group_by: "country year month" - max_sequences: 150 + max_sequences: 125 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=South America'" oceania_early: group_by: "division year month" - max_sequences: 50 + max_sequences: 25 max_date: "--max-date 6M" exclude: "--exclude-where 'region!=Oceania'" africa_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Africa'" asia_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china_recent: + group_by: "division year month" + max_sequences: 500 + min_date: "--min-date 6M" + exclude: "--exclude-where 'country!=China'" europe_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Europe'" + india_recent: + group_by: "division year month" + max_sequences: 500 + min_date: "--min-date 6M" + exclude: "--exclude-where 'country!=India'" north_america_recent: group_by: "division year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=North America'" south_america_recent: group_by: "country year month" - max_sequences: 600 + max_sequences: 500 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=South 
America'" oceania_recent: group_by: "division year month" - max_sequences: 200 + max_sequences: 100 min_date: "--min-date 6M" exclude: "--exclude-where 'region!=Oceania'" # Custom subsampling logic for global region over all-time - # 4000 total - # all regions equal except Oceania at 33% + # 4320 total (expect ~3200) + # all eight regions equal except Oceania at 20% nextstrain_global_all_time: africa: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=Africa'" asia: group_by: "country year month" - max_sequences: 750 - exclude: "--exclude-where 'region!=Asia'" + max_sequences: 600 + exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" + china: + group_by: "division year month" + max_sequences: 600 + exclude: "--exclude-where 'country!=China'" europe: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=Europe'" + india: + group_by: "division year month" + max_sequences: 600 + exclude: "--exclude-where 'country!=India'" north_america: group_by: "division year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=North America'" south_america: group_by: "country year month" - max_sequences: 750 + max_sequences: 600 exclude: "--exclude-where 'region!=South America'" oceania: group_by: "division year month" - max_sequences: 250 + max_sequences: 120 exclude: "--exclude-where 'region!=Oceania'" # GenBank data includes "Wuhan-Hu-1/2019" which we use as the root for this build