diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml
index 4034e4730..56242ea2c 100644
--- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml
+++ b/nextstrain_profiles/nextstrain-gisaid/builds.yaml
@@ -273,30 +273,18 @@ subsampling:
 
   # Custom subsampling logic for region Asia over 1m
   # Grouping by division
-  # Separating three buckets for China, India and elsewhere
+  # Focal samples: grouped by country (not division), weighted by population size
   # 4375 total
   # 4:1 ratio of recent to early
   # 4:1 ratio of focal to context
-  # 3:2:2 proportions of Asia, China, India
   nextstrain_region_asia_grouped_by_division_1m:
     # Early focal samples for Asia
     asia_early:
-      group_by: "division year month"
-      max_sequences: 300
-      max_date: "--max-date 1M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Early focal samples for China
-    china_early:
-      group_by: "division year month"
-      max_sequences: 200
-      max_date: "--max-date 1M"
-      exclude: "--exclude-where 'country!=China'"
-    # Early focal samples for India
-    india_early:
-      group_by: "division year month"
-      max_sequences: 200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 700
       max_date: "--max-date 1M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_early:
       group_by: "country year month"
@@ -305,22 +293,11 @@ subsampling:
       exclude: "--exclude-where 'region=Asia'"
     # Recent focal samples for Asia
     asia_recent:
-      group_by: "division week"
-      max_sequences: 1200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 2800
       min_date: "--min-date 1M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Recent focal samples for China
-    china_recent:
-      group_by: "division week"
-      max_sequences: 800
-      max_date: "--min-date 1M"
-      exclude: "--exclude-where 'country!=China'"
-    # Recent focal samples for India
-    india_recent:
-      group_by: "division week"
-      max_sequences: 800
-      max_date: "--min-date 1M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_recent:
       group_by: "country week"
@@ -330,30 +307,18 @@ subsampling:
 
   # Custom subsampling logic for region Asia over 2m
   # Grouping by division
-  # Separating three buckets for China, India and elsewhere
+  # Focal samples: grouped by country (not division), weighted by population size
   # 4375 total
   # 4:1 ratio of recent to early
   # 4:1 ratio of focal to context
-  # 3:2:2 proportions of Asia, China, India
   nextstrain_region_asia_grouped_by_division_2m:
     # Early focal samples for Asia
     asia_early:
-      group_by: "division year month"
-      max_sequences: 300
-      max_date: "--max-date 2M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Early focal samples for China
-    china_early:
-      group_by: "division year month"
-      max_sequences: 200
-      max_date: "--max-date 2M"
-      exclude: "--exclude-where 'country!=China'"
-    # Early focal samples for India
-    india_early:
-      group_by: "division year month"
-      max_sequences: 200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 700
       max_date: "--max-date 2M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_early:
       group_by: "country year month"
@@ -362,22 +327,11 @@ subsampling:
       exclude: "--exclude-where 'region=Asia'"
     # Recent focal samples for Asia
     asia_recent:
-      group_by: "division week"
-      max_sequences: 1200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 2800
       min_date: "--min-date 2M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Recent focal samples for China
-    china_recent:
-      group_by: "division week"
-      max_sequences: 800
-      max_date: "--min-date 2M"
-      exclude: "--exclude-where 'country!=China'"
-    # Recent focal samples for India
-    india_recent:
-      group_by: "division week"
-      max_sequences: 800
-      max_date: "--min-date 2M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_recent:
       group_by: "country week"
@@ -387,30 +341,18 @@ subsampling:
 
   # Custom subsampling logic for region Asia over 6m
   # Grouping by division
-  # Separating three buckets for China, India and elsewhere
+  # Focal samples: grouped by country (not division), weighted by population size
   # 4375 total
   # 4:1 ratio of recent to early
   # 4:1 ratio of focal to context
-  # 3:2:2 proportions of Asia, China, India
   nextstrain_region_asia_grouped_by_division_6m:
     # Early focal samples for Asia
     asia_early:
-      group_by: "division year month"
-      max_sequences: 300
-      max_date: "--max-date 6M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Early focal samples for China
-    china_early:
-      group_by: "division year month"
-      max_sequences: 200
-      max_date: "--max-date 6M"
-      exclude: "--exclude-where 'country!=China'"
-    # Early focal samples for India
-    india_early:
-      group_by: "division year month"
-      max_sequences: 200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 700
       max_date: "--max-date 6M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_early:
       group_by: "country year month"
@@ -419,22 +361,11 @@ subsampling:
       exclude: "--exclude-where 'region=Asia'"
     # Recent focal samples for Asia
     asia_recent:
-      group_by: "division year month"
-      max_sequences: 1200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 2800
       min_date: "--min-date 6M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Recent focal samples for China
-    china_recent:
-      group_by: "division year month"
-      max_sequences: 800
-      max_date: "--min-date 6M"
-      exclude: "--exclude-where 'country!=China'"
-    # Recent focal samples for India
-    india_recent:
-      group_by: "division year month"
-      max_sequences: 800
-      max_date: "--min-date 6M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_recent:
       group_by: "country year month"
@@ -443,27 +374,16 @@ subsampling:
       exclude: "--exclude-where 'region=Asia'"
 
   # Custom subsampling logic for region Asia over all-time
-  # Grouping by division
-  # Separating three buckets for China, India and elsewhere
+  # Grouping by country weighted by population size
   # 4375 total
   # 4:1 ratio of focal to context
-  # 3:2:2 proportions of Asia, China, India
   nextstrain_region_asia_grouped_by_division_all_time:
     # Focal samples for Asia
     asia:
-      group_by: "division year month"
-      max_sequences: 1500
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Focal samples for China
-    china:
-      group_by: "division year month"
-      max_sequences: 1000
-      exclude: "--exclude-where 'country!=China'"
-    # Focal samples for India
-    india:
-      group_by: "division year month"
-      max_sequences: 1000
-      exclude: "--exclude-where 'country!=India'"
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 3500
+      exclude: "--exclude-where 'region!=Asia'"
     # Contextual samples from the rest of the world
     context:
       group_by: "country year month"
diff --git a/nextstrain_profiles/nextstrain-open/builds.yaml b/nextstrain_profiles/nextstrain-open/builds.yaml
index 40d5ff3ee..3518ff964 100644
--- a/nextstrain_profiles/nextstrain-open/builds.yaml
+++ b/nextstrain_profiles/nextstrain-open/builds.yaml
@@ -273,30 +273,18 @@ subsampling:
 
   # Custom subsampling logic for region Asia over 1m
   # Grouping by division
-  # Separating three buckets for China, India and elsewhere
+  # Focal samples: grouped by country (not division), weighted by population size
   # 4375 total
   # 4:1 ratio of recent to early
   # 4:1 ratio of focal to context
-  # 3:2:2 proportions of Asia, China, India
   nextstrain_region_asia_grouped_by_division_1m:
     # Early focal samples for Asia
     asia_early:
-      group_by: "division year month"
-      max_sequences: 300
-      max_date: "--max-date 1M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Early focal samples for China
-    china_early:
-      group_by: "division year month"
-      max_sequences: 200
-      max_date: "--max-date 1M"
-      exclude: "--exclude-where 'country!=China'"
-    # Early focal samples for India
-    india_early:
-      group_by: "division year month"
-      max_sequences: 200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 700
       max_date: "--max-date 1M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_early:
       group_by: "country year month"
@@ -305,22 +293,11 @@ subsampling:
       exclude: "--exclude-where 'region=Asia'"
     # Recent focal samples for Asia
     asia_recent:
-      group_by: "division week"
-      max_sequences: 1200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 2800
       min_date: "--min-date 1M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Recent focal samples for China
-    china_recent:
-      group_by: "division week"
-      max_sequences: 800
-      max_date: "--min-date 1M"
-      exclude: "--exclude-where 'country!=China'"
-    # Recent focal samples for India
-    india_recent:
-      group_by: "division week"
-      max_sequences: 800
-      max_date: "--min-date 1M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_recent:
       group_by: "country week"
@@ -330,30 +307,18 @@ subsampling:
 
   # Custom subsampling logic for region Asia over 2m
   # Grouping by division
-  # Separating three buckets for China, India and elsewhere
+  # Focal samples: grouped by country (not division), weighted by population size
   # 4375 total
   # 4:1 ratio of recent to early
   # 4:1 ratio of focal to context
-  # 3:2:2 proportions of Asia, China, India
   nextstrain_region_asia_grouped_by_division_2m:
     # Early focal samples for Asia
     asia_early:
-      group_by: "division year month"
-      max_sequences: 300
-      max_date: "--max-date 2M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Early focal samples for China
-    china_early:
-      group_by: "division year month"
-      max_sequences: 200
-      max_date: "--max-date 2M"
-      exclude: "--exclude-where 'country!=China'"
-    # Early focal samples for India
-    india_early:
-      group_by: "division year month"
-      max_sequences: 200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 700
       max_date: "--max-date 2M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_early:
       group_by: "country year month"
@@ -362,22 +327,11 @@ subsampling:
       exclude: "--exclude-where 'region=Asia'"
     # Recent focal samples for Asia
     asia_recent:
-      group_by: "division week"
-      max_sequences: 1200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 2800
       min_date: "--min-date 2M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Recent focal samples for China
-    china_recent:
-      group_by: "division week"
-      max_sequences: 800
-      max_date: "--min-date 2M"
-      exclude: "--exclude-where 'country!=China'"
-    # Recent focal samples for India
-    india_recent:
-      group_by: "division week"
-      max_sequences: 800
-      max_date: "--min-date 2M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_recent:
       group_by: "country week"
@@ -387,30 +341,18 @@ subsampling:
 
   # Custom subsampling logic for region Asia over 6m
   # Grouping by division
-  # Separating three buckets for China, India and elsewhere
+  # Focal samples: grouped by country (not division), weighted by population size
   # 4375 total
   # 4:1 ratio of recent to early
   # 4:1 ratio of focal to context
-  # 3:2:2 proportions of Asia, China, India
   nextstrain_region_asia_grouped_by_division_6m:
     # Early focal samples for Asia
     asia_early:
-      group_by: "division year month"
-      max_sequences: 300
-      max_date: "--max-date 6M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Early focal samples for China
-    china_early:
-      group_by: "division year month"
-      max_sequences: 200
-      max_date: "--max-date 6M"
-      exclude: "--exclude-where 'country!=China'"
-    # Early focal samples for India
-    india_early:
-      group_by: "division year month"
-      max_sequences: 200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 700
       max_date: "--max-date 6M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_early:
       group_by: "country year month"
@@ -419,22 +361,11 @@ subsampling:
       exclude: "--exclude-where 'region=Asia'"
     # Recent focal samples for Asia
     asia_recent:
-      group_by: "division year month"
-      max_sequences: 1200
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 2800
       min_date: "--min-date 6M"
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Recent focal samples for China
-    china_recent:
-      group_by: "division year month"
-      max_sequences: 800
-      max_date: "--min-date 6M"
-      exclude: "--exclude-where 'country!=China'"
-    # Recent focal samples for India
-    india_recent:
-      group_by: "division year month"
-      max_sequences: 800
-      max_date: "--min-date 6M"
-      exclude: "--exclude-where 'country!=India'"
+      exclude: "--exclude-where 'region!=Asia'"
     # Early contextual samples from the rest of the world
     context_recent:
       group_by: "country year month"
@@ -443,27 +374,16 @@ subsampling:
       exclude: "--exclude-where 'region=Asia'"
 
   # Custom subsampling logic for region Asia over all-time
-  # Grouping by division
-  # Separating three buckets for China, India and elsewhere
+  # Grouping by country weighted by population size
   # 4375 total
   # 4:1 ratio of focal to context
-  # 3:2:2 proportions of Asia, China, India
   nextstrain_region_asia_grouped_by_division_all_time:
     # Focal samples for Asia
     asia:
-      group_by: "division year month"
-      max_sequences: 1500
-      exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'"
-    # Focal samples for China
-    china:
-      group_by: "division year month"
-      max_sequences: 1000
-      exclude: "--exclude-where 'country!=China'"
-    # Focal samples for India
-    india:
-      group_by: "division year month"
-      max_sequences: 1000
-      exclude: "--exclude-where 'country!=India'"
+      group_by: "country year month"
+      group_by_weights: "data/country_population_weights.tsv"
+      max_sequences: 3500
+      exclude: "--exclude-where 'region!=Asia'"
     # Contextual samples from the rest of the world
     context:
       group_by: "country year month"
diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk
index 82922157e..396b0c7a0 100644
--- a/workflow/snakemake_rules/main_workflow.smk
+++ b/workflow/snakemake_rules/main_workflow.smk
@@ -190,6 +190,8 @@ def _get_specific_subsampling_setting(setting, optional=False):
                     value = f"--exclude-ambiguous-dates-by {value}"
                 elif setting == 'group_by':
                     value = f"--group-by {value}"
+                elif setting == 'group_by_weights':
+                    value = f"--group-by-weights {value}"
         elif value is not None:
             # If is 'seq_per_group' or 'max_sequences' build subsampling setting,
             # need to return the 'argument' for augur
@@ -265,6 +267,14 @@ rule index_sequences:
             --output {output.sequence_index} 2>&1 | tee {log}
         """
 
+rule get_weights:  # writes the weights table referenced by `group_by_weights` in the build profiles
+    output: "data/country_population_weights.tsv"  # same path as the `weights` input of rule subsample
+    shell:
+        """
+        python3 scripts/get_population_sizes.py \
+            --output {output}
+        """
+
 rule subsample:
     message:
         """
@@ -285,7 +295,11 @@ rule subsample:
         metadata = _get_unified_metadata,
         include = config["files"]["include"],
         priorities = get_priorities,
-        exclude = config["files"]["exclude"]
+        exclude = config["files"]["exclude"],
+        # FIXME: this single weights file is an input to every subsample job,
+        # even those that do not set group_by_weights. Fine so far, but revisit
+        # if the weighted columns ever need to differ between samples.
+        weights = "data/country_population_weights.tsv"
     output:
         strains="results/{build_name}/sample-{subsample}.txt",
     log:
@@ -294,6 +308,7 @@ rule subsample:
         "benchmarks/subsample_{build_name}_{subsample}.txt"
     params:
         group_by = _get_specific_subsampling_setting("group_by", optional=True),
+        group_by_weights = _get_specific_subsampling_setting("group_by_weights", optional=True),
         sequences_per_group = _get_specific_subsampling_setting("seq_per_group", optional=True),
         subsample_max_sequences = _get_specific_subsampling_setting("max_sequences", optional=True),
         sampling_scheme = _get_specific_subsampling_setting("sampling_scheme", optional=True),
@@ -323,6 +338,7 @@ rule subsample:
             {params.exclude_ambiguous_dates_argument} \
             {params.priority_argument} \
             {params.group_by} \
+            {params.group_by_weights} \
             {params.sequences_per_group} \
             {params.subsample_max_sequences} \
             {params.sampling_scheme} \