# Combine Partition Maps

This notebook combines the per-country partition maps, which partition the DHS clusters into train and validation segments, into a single set of partition maps covering all selected AOIs.

<pre style="font-family: monospace;">
Available AOIs: AM (Armenia)
                JO (Jordan) Do not include Jordan in the AOI_LIST
                MA (Morocco)
                MB (Moldova)
                ML (Mali)
                MR (Mauritania)
                NI (Niger)
                PK (Pakistan)
                SN (Senegal)
                TD (Chad)
</pre>
## Input
<pre style="font-family: monospace;">
./GIS-Image-Stack-Processing
    /AOI/
        Partitions/
            PK/
                <span style="color: blue;">PK_all.json</span> 
                <span style="color: blue;">PK_train.json</span> 
                <span style="color: blue;">PK_valid.json</span> 
            TD/
                <span style="color: blue;">TD_all.json</span> 
                <span style="color: blue;">TD_train.json</span> 
                <span style="color: blue;">TD_valid.json</span> 
</pre>

## Output

DHS data is used as the basis for creating partition maps for each country based on the location of clusters. 

<pre style="font-family: monospace;">
./GIS-Image-Stack-Processing
    /AOI/
        Partitions/
            <span style="color: blue;">all.json</span> 
            <span style="color: blue;">train.json</span> 
            <span style="color: blue;">valid.json</span> 
            PK/
                PK_all.json
                PK_train.json
                PK_valid.json
            TD/
                TD_all.json
                TD_train.json
                TD_valid.json   

</pre>

In [1]:
import os
import sys
import json
from collections import defaultdict

In [2]:
# Root folder of the partition maps: one sub-folder per AOI, and the place
# where the combined maps are written.
PRT_ROOT = './GIS-Image-Stack-Processing/AOI/Partitions'

# AOIs whose partition maps get combined (JO / Jordan is intentionally left out).
AOI_LIST = [
    'AM',  # Armenia
    'MA',  # Morocco
    'MB',  # Moldova
    'ML',  # Mali
    'MR',  # Mauritania
    'NI',  # Niger
    'PK',  # Pakistan
    'SN',  # Senegal
    'TD',  # Chad
]

## Combine Partition Maps

In [3]:
def count_total_clusters(data):
    """Return the number of clusters summed over every entry of ``data``.

    ``data`` is a mapping whose values are sized collections of clusters
    (one value per AOI key); the keys themselves are not inspected.
    """
    return sum(len(cluster_group) for cluster_group in data.values())

def combine_partition_maps(directory, subfolders=None):
    """Merge per-country partition maps into combined 'train', 'valid' and 'all' maps.

    Every selected sub-folder of ``directory`` is scanned for files whose names
    end in ``_train.json``, ``_valid.json`` or ``_all.json``. The text before
    the first underscore is taken as the country code, and a file is merged
    only when its JSON content contains that code as a top-level key. For each
    partition type that was found, ``<partition>.json`` is written directly
    into ``directory`` and a one-line summary is printed.

    Args:
        directory: Folder holding the per-country sub-folders; the combined
            files are written here as well.
        subfolders: Optional iterable of sub-folder names to process, in the
            order given. When ``None``, every sub-folder of ``directory`` is
            processed in sorted order.

    Returns:
        None. Results are written to disk and reported on stdout.
    """
    partition_suffixes = ('_train.json', '_valid.json', '_all.json')

    # Combined data keyed by partition type: 'train', 'valid', 'all'
    combined_partitions = defaultdict(dict)

    if subfolders is None:
        # os.listdir gives entries in arbitrary order; sort so the combined
        # output is reproducible from one machine / run to the next.
        subfolders = sorted(os.listdir(directory))

    for subfolder in subfolders:
        subfolder_path = os.path.join(directory, subfolder)
        if not os.path.isdir(subfolder_path):
            # Plain files (e.g. previously written combined maps) and
            # sub-folders that do not exist are skipped.
            continue

        # Sorted for the same reproducibility reason as above.
        for filename in sorted(os.listdir(subfolder_path)):
            # Match the real suffix: a substring test would also accept names
            # like 'PK_train.json_old.json' and invent an 'old' partition.
            if not filename.endswith(partition_suffixes):
                continue

            name_parts = filename.split('_')
            partition_type = name_parts[-1][:-len('.json')]  # 'train', 'valid' or 'all'
            country_code = name_parts[0]  # e.g. 'PK', 'TD'

            file_path = os.path.join(subfolder_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)

            if country_code in data:
                combined_partitions[partition_type].update(data)
            else:
                # Report instead of silently dropping the file.
                print(f"Skipping {file_path}: no top-level key '{country_code}' found")

    # Write out each combined map and report how many clusters it holds
    for partition_type, data in combined_partitions.items():
        output_path = os.path.join(directory, f"{partition_type}.json")

        with open(output_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=4)

        total_clusters = count_total_clusters(data)
        print(f"Combined {partition_type} partition map saved to {output_path} with {total_clusters} total clusters across all AOIs")


In [4]:
# Combine the partition maps found in the AOI_LIST sub-folders of PRT_ROOT.
combine_partition_maps(directory=PRT_ROOT, subfolders=AOI_LIST)

Combined train partition map saved to ./GIS-Image-Stack-Processing/AOI/Partitions/train.json with 3395 total clusters across all AOIs
Combined all partition map saved to ./GIS-Image-Stack-Processing/AOI/Partitions/all.json with 4163 total clusters across all AOIs
Combined valid partition map saved to ./GIS-Image-Stack-Processing/AOI/Partitions/valid.json with 768 total clusters across all AOIs
