In [1]:
import pandas as pd
import psycopg2
import os
import uuid
import logging
from dotenv import load_dotenv

# Let python-dotenv populate the process environment before the database
# settings are read from it.
load_dotenv()

# Connection settings used by the psycopg2.connect(...) calls in later cells.
# os.getenv returns None for any variable that is not set.
dbname, user, password, host, port = (
    os.getenv(env_key)
    for env_key in ("DB_NAME", "DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT")
)

In [2]:
# get countries_biodiversity_rankings data
#
# Runs a single SELECT that joins countries_biodiversity_rankings to countries
# and leaves three module-level results behind:
#   data                               - the raw rows returned by fetchall()
#   countries_biodiversity_rankings    - dict mapping country_id -> iso_alpha3
#   countries_biodiversity_rankings_df - the same rows as a DataFrame

conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host, port=port)
try:
    cursor = conn.cursor()
    try:
        # Plain string on purpose: the statement interpolates nothing, so an
        # f-string prefix would only invite accidental string-built SQL later.
        cursor.execute("""SELECT c.country_id, c.iso_alpha3, biodiversity_rank, amphibians, amphibians_rank,
               birds, birds_rank, fish, fish_rank, mammals, mammals_rank, reptiles,
               reptiles_rank, plants, plants_rank FROM countries_biodiversity_rankings cbr
               LEFT JOIN countries c on c.country_id = cbr.country_id""")
        data = cursor.fetchall()
    finally:
        # Release the cursor even when execute()/fetchall() raises.
        cursor.close()
finally:
    # Always hand the connection back, otherwise a failed query leaks it for
    # the lifetime of the kernel.
    conn.close()

# First two selected columns are country_id and iso_alpha3.
countries_biodiversity_rankings = {record[0]: record[1] for record in data}

# Column labels follow the SELECT list order above.
ranking_columns = [
    'country_id', 'iso_alpha3', 'biodiversity_rank',
    'amphibians', 'amphibians_rank',
    'birds', 'birds_rank',
    'fish', 'fish_rank',
    'mammals', 'mammals_rank',
    'reptiles', 'reptiles_rank',
    'plants', 'plants_rank',
]
countries_biodiversity_rankings_df = pd.DataFrame(data, columns=ranking_columns)

# NOTE(review): a later cell reads countries_biodiversity_rankings_nile.csv from
# this same folder; with the export below disabled, that file must already
# exist on disk - confirm before running the notebook on a fresh checkout.
# countries_biodiversity_rankings_df.to_csv('../../data/geojson-blob/countries_biodiversity_rankings_nile.csv', index=False)

In [15]:
import pandas as pd
import json

# Merge the per-country ranking columns from the CSV into the properties of
# the matching GeoJSON features (matched on ISO_A3 == iso_alpha3), then write
# the result to a new GeoJSON file.


def _to_json_value(value):
    """Convert one CSV cell into something json.dump can serialise.

    Missing values (NaN/NA) become None so the output contains ``null``
    instead of the non-standard ``NaN`` token. NumPy scalars are unwrapped
    with ``.item()``; values without ``.item()`` (e.g. plain strings) are
    returned unchanged.
    """
    if pd.isna(value):
        return None
    return value.item() if hasattr(value, 'item') else value


# Load GeoJSON data from file (JSON text is UTF-8; do not rely on the locale default)
geojson_file_path = '../../data/geojson-blob/countries.geojson'
with open(geojson_file_path, 'r', encoding='utf-8') as geojson_file:
    geojson_data = json.load(geojson_file)

# Load CSV data from file
csv_file_path = '../../data/geojson-blob/countries_biodiversity_rankings_nile.csv'  # Ensure this is the correct path
csv_data = pd.read_csv(csv_file_path)

# Index the CSV rows by ISO code once instead of filtering the whole frame for
# every feature. setdefault keeps the FIRST row per code, which is the row the
# previous per-feature filter selected with iloc[0].
rankings_by_iso = {}
for csv_record in csv_data.to_dict(orient='records'):
    rankings_by_iso.setdefault(csv_record['iso_alpha3'], csv_record)

# Iterate over GeoJSON features and update properties
for feature in geojson_data['features']:
    iso_a3 = feature['properties']['ISO_A3']
    matching_record = rankings_by_iso.get(iso_a3)
    if matching_record is None:
        # No ranking data for this feature; leave its properties untouched.
        continue
    for column, value in matching_record.items():
        # Avoid adding the identifier columns to the GeoJSON properties
        if column not in ('country_id', 'iso_alpha3'):
            feature['properties'][column] = _to_json_value(value)

# Output the updated GeoJSON to a new file
updated_geojson_file_path = '../../data/geojson-blob/countries_biodiversity_rankings.geojson'
with open(updated_geojson_file_path, 'w', encoding='utf-8') as f:
    json.dump(geojson_data, f)

Optimization

In [16]:
# save as GZIP
import gzip
import shutil

# Write a gzip-compressed copy of the merged GeoJSON next to the original.
geojson_path = '../../data/geojson-blob/countries_biodiversity_rankings.geojson'
gzip_path = '../../data/geojson-blob/countries_biodiversity_rankings.geojson.gz'
with open(geojson_path, 'rb') as raw_file, gzip.open(gzip_path, 'wb') as gz_file:
    # Stream from the plain file object into the gzip file object.
    shutil.copyfileobj(raw_file, gz_file)

In [None]:
# round to 5 decimal places
import json

def round_numbers(obj, ndigits=5):
    """Round every float inside a JSON-like structure.

    Walks dicts and lists recursively and applies ``round(value, ndigits)``
    to each float it finds. Anything that is not a dict, list or float
    (ints, strings, None, ...) is returned unchanged.

    Args:
        obj: The value to process - typically the result of ``json.load``.
        ndigits: Number of decimal places to keep. Defaults to 5, the
            precision this function used before the parameter existed.

    Returns:
        The rounded structure. A dict is updated in place and the same dict
        object is returned; a list is rebuilt as a new list; a float is
        returned rounded.
    """
    if isinstance(obj, dict):
        # Only existing keys are reassigned, so the dict's size never changes
        # while it is being iterated.
        for key, value in obj.items():
            obj[key] = round_numbers(value, ndigits)
        return obj
    if isinstance(obj, list):
        return [round_numbers(item, ndigits) for item in obj]
    if isinstance(obj, float):
        return round(obj, ndigits)
    return obj

# Load the GeoJSON file.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying
# on the platform's locale default.
with open('../../data/geojson-blob/countries_biodiversity_rankings.geojson', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Round all numbers to 5 decimal places
rounded_data = round_numbers(data)

# Save the rounded copy under a separate file name (pretty-printed).
with open('../../data/geojson-blob/rounded_countries_biodiversity_rankings.geojson', 'w', encoding='utf-8') as file:
    json.dump(rounded_data, file, indent=4)

In [16]:
# adding EPI properties (read in from public)
# Load the GeoJSON file
import json
# JSON text is UTF-8; pass the encoding explicitly so the parse does not depend
# on the platform's locale default.
with open('../../../prana-next/public/countries_biodiversity_rankings.geojson', 'r', encoding='utf-8') as file:
    geo = json.load(file)

In [17]:
# get countries_epis data
#
# Fetches one row per (country, EPI) pair - iso_alpha3, EPI abbreviation and
# score - and exposes them as `data` (raw rows) and `df` (DataFrame).

conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host, port=port)
try:
    cursor = conn.cursor()
    try:
        # Plain string: nothing is interpolated into the statement.
        cursor.execute("""select c.iso_alpha3, e.abbreviation, ce.score from "countries_epis" ce
left join countries c on ce.country_id = c.country_id
left join epis e on ce.epi_id = e.epi_id;""")
        data = cursor.fetchall()
    finally:
        # Release the cursor even when execute()/fetchall() raises.
        cursor.close()
finally:
    # Always close the connection so a failed query does not leak it.
    conn.close()

# NOTE(review): this rebinds countries_biodiversity_rankings to a mapping of
# iso_alpha3 -> EPI abbreviation, where later rows for the same country
# overwrite earlier ones. It looks like a leftover from the biodiversity query
# cell; it is kept so the notebook state is unchanged - confirm it can go.
countries_biodiversity_rankings = {}
for record in data:
    countries_biodiversity_rankings[record[0]] = record[1]

df = pd.DataFrame(data, columns=['iso_alpha3', 'epi_name', 'score'])

print(df)

     iso_alpha3 epi_name  score
0           AFG      BHV   41.9
1           AFG      SHI   90.8
2           AFG      SPI   13.4
3           AFG      PAR    5.2
4           AFG      MPA    NaN
...         ...      ...    ...
1075        ZWE      SHI   91.2
1076        ZWE      SPI   80.1
1077        ZWE      PAR   57.2
1078        ZWE      MPA    NaN
1079        ZWE      RMS    NaN

[1080 rows x 3 columns]


In [19]:
import json
import pandas as pd
from copy import deepcopy

# Add one property per EPI abbreviation to every country feature and fill in
# the score from `df` (columns: iso_alpha3, epi_name, score) where available.

# Load the original GeoJSON file (JSON text is UTF-8; the output below is
# written as UTF-8 too, so read it the same way)
with open('../../../prana-next/public/countries_biodiversity_rankings.geojson', 'r', encoding='utf-8') as file:
    geo = json.load(file)

# Make a deep copy of the original GeoJSON to work with, so `geo` stays as loaded
geo_copy = deepcopy(geo)

# Computed once: the set of EPI names does not depend on the feature.
epi_names = df['epi_name'].unique()

# ISO_A3 -> list of properties dicts carrying that code. A list is used so that
# every feature sharing a code is updated, as the per-row scan used to do.
properties_by_iso = {}

# Iterate over each country (feature) in the GeoJSON
for feature in geo_copy['features']:
    properties = feature['properties']
    iso_a3 = properties['ISO_A3']

    # For each unique EPI name, add a placeholder if not present, so every
    # feature exposes the same set of EPI keys.
    for epi in epi_names:
        if epi not in properties:
            properties[epi] = None  # Placeholder for new properties

    properties_by_iso.setdefault(iso_a3, []).append(properties)

# Now, populate the actual scores from the DataFrame. Rows are applied in
# DataFrame order, so a later row for the same (country, EPI) pair wins.
for iso_a3, epi_name, score in zip(df['iso_alpha3'], df['epi_name'], df['score']):
    # Replace NaN values with None so they are written as JSON null.
    score = None if pd.isnull(score) else score

    # Update every feature whose ISO_A3 matches this row (none if the code is unknown)
    for properties in properties_by_iso.get(iso_a3, ()):
        properties[epi_name] = score

# Save the updated GeoJSON; NaN scores were converted to None above
with open('../../../prana-next/public/updated_countries_biodiversity_rankings.geojson', 'w', encoding='utf-8') as file:
    json.dump(geo_copy, file, ensure_ascii=False, indent=4)
