In [5]:
# Import required libraries
import pandas as pd
import geopandas as gpd
import os
import pyproj

# Point PROJ at the data directory bundled with the active conda environment.
# CONDA_PREFIX is not set outside conda, and the data lives under
# `Library/share/proj` on Windows but `share/proj` on Linux/macOS, so both
# layouts are probed. When neither directory exists pyproj's own defaults are
# left untouched instead of being overridden with a path that is not there.
proj_lib = None
conda_prefix = os.environ.get('CONDA_PREFIX')
if conda_prefix:
    candidate_dirs = (
        os.path.join(conda_prefix, 'Library', 'share', 'proj'),
        os.path.join(conda_prefix, 'share', 'proj'),
    )
    proj_lib = next((path for path in candidate_dirs if os.path.isdir(path)), None)
if proj_lib is not None:
    os.environ['PROJ_LIB'] = proj_lib
    pyproj.datadir.set_data_dir(proj_lib)

# Load the CSV file from the URL
url = 'https://raw.githubusercontent.com/juliocollazos/geoia_examples/main/Example_one/UML_data.csv'
df = pd.read_csv(url)

# Display the first few rows and data info
print(df.head())
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() appends a stray "None" line.
df.info()

         UML ID               Group Name                 Parent Company  \
0  PO1000000017                AGROPALMA                      AGROPALMA   
1  PO1000000019  KULIM (MALAYSIA) BERHAD         EPA MANAGEMENT SDN BHD   
2  PO1000000020                    NBPOL           NEW BRITAIN PALM OIL   
3  PO1000000021       ROYAL GOLDEN EAGLE           INTI INDOSAWIT SUBUR   
4  PO1000000052          IOI CORPORATION  PAMOL ESTATES (SABAH) SDN BHD   

     Mill Name         RSPO Status           RSPO Type  \
0          CPA  Not RSPO Certified                 NaN   
1      SEDENAK      RSPO Certified  RSPO Certified, MB   
2         MOSA      RSPO Certified  RSPO Certified, IP   
3     BUATAN I      RSPO Certified  RSPO Certified, MB   
4  PAMOL SABAH      RSPO Certified  RSPO Certified, IP   

  Date RSPO Certification Status   Latitude   Longitude       GPS coordinates  \
0                     24/05/2024  -2.253539   -48.58567  -2.253539, -48.58567   
1                     24/05/2024   1.7

In [2]:
# Convert 'Latitude' and 'Longitude' to numeric (unparseable entries become
# NaN), then remove every row left without a usable coordinate pair.
# `assign` builds a new frame instead of mutating the loaded one in place.
df = df.assign(
    Latitude=pd.to_numeric(df['Latitude'], errors='coerce'),
    Longitude=pd.to_numeric(df['Longitude'], errors='coerce'),
).dropna(subset=['Latitude', 'Longitude'])

# Filter for Malaysia and Indonesia.
# The explicit copy detaches the selection from `df`, so attaching a geometry
# column below cannot raise SettingWithCopyWarning or write through to `df`.
df_filtered = df[df['Country'].isin(['Malaysia', 'Indonesia'])].copy()

# Create a GeoDataFrame; points_from_xy takes x (longitude) first, then y (latitude)
gdf = gpd.GeoDataFrame(
    df_filtered,
    geometry=gpd.points_from_xy(df_filtered['Longitude'], df_filtered['Latitude']),
    crs="EPSG:4326"
)

# Display the first few rows of the GeoDataFrame and its info
print(gdf.head())
# info() prints its own report and returns None; print(gdf.info()) would add
# a stray "None" line to the output.
gdf.info()

         UML ID               Group Name                 Parent Company  \
1  PO1000000019  KULIM (MALAYSIA) BERHAD         EPA MANAGEMENT SDN BHD   
3  PO1000000021       ROYAL GOLDEN EAGLE           INTI INDOSAWIT SUBUR   
4  PO1000000052          IOI CORPORATION  PAMOL ESTATES (SABAH) SDN BHD   
6  PO1000000054                MUSIM MAS     MUSIM MAS HOLDINGS PTE LTD   
7  PO1000000058                  CARGILL                        HINDOLI   

      Mill Name     RSPO Status           RSPO Type  \
1       SEDENAK  RSPO Certified  RSPO Certified, MB   
3      BUATAN I  RSPO Certified  RSPO Certified, MB   
4   PAMOL SABAH  RSPO Certified  RSPO Certified, IP   
6  BATANG KULIM  RSPO Certified  RSPO Certified, IP   
7  SUNGAI LILIN  RSPO Certified  RSPO Certified, IP   

  Date RSPO Certification Status  Latitude   Longitude        GPS coordinates  \
1                     24/05/2024  1.730858  103.538323   1.730858, 103.538323   
3                     24/05/2024  0.434444  101.825000  

In [3]:
# Load the country boundaries from the GeoPackage file
country_boundaries_url = 'https://github.com/juliocollazos/geoia_examples/raw/main/Example_one/gadm_country.gpkg'
country_boundaries = gpd.read_file(country_boundaries_url)

# Filter the boundaries to include only Malaysia and Indonesia
is_target_country = country_boundaries['country_name'].isin(['Malaysia', 'Indonesia'])
filtered_boundaries = country_boundaries[is_target_country]

# Display information about the filtered boundaries.
# info() prints its own report and returns None, so it is called directly;
# print(filtered_boundaries.info()) emits an extra "None" line after the report.
filtered_boundaries.info()
print(filtered_boundaries[['country_name', 'geometry']])

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 8006 entries, 66727 to 78415
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   country_name  8006 non-null   object  
 1   geometry      8006 non-null   geometry
dtypes: geometry(1), object(1)
memory usage: 187.6+ KB
None
      country_name                                           geometry
66727    Indonesia  POLYGON ((95.78820 2.63992, 95.73351 2.75680, ...
66728    Indonesia  POLYGON ((95.74487 2.70501, 95.71339 2.71159, ...
66729    Indonesia  POLYGON ((95.76553 2.82823, 95.76457 2.83601, ...
66730    Indonesia  POLYGON ((95.78683 2.83402, 95.78914 2.83608, ...
66731    Indonesia  POLYGON ((95.78432 2.83625, 95.78519 2.83849, ...
...            ...                                                ...
78411     Malaysia  POLYGON ((118.92389 5.39945, 118.92667 5.37750...
78412     Malaysia  POLYGON ((119.11056 5.46083, 119.13722 5.44556...
78413     Malaysia  

In [4]:
import html

import folium
from folium.plugins import MarkerCluster

# Ensure both GeoDataFrames are in WGS84 (EPSG:4326), which is what the map expects.
# set_crs only labels a frame and raises if a different CRS is already set, so
# it is used solely when the CRS is missing; otherwise to_crs reprojects.
gdf = gdf.set_crs(epsg=4326) if gdf.crs is None else gdf.to_crs(epsg=4326)
filtered_boundaries = (
    filtered_boundaries.set_crs(epsg=4326)
    if filtered_boundaries.crs is None
    else filtered_boundaries.to_crs(epsg=4326)
)

# Calculate the average coordinates for the map center
avg_lat = gdf['Latitude'].mean()
avg_lon = gdf['Longitude'].mean()

# Create a map centered on the average coordinates
map_center = folium.Map(location=[avg_lat, avg_lon], zoom_start=5, tiles='OpenStreetMap')

# Add country boundaries layer
folium.GeoJson(
    filtered_boundaries,
    name='Country Boundaries',
    style_function=lambda feature: {
        'fillColor': 'yellow',
        'color': 'black',
        'weight': 2,
        'fillOpacity': 0.3
    },
    popup=folium.GeoJsonPopup(fields=['country_name'])
).add_to(map_center)

# Create a MarkerCluster for palm oil mills
marker_cluster = MarkerCluster(name='Palm Oil Mills').add_to(map_center)

# Add markers for each palm oil mill.
# Zipping the needed columns avoids building a Series per row as iterrows() does.
mill_columns = zip(
    gdf['Mill Name'],
    gdf['RSPO Status'],
    gdf['Country'],
    gdf['Latitude'],
    gdf['Longitude'],
)
for mill_name, rspo_status, country, lat, lon in mill_columns:
    # The popup is rendered as HTML and the values come from a remote CSV, so
    # each one is escaped; str() also covers missing (NaN) cells.
    popup_text = (
        f"Mill Name: {html.escape(str(mill_name))}<br>"
        f"RSPO Status: {html.escape(str(rspo_status))}<br>"
        f"Country: {html.escape(str(country))}"
    )
    folium.Marker(
        location=[lat, lon],
        popup=popup_text
    ).add_to(marker_cluster)

# Add layer control
folium.LayerControl().add_to(map_center)

# Display the map
map_center