# Vietnam Interactive Map 2
Date: March 2023
Author: Kathleen Shalini Rome
Organisation: UNSW

In [6]:
# If you’re working in a Jupyter notebook be sure to run the following “magic” command to render plots properly:
%matplotlib inline 

In [7]:
import geoplot as gplt
import geopandas as gpd
import geoplot.crs as gcrs
import pandas as pd
import imageio
import pandas as pd
import pathlib
import matplotlib.pyplot as plt
import mapclassify as mc
import numpy as n

In [8]:
# Loading shapefile of Vietnam commune
# Coordinate WGS84
# Opening files from different folder: https://stackoverflow.com/questions/35384358/how-to-open-my-files-in-data-folder-with-pandas-using-relative-path
vn_commune = gpd.read_file("Data_Boundaries/Admin3_commune_Vietnam/VNM_adm3.shp") 
# Source of VN files: https://data.humdata.org/dataset/cod-ab-vnm
# vn_commune
# I assume Name 1 is province, name 2 is district and name 3 is commune / ward/ town. 
# Renaming the columns
vn_commune = vn_commune.rename({'NAME_1': 'Province', 'NAME_2': 'District','NAME_3':'Commune/Town'}, axis=1)

# Getting the mekong delta communes, province, districts.
list_mk_prov_vn = ['Cần Thơ', 'Long An', 'Bến Tre', 'Tiền Giang', 'Vĩnh Long', 'Trà Vinh', 'Hậu Giang', 'Đồng Tháp', 'Sóc Trăng', 'An Giang', 'Bạc Liêu', 'Kiên Giang', 'Cà Mau']
# Select rows where column value is in List of Values - Source: https://www.statology.org/pandas-select-rows-based-on-column-values/
mk_commune = vn_commune.loc[vn_commune['Province'].isin(list_mk_prov_vn)]
mk_commune

Unnamed: 0,ID_0,ISO,NAME_0,ID_1,Province,ID_2,District,ID_3,Commune/Town,CCN_3,CCA_3,TYPE_3,ENGTYPE_3,NL_NAME_3,VARNAME_3,geometry
222,250,VNM,Vietnam,3,Đồng Tháp,19,Cao Lãnh,223,An Bình,0,,Commune,Commune,,,"POLYGON ((105.67486 10.44355, 105.67468 10.443..."
223,250,VNM,Vietnam,3,Đồng Tháp,19,Cao Lãnh,224,Ba Sao,0,,Commune,Commune,,,"POLYGON ((105.69276 10.50856, 105.69304 10.510..."
224,250,VNM,Vietnam,3,Đồng Tháp,19,Cao Lãnh,225,Bình Hàng Tây,0,,Commune,Commune,,,"POLYGON ((105.76463 10.35540, 105.76331 10.356..."
225,250,VNM,Vietnam,3,Đồng Tháp,19,Cao Lãnh,226,Bình Hàng Trung,0,,Commune,Commune,,,"POLYGON ((105.77689 10.41451, 105.77640 10.412..."
226,250,VNM,Vietnam,3,Đồng Tháp,19,Cao Lãnh,227,Bình Thạnh,0,,Commune,Commune,,,"POLYGON ((105.81646 10.32235, 105.81696 10.322..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10476,250,VNM,Vietnam,61,Vĩnh Long,659,Vĩnh Long,10477,9,0,,Ward,Ward,,,"POLYGON ((105.95335 10.24382, 105.95323 10.243..."
10477,250,VNM,Vietnam,61,Vĩnh Long,659,Vĩnh Long,10478,Tân Hội,0,,Commune,Commune,,,"POLYGON ((105.90015 10.24484, 105.89861 10.245..."
10478,250,VNM,Vietnam,61,Vĩnh Long,659,Vĩnh Long,10479,Tân Hòa,0,,Commune,Commune,,,"POLYGON ((105.91867 10.23340, 105.91857 10.232..."
10479,250,VNM,Vietnam,61,Vĩnh Long,659,Vĩnh Long,10480,Tân Ngãi,0,,Commune,Commune,,,"POLYGON ((105.92652 10.24143, 105.92640 10.240..."


In [9]:
# Loading database
#vn_database_loc = pd.read_csv('Database_test_encoded_utf_16_CSVutf8.csv', encoding='UTF-16LE', error_bad_lines=False)
vn_database_loc = pd.read_csv('Database_test_encoded_utf_16_CSVutf8.csv')
# Okay so I saved my xlsx file as a unicode text then opened it in notepad and then saved as a UTF16LE encoding and as a csv, then I opened it in csv and resaved in CSV utf8 encoding.
vn_database_loc = vn_database_loc.rename({'Commune': 'Commune/Town'}, axis=1)
vn_database_loc

Unnamed: 0,Start Month,Start Year,End Month,End Year,Project,Organisation,Description,Theme,Speific Theme,Training workshop / Research,Paper Location,District,Province,Commune/Town,Source,Comment
0,June,2013,October,2017,To better understand the key components of the...,UNSW,,,,Research,,,,,,
1,,2014,December,2017,Test the influence of key factors that will af...,UNSW,,,,Research,Hao My,Cái Nước,Cà Mau,Hoà Mỹ,,Monitoring salinity in soil pore solution and ...
2,Rice season,2015,April,2019,Quantify the improvement to rice production by...,UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,
3,Rice season,2015,April,2019,Quantify the improvement to rice production by...,UNSW,,,,Research,Tan Bang,Thới Bình,Cà Mau,Tân Bằng,,
4,October,2018,December,2018,Quantify the fertiliser replacement value of s...,UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,Dates reflect incorporation of additional trai...
5,October,2018,December,2018,Quantify the fertiliser replacement value of s...,UNSW,,,,Research,Tan Bang,Thới Bình,Cà Mau,Tân Bằng,,
6,December,2013,October,2019,Train staff in laboratory and field research t...,UNSW,,,,Research,,,,,,
7,June,2018,December,2018,"Determine nutrient (carbon, nitrogen, phosphor...",UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,
8,,,,,,UNSW,,,,,Tan Bang,Thới Bình,Cà Mau,Tân Bằng,,
9,,,,,Additional Socio-Economic Study,UNSW,,,,Research,(1) Hoa Tu 1 commune,Mỹ Xuyên,Sóc Trăng,Hòa Tú 1,https://www.aciar.gov.au/sites/default/files/2...,


In [10]:
# Getting polygons
# https://geopandas.org/en/stable/docs/user_guide/mergingdata.html I used this to get the merge formula and this link https://stackoverflow.com/questions/17978133/python-pandas-merge-only-certain-columns to get only specific columns
geocoded_database = vn_database_loc.merge(mk_commune[['Commune/Town','geometry']], on='Commune/Town')
geocoded_database.to_csv('geocoded_database.csv')
geocoded_database.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18 entries, 0 to 17
Data columns (total 17 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   Start Month                   10 non-null     object  
 1   Start Year                    12 non-null     object  
 2   End Month                     12 non-null     object  
 3   End Year                      12 non-null     object  
 4   Project                       16 non-null     object  
 5   Organisation                  18 non-null     object  
 6   Description                   4 non-null      object  
 7   Theme                         1 non-null      object  
 8   Speific Theme                 1 non-null      object  
 9   Training workshop / Research  11 non-null     object  
 10  Paper Location                16 non-null     object  
 11  District                      17 non-null     object  
 12  Province                      18 non-null     object

In [11]:
#making pandas to geopandas
database_gdf = gpd.GeoDataFrame(geocoded_database)

In [12]:
database_gdf

Unnamed: 0,Start Month,Start Year,End Month,End Year,Project,Organisation,Description,Theme,Speific Theme,Training workshop / Research,Paper Location,District,Province,Commune/Town,Source,Comment,geometry
0,,2014.0,December,2017.0,Test the influence of key factors that will af...,UNSW,,,,Research,Hao My,Cái Nước,Cà Mau,Hoà Mỹ,,Monitoring salinity in soil pore solution and ...,"POLYGON ((105.03860 9.00347, 105.04353 8.99474..."
1,,2014.0,December,2017.0,Test the influence of key factors that will af...,UNSW,,,,Research,Hao My,Cái Nước,Cà Mau,Hoà Mỹ,,Monitoring salinity in soil pore solution and ...,"POLYGON ((105.65816 9.80294, 105.65744 9.80370..."
2,Rice season,2015.0,April,2019.0,Quantify the improvement to rice production by...,UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,,"POLYGON ((105.03860 9.00347, 105.04353 8.99474..."
3,Rice season,2015.0,April,2019.0,Quantify the improvement to rice production by...,UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,,"POLYGON ((105.65816 9.80294, 105.65744 9.80370..."
4,October,2018.0,December,2018.0,Quantify the fertiliser replacement value of s...,UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,Dates reflect incorporation of additional trai...,"POLYGON ((105.03860 9.00347, 105.04353 8.99474..."
5,October,2018.0,December,2018.0,Quantify the fertiliser replacement value of s...,UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,Dates reflect incorporation of additional trai...,"POLYGON ((105.65816 9.80294, 105.65744 9.80370..."
6,June,2018.0,December,2018.0,"Determine nutrient (carbon, nitrogen, phosphor...",UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,,"POLYGON ((105.03860 9.00347, 105.04353 8.99474..."
7,June,2018.0,December,2018.0,"Determine nutrient (carbon, nitrogen, phosphor...",UNSW,,,,Research,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,,"POLYGON ((105.65816 9.80294, 105.65744 9.80370..."
8,,,,,"A second study, indirectly related to the proj...",UNSW,,,,,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,,"POLYGON ((105.03860 9.00347, 105.04353 8.99474..."
9,,,,,"A second study, indirectly related to the proj...",UNSW,,,,,Hoa My,Cái Nước,Cà Mau,Hoà Mỹ,,,"POLYGON ((105.65816 9.80294, 105.65744 9.80370..."


In [13]:
#Creating map
import folium

In [14]:
map = folium.Map(location =[10,105],tiles='OpenStreetMap',zoom_start = 7)
map

In [15]:
# Loading shapefile of Vietnam provinces
vn_province = gpd.read_file("Data_Boundaries/vnm_adm_gov_20201027/vnm_admbnda_adm1_gov_20201027.shp")
# Source of VN files:https://data.humdata.org/dataset/cod-ab-vnm
vn_province.head()
# Loading shapefile of Vietnam district
vn_district = gpd.read_file("Data_Boundaries/vnm_adm_gov_20201027/vnm_admbnda_adm2_gov_20201027.shp")
# Source of VN files:https://data.humdata.org/dataset/cod-ab-vnm
vn_district.head()

# Reducing file size
list_mk_prov = ['Can Tho city', 'Long An', 'Ben Tre', 'Tien Giang', 'Vinh Long', 'Tra Vinh', 'Hau Giang', 'Dong Thap', 'Soc Trang', 'An Giang', 'Bac Lieu', 'Kien Giang', 'Ca Mau']
#Select rows where column is equal to specific value
#vn_province.loc[vn_province['ADM1_EN']== 'Can Tho city']
# Select rows where column value is in List of Values - Source: https://www.statology.org/pandas-select-rows-based-on-column-values/
mk_provinces = vn_province.loc[vn_province['ADM1_EN'].isin(list_mk_prov)]
#mk_provinces
# Reducing district to Mekong Delta Districts file
list_mk_prov = ['Can Tho city', 'Long An', 'Ben Tre', 'Tien Giang', 'Vinh Long', 'Tra Vinh', 'Hau Giang', 'Dong Thap', 'Soc Trang', 'An Giang', 'Bac Lieu', 'Kien Giang', 'Ca Mau']
# Select rows where column value is in List of Values - Source: https://www.statology.org/pandas-select-rows-based-on-column-values/
mk_district = vn_district.loc[vn_district['ADM1_EN'].isin(list_mk_prov)]
mk_district


Unnamed: 0,Shape_Leng,Shape_Area,ADM2_EN,ADM2_VI,ADM2_PCODE,ADM2_REF,ADM2ALT1EN,ADM2ALT2EN,ADM2ALT1VI,ADM2ALT2VI,ADM1_EN,ADM1_VI,ADM1_PCODE,ADM0_EN,ADM0_VI,ADM0_PCODE,date,validOn,validTo,geometry
1,0.778150,0.032598,An Bien,Huyện An Biên,VN81315,,,,,,Kien Giang,Kiên Giang,VN813,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((105.12050 9.85495, 105.12673 9.84550..."
6,1.053450,0.048158,An Minh,Huyện An Minh,VN81317,,,,,,Kien Giang,Kiên Giang,VN813,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((105.03754 9.62409, 105.03753 9.62406..."
8,0.694846,0.018661,An Phu,Huyện An Phú,VN80505,,,,,,An Giang,An Giang,VN805,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((105.11716 10.95483, 105.11732 10.951..."
19,0.757167,0.029617,Ba Tri,Huyện Ba Tri,VN81113,,,,,,Ben Tre,Bến Tre,VN811,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((106.61821 10.16628, 106.62171 10.157..."
26,0.593530,0.012181,Bac Lieu,TP. Bạc Liêu,VN82101,,,,,,Bac Lieu,Bạc Liêu,VN821,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((105.82112 9.30130, 105.82227 9.29527..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
674,1.127616,0.020695,Vinh Loi,Huyện Vĩnh Lợi,VN82105,,,,,,Bac Lieu,Bạc Liêu,VN821,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((105.59488 9.42122, 105.59681 9.42007..."
675,0.385001,0.004032,Vinh Long,TP. Vĩnh Long,VN80901,,,,,,Vinh Long,Vĩnh Long,VN809,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((105.93992 10.27409, 105.94104 10.273..."
677,0.946339,0.025393,Vinh Thanh,Huyện Vĩnh Thạnh,VN81511,,,,,,Can Tho city,TP. Cần Thơ,VN815,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((105.48278 10.30985, 105.48286 10.309..."
678,0.901804,0.030805,Vinh Thuan,Huyện Vĩnh Thuận,VN81319,,,,,,Kien Giang,Kiên Giang,VN813,Viet Nam,Việt Nam,VN,2019-10-01,2020-01-03,,"POLYGON ((105.27414 9.68080, 105.27434 9.68035..."


In [16]:
for _, r in mk_provinces.iterrows():
    # Without simplifying the representation of each borough,
    # the map might not be displayed
    sim_geo = gpd.GeoSeries(r['geometry']).simplify(tolerance=0.001)
    geo_j = sim_geo.to_json()
    geo_j = folium.GeoJson(data=geo_j,
                           style_function=lambda x: {'fillColor': 'orange'})
    folium.Popup(r['ADM1_EN']).add_to(geo_j)
    geo_j.add_to(map)
map


In [17]:
# Adding centroid markers
# Getting centroid point and creating new col to add to database
database_gdf.to_crs('epsg:4326') #https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.set_crs.html#geopandas.GeoDataFrame.set_crs
database_gdf['centroid'] = database_gdf.centroid


  database_gdf['centroid'] = database_gdf.centroid


In [18]:
# To make sure you can get all projects at the same location you need to cluster the markers
from folium.plugins import MarkerCluster

In [19]:
# Creating map with cluster

#(Source:https://deparkes.co.uk/2016/06/24/folium-marker-clusters/ )
# create a marker cluster called "Riceshrimp research cluster"
marker_cluster = MarkerCluster().add_to(map) #If I put a name for the MarkerCluster in the brackets it didnt work but once I removed it it was fine.
map
for _, r in database_gdf.iterrows():
    lat = r['centroid'].y
    lon = r['centroid'].x
    #folium.Marker(location=[lat, lon], popup= "Start Year: " + str(r['Start Year']) + "<br>" + "End Year: " + str(r['End Year']) + "<br>" + "Organisation: " + str(r['Organisation']) + "<br>" + "Type: " + str(r['Training workshop / Research']) + "<br>" + "Report: " + "<a>" + str(r['Source']) +"</a>",).add_to(map)
    # Setting up content of popup
    iframe = folium.IFrame("Start Year: " + str(r['Start Year']) + "<br>" + "End Year: " + str(r['End Year']) + "<br>" + "Organisation: " + str(r['Organisation']) + "<br>" + "Type: " + str(r['Training workshop / Research']) + "<br>" + "Report: " + "<a>" + str(r['Source']) +"</a>")
    #Initialise the popup using the iframe
    popup = folium.Popup(iframe, min_width=300, max_width=300)
    
    # Controling Folium marker colour by organisation: https://towardsdatascience.com/folium-mapping-displaying-markers-on-a-map-6bd56f3e3420
    org_colour = {'UNSW':'pink','USAID, Mekong ARCC, ADMI, VNRC':'red','ACIAR':'green'}

    try:
        icon_color = org_colour[r['Organisation']]
    except:
        #Catch nans
        icon_color = 'gray'

    #Add each row to map
    #folium.Marker(location=[lat, lon], popup=popup, icon =folium.Icon(color = icon_color, icon='')).add_to(map)
    # Instead adding clusters to map instead of individual points as above
    folium.Marker(location=[lat, lon], popup=popup, icon =folium.Icon(color = icon_color, icon='')).add_to(marker_cluster)

map

# Improvements

Customise pop ups so that they have table of information and html links that are clickable. Try this: https://towardsdatascience.com/folium-map-how-to-create-a-table-style-pop-up-with-html-code-76903706b88a 

https://www.kaggle.com/code/dabaker/fancy-folium/notebook