# Generating Geo Map

## Import libraries

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
import seaborn as sns
import geoplot as gplt
from shapely.ops import cascaded_union
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

## Import Data

In [None]:
# WFS GetFeature request. Per its query string it asks for the
# `geonode:vietnam_districts` layer, as JSON, in EPSG:4326 coordinates.
URL = 'http://opendata.hcmgis.vn/geoserver/wfs?srsName=EPSG%3A4326&typename=geonode%3Avietnam_districts&outputFormat=json&version=1.0.0&service=WFS&request=GetFeature'

# Read geo data, might take a bit
# `raw` is kept untouched so that later cells can rebuild `geo_data` from it.
raw = gpd.read_file(URL)

In [None]:
# Drop unnecessary columns: keep only the columns from position 7 onward.
# NOTE(review): presumably these are Name, Province and geometry -- confirm
# with `raw.columns`.
# If you screw up later, run this cell to restore to the original data
# .copy() makes geo_data an independent frame, so the columns added in the
# cells below are written to geo_data itself and never to a slice of `raw`.
geo_data = raw[raw.columns[7:]].copy()

__Note:__ 
- Make sure the CSV file is in the same folder as this notebook.
- Add the livestock and feed data to the CSV. Put 0 in rows with no data; DON'T LEAVE THEM BLANK.

In [None]:
# Input data
# --------- EDIT THIS ---------
PATH = './vn_geomap.csv'
# -----------------------------

# Read file
input_data = pd.read_csv(PATH)

# Fail early with a clear message instead of silently producing NaN values:
# the assignment below is aligned on the row index, so a missing column or a
# CSV whose row count differs from the geo data would corrupt the merge.
DATA_COLUMNS = ['livestock', 'feed']
missing_columns = [col for col in DATA_COLUMNS if col not in input_data.columns]
if missing_columns:
    raise ValueError('{} is missing the column(s): {}'.format(PATH, ', '.join(missing_columns)))
if len(input_data) != len(geo_data):
    raise ValueError('{} has {} rows but the geo data has {} rows'.format(PATH, len(input_data), len(geo_data)))

# Combine livestock and feed data with geo data
# Blank cells are read as NaN; treat them as 0 ("no data")
geo_data[DATA_COLUMNS] = input_data[DATA_COLUMNS].fillna(0)

geo_data.head()

In [None]:
# Generate some test data (Delete this cell if you have already run the above cell)
# One uniform random value in [0, 1) per row, for each of the two metrics
n_rows = len(geo_data)

random_livestock = np.random.random(n_rows)
random_num = np.random.random(n_rows)

geo_data['livestock'] = random_livestock
geo_data['feed'] = random_num
# -------------------------

geo_data.head()

## Data Preprocessing

### Combine two provinces together

In [None]:
# Combine two (or more) provinces together
def combine_district(province, new_name):
    """Merge every district of the given provinces into one combined row.

    NOTE: despite its name, this function merges *provinces*. The name is kept
    because the cells below call it as
    ``combine_district(PROVINCES, NEW_PROVINCE_NAME)``. A later cell redefines
    ``combine_district`` with a three-argument, district-level signature, so
    run the cells in order.

    Reads the module-level ``geo_data``; ``geo_data`` itself is not modified.

    Args:
        province: Iterable of province names to merge (values of the
            ``Province`` column).
        new_name: Name of the merged region, stored in both the ``Name`` and
            the ``Province`` column. If falsy (e.g. ``False``), the province
            names found in the data are joined with ``' & '``, the province
            with the most rows first.

    Returns:
        A new frame holding all rows of ``geo_data`` outside the given
        provinces plus one appended row with the union of all merged
        geometries and the summed ``livestock`` and ``feed`` values. The index
        is renumbered from 0. Columns other than the five set here are left
        empty (NaN) in the appended row.

    Raises:
        ValueError: If a requested province has no rows in ``geo_data``.
    """
    # Imported here because the notebook's import cell only provides the
    # deprecated cascaded_union.
    from shapely.ops import unary_union

    requested = list(province)
    is_merged = geo_data['Province'].isin(requested)
    merged_rows = geo_data[is_merged]

    found = set(merged_rows['Province'])
    missing = [name for name in requested if name not in found]
    if missing:
        raise ValueError('Province(s) not found in geo_data: ' + ', '.join(map(str, missing)))

    if not new_name:
        # value_counts() orders the province names by number of rows
        new_name = ' & '.join(merged_rows['Province'].value_counts().index)

    # Aggregate over ALL rows of the selected provinces, not just the first two
    new_row = gpd.GeoDataFrame(
        {
            'Name': [new_name],
            'Province': [new_name],
            'geometry': [unary_union(list(merged_rows['geometry']))],
            'livestock': [merged_rows['livestock'].sum()],
            'feed': [merged_rows['feed'].sum()],
        },
        geometry='geometry',
        crs=getattr(geo_data, 'crs', None),
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    return pd.concat([geo_data[~is_merged], new_row], ignore_index=True)

__Note:__ You should duplicate the below cell instead of just changing the values and re-running it if you are doing this step many times, so that you can always reproduce the changes.

In [None]:
# --------- EDIT THIS ---------
# Provinces to merge
PROVINCES = ('Tay Ninh Province', 'Dong Nai Province')
# Name of the merged province. If set to False, the province names are joined with ' & '
NEW_PROVINCE_NAME = False
# -----------------------------

# Then run,...
geo_data = combine_district(PROVINCES, NEW_PROVINCE_NAME)
geo_data.tail()

This is what I mean by duplicating the cell...

In [None]:
# --------- EDIT THIS ---------
# Provinces to merge
PROVINCES = ('Ho Chi Minh City', 'Binh Duong Province')
# Name of the merged province. If set to False, the province names are joined with ' & '
NEW_PROVINCE_NAME = False
# -----------------------------

# Then run,...
geo_data = combine_district(PROVINCES, NEW_PROVINCE_NAME)
geo_data.tail()

### Combine two districts in the same province

In [None]:
# Combine two (or more) districts in the same province
def combine_district(districts, province, new_name):
    """Merge the given districts of one province into a single row.

    Reads the module-level ``geo_data``; ``geo_data`` itself is not modified.

    Args:
        districts: Iterable of district names to merge (values of the ``Name``
            column).
        province: Province the districts belong to; only rows whose
            ``Province`` equals this value are considered.
        new_name: Name of the merged district. If falsy (e.g. ``False``), the
            matched district names are joined with ``' & '`` in the order they
            appear in ``geo_data``.

    Returns:
        A new frame holding all other rows of ``geo_data`` plus one appended
        row with the union of the merged geometries and the summed
        ``livestock`` and ``feed`` values. The index is renumbered from 0.
        Columns other than the five set here are left empty (NaN) in the
        appended row.

    Raises:
        ValueError: If a requested district is not found in ``province``.
    """
    # Imported here because the notebook's import cell only provides the
    # deprecated cascaded_union.
    from shapely.ops import unary_union

    requested = list(districts)
    is_merged = geo_data['Name'].isin(requested) & (geo_data['Province'] == province)
    merged_rows = geo_data[is_merged]

    found = set(merged_rows['Name'])
    missing = [name for name in requested if name not in found]
    if missing:
        raise ValueError('District(s) not found in {}: {}'.format(province, ', '.join(map(str, missing))))

    if not new_name:
        new_name = ' & '.join(merged_rows['Name'])

    new_row = gpd.GeoDataFrame(
        {
            'Name': [new_name],
            'Province': [province],
            'geometry': [unary_union(list(merged_rows['geometry']))],
            'livestock': [merged_rows['livestock'].sum()],
            'feed': [merged_rows['feed'].sum()],
        },
        geometry='geometry',
        crs=getattr(geo_data, 'crs', None),
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    return pd.concat([geo_data[~is_merged], new_row], ignore_index=True)

__Note:__ Same thing here, duplicate the cell instead of just changing the values.

In [None]:
# --------- EDIT THIS ---------
# Input two districts only
DISTRICTS = ('District 7', 'District 8')
# Province in which the districts are looked up
PROVINCE = 'Ho Chi Minh City'
# Name of the merged district. If set to False, concatenate two district names together
NEW_DISTRICT_NAME = 'District 7 & 8'
# -----------------------------

# Then run,...
geo_data = combine_district(DISTRICTS, PROVINCE, NEW_DISTRICT_NAME)
geo_data.tail()

### Generate coordinates from geo data

In [None]:
# Generate coordinates from geo data
# For every geometry, first store the full coordinate list of its
# representative point, then reduce that list to its single (x, y) tuple.
point_coords = geo_data['geometry'].apply(lambda geom: geom.representative_point().coords[:])
geo_data['coords'] = point_coords
geo_data['coords'] = [point[0] for point in geo_data['coords']]

## Generate Geo Map with Livestock & Feed data

In [None]:
# Select one or more provinces to plot
def select_plot_data(provinces):
    """Return the rows of the global ``geo_data`` whose ``Province`` is in ``provinces``."""
    in_selection = geo_data['Province'].isin(provinces)
    return geo_data[in_selection]

# Generate geo map
def generate_map(provinces, hue_data, palette, image_size, show_label, excluded_districts, label_size, space_1st_row, space_2nd_row, show_legend):
    """Plot a choropleth of the selected provinces, optionally with text labels.

    Args:
        provinces: Province names passed to ``select_plot_data``.
        hue_data: Name of the column used to colour the map.
        palette: Colormap passed to ``gplt.choropleth`` as ``cmap``.
        image_size: Figure size passed as ``figsize``.
        show_label: If truthy, write name, livestock and feed next to each row.
        excluded_districts: ``Name`` values whose labels are skipped.
        label_size: Font size of all label lines.
        space_1st_row: Amount subtracted from the y coordinate for the
            livestock line.
        space_2nd_row: Additional amount subtracted from the y coordinate for
            the feed line.
        show_legend: Passed to ``gplt.choropleth`` as ``legend``.
    """
    def write_line(text, x_pos, y_pos, font):
        # All label lines are centred horizontally on the same x position
        plt.text(s=text, x=x_pos, y=y_pos, horizontalalignment='center', fontdict=font)

    plot_data = select_plot_data(provinces)
    hue_values = plot_data[hue_data]

    gplt.choropleth(plot_data, hue=hue_values, cmap=palette, figsize=image_size, legend=show_legend)

    if show_label:
        for _, district in plot_data.iterrows():
            if district['Name'] in excluded_districts:
                continue

            x_pos = district['coords'][0]

            # Line 1: district name in bold, at the stored coordinates
            write_line(district['Name'], x_pos, district['coords'][1],
                       {'weight': 'bold', 'size': label_size})

            # Line 2: livestock value rounded to 2 decimals
            write_line('Livestock: ' + str(round(district['livestock'], 2)), x_pos,
                       district['coords'][1] - space_1st_row,
                       {'size': label_size})

            # Line 3: feed value rounded to 2 decimals
            write_line('Feed: ' + str(round(district['feed'], 2)), x_pos,
                       district['coords'][1] - space_1st_row - space_2nd_row,
                       {'size': label_size})

    plt.show()

__Note:__ You can save the plot as an image by right-clicking the plot and choosing `Save image as...`

In [None]:
# --------- EDIT THIS ---------
# Plot the data of given provinces, must be a list
PROVINCES = ['Ho Chi Minh City', 'Binh Duong Province']
# Metric used to color code
HUE_DATA = 'livestock'
# Color Gradient, find more here: https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html
PALETTE = 'OrRd'
IMAGE_SIZE = (60, 30)
# Show/hide all labels
SHOW_LABEL = True
# Hide the labels of given districts, must be a list
EXCLUDED_DISTRICTS = ['Tan Binh District', 'District 3', 'Phu Nhuan District']
# Font size of label
LABEL_SIZE = 8
# Space between District name label and Livestock data label
SPACE_1ST_ROW = 0.01
# Space between Livestock data label and Feed data label
SPACE_2ND_ROW = 0.01
# Show/hide legend
SHOW_LEGEND = True
# -----------------------------

# Then run,...
generate_map(
    provinces=PROVINCES,
    hue_data=HUE_DATA,
    palette=PALETTE,
    image_size=IMAGE_SIZE,
    show_label=SHOW_LABEL,
    excluded_districts=EXCLUDED_DISTRICTS,
    label_size=LABEL_SIZE,
    space_1st_row=SPACE_1ST_ROW,
    space_2nd_row=SPACE_2ND_ROW,
    show_legend=SHOW_LEGEND,
)