In [36]:
from catboost import CatBoostClassifier, Pool
import streamlit as st
from streamlit_geolocation import streamlit_geolocation
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import shapely

LATLONG_CRS = "EPSG:4326"

FEATURE_SOIL_GROUP = ['grp_properties_upper_organic', 'grp_properties_upper_N', 'grp_properties_upper_P', 'grp_properties_upper_K', 'grp_properties_lower_organic', 'grp_properties_lower_N',
                      'grp_properties_lower_P', 'grp_properties_lower_K', 'grp_properties_upper_pH_upper', 'grp_properties_upper_pH_lower', 'grp_properties_lower_pH_upper',
                      'grp_properties_lower_pH_lower', 'grp_awc_min', 'grp_awc_max', 'grp_awc_avg']

X_FEATURE = ['grp_properties_upper_organic',
             'grp_properties_upper_N', 'grp_properties_upper_P',
             'grp_properties_upper_K', 'grp_properties_lower_organic',
             'grp_properties_lower_N', 'grp_properties_lower_P',
             'grp_properties_lower_K', 'grp_properties_upper_pH_upper',
             'grp_properties_upper_pH_lower', 'grp_properties_lower_pH_upper',
             'grp_properties_lower_pH_lower', 'grp_awc_min', 'grp_awc_max',
             'grp_awc_avg']

#! load soil data
soil_CH_gdf = gpd.read_file(
    ".\\Soil_Chon Buri\\Soil_Chon Buri\\Soil_Chon Buri.shp", encoding="tis-620")
soil_CH_gdf.to_crs(crs=LATLONG_CRS, inplace=True)

soil_CHS_gdf = gpd.read_file(
    ".\\Soil_Chachoengsao\\Soil_Chachoengsao\\Soil_Chachoengsao.shp", encoding="tis-620")
soil_CHS_gdf.to_crs(crs=LATLONG_CRS, inplace=True)

soil_JB_gdf = gpd.read_file(
    ".\\Soil_Chanthaburi\\Soil_Chanthaburi\\Soil_จ.จันทบุรี.shp", encoding="tis-620")
soil_JB_gdf.to_crs(crs=LATLONG_CRS, inplace=True)

soil_RY_gdf = gpd.read_file(
    ".\\Soil_Rayong\\Soil_Rayong\\Soil_จ.ระยอง.shp", encoding="tis-620")
soil_RY_gdf.to_crs(crs=LATLONG_CRS, inplace=True)

soil_gdf = gpd.GeoDataFrame(pd.concat(
    [soil_CH_gdf, soil_CHS_gdf, soil_RY_gdf, soil_JB_gdf], ignore_index=True), geometry="geometry")

soil_grp_df = pd.read_parquet('soil_group_data.parquet').fillna(0)
soil_grp_df[FEATURE_SOIL_GROUP] = soil_grp_df[FEATURE_SOIL_GROUP].map(
    lambda x: 0 if x == -1 else x)

del soil_CH_gdf, soil_CHS_gdf, soil_JB_gdf, soil_RY_gdf

# #! load landuse data
# landuse_CH_gdf = gpd.read_file(
#     ".\\Landuse_Chon Buri_2564\\Landuse_Chon Buri_2564\\Landuse_Chon Buri_2564.shp", encoding='tis-620')
# landuse_CH_gdf.to_crs(crs=LATLONG_CRS, inplace=True)

# landuse_CHS_gdf = gpd.read_file(
#     ".\\Landuse_Chachoengsao_2561\\Landuse_Chachoengsao_2561\\Landuse_ฉะเชิงเทรา_2561.shp", encoding="tis-620")
# landuse_CHS_gdf.to_crs(crs=LATLONG_CRS, inplace=True)

# landuse_JB_gdf = gpd.read_file(
#     ".\\Landuse_Chanthaburi_2561\\Landuse_Chanthaburi_2561\\Landuse_จันทบุรี_2561.shp", encoding="tis-620")
# landuse_JB_gdf.to_crs(crs=LATLONG_CRS, inplace=True)

# landuse_RY_gdf = gpd.read_file(
#     ".\\Landuse_Rayong_2561\\Landuse_Rayong_2561\\LU_ระยอง_2561.shp", encoding="tis-620")
# landuse_RY_gdf.to_crs(crs=LATLONG_CRS, inplace=True)

# landuse_gdf = gpd.GeoDataFrame(pd.concat(
#     [landuse_CH_gdf, landuse_CHS_gdf, landuse_RY_gdf, landuse_JB_gdf], ignore_index=True), geometry="geometry")

#! Load catboost model
model = CatBoostClassifier()
model.load_model("lastest_cat_boost_model.cbm")

soil_ids_search = []
soil_ids_search_grp_id = []

for idx, i in enumerate(soil_grp_df['soil_series_id'].to_list()):
    soil_ids_search += [j.lower() for j in i]
    soil_ids_search_grp_id += [idx+1]*len(i)

def find_soil_group_data(soil_series_id):
    # ex soil_series_id = 48
    soil_series_id = str(soil_series_id).strip()
    if soil_series_id[0].isnumeric():
        return soil_series_id[:2]
    try:
        return soil_ids_search_grp_id[soil_ids_search.index(soil_series_id.lower())]
    except:
        return 61


# def get_soil_group_data(grp_ids):
#     # arr = [0 * len(FEATURE_SOIL_GROUP)]
#     arr = []
#     if len(grp_ids.split(',')) > 1:
#         grp_ids = [find_soil_group_data(i) for i in grp_ids.split(',')]
#         grp_id = grp_ids[0]
#         # for i in grp_ids:
#         #     soil_grp_df.loc[soil_grp_df['grp_id'] == i][FEATURE_SOIL_GROUP]
#     else:
#         grp_id = find_soil_group_data(grp_ids)
#     # print(grp_id)
#     return soil_grp_df[soil_grp_df['grp_id'] == int(grp_id)][FEATURE_SOIL_GROUP].values[0]

In [48]:
soil_gdf.to_parquet('./soil_gdf.parquet', compression='gzip')

In [43]:
soil_gdf.to_file("./test/soil_gdf.shp")

In [76]:
find_soil_group_data(35)

'35'

In [78]:
soil_grp_df[soil_grp_df['grp_id']==35]

Unnamed: 0,grp_id,soil_series,soil_series_id,grp_properties,grp_resourcefullness,grp_properties_upper_organic,grp_properties_upper_N,grp_properties_upper_P,grp_properties_upper_K,grp_properties_upper_pH,...,grp_properties_lower_pH,grp_properties_upper_pH_upper,grp_properties_upper_pH_lower,grp_properties_lower_pH_upper,grp_properties_lower_pH_lower,grp_awc,grp_awc_lst,grp_awc_min,grp_awc_max,grp_awc_avg
34,35,ชุดดินดอนไร่ (Dr) ชุดดินด่านซ้าย \r\n ...,"[Dr, Ds, Hc, Kt, Mb, Suk, Wn]",เป็นกลุ่มชุดดินที่เกิดจากวัตถุต้นกำเนิดดินพวกต...,ต่ำ,0.5,0.025,18.2,37.0,"[5.0, 6.0]",...,"[4.5, 5.5]",6.0,5.0,5.5,4.5,มีค่าระหว่าง 11.22-15.97 \r\n ...,"[11.22, 15.97, 14.10, 120, 33, 1,500]",11.22,15.97,14.1


In [79]:
def predict(lat, lon):
    #! Find soil group
    point = shapely.geometry.Point(lon, lat)
    soil_grp = soil_gdf[soil_gdf['geometry'].intersects(point)]
    if len(soil_grp) == 0:
        return [[-1]]
    soil_grp = soil_grp.iloc[0]['soilgroup']

    soil_grp = find_soil_group_data(soil_grp)

    print(soil_grp)
    
    #! Find soil group data
    soil_grp_data = soil_grp_df[soil_grp_df['grp_id'] == int(soil_grp)]

    #! Predict
    soil_grp_data = soil_grp_data[FEATURE_SOIL_GROUP]
    soil_grp_data = soil_grp_data.to_numpy().reshape(1, -1)
    if soil_grp_data.shape[1] == 0:
        return [[-1]]
    print(soil_grp_data)
    result = model.predict_proba(soil_grp_data)
    # result = "test"

    return result

In [80]:
predict(13.43116, 101.59227)

35
[[0.5 0.025 18.2 37.0 0.4 0.020000000000000004 15.4 35.0 6.0 5.0 5.5 4.5
  '11.22' '15.97' '14.10']]


array([[0.33504828, 0.5606639 , 0.00824956, 0.14043116]])

In [60]:
lat = 13.6173730
lon = 101.1011580

point = shapely.geometry.Point(lon, lat)
soil_grp = soil_gdf[soil_gdf.intersects(point)].iloc[0]['soilgroup']

soil_grp = find_soil_group_data(soil_grp)

#! Find soil group data
soil_grp_data = soil_grp_df[soil_grp_df['grp_id'] == soil_grp]

#! Predict
soil_grp_data = soil_grp_data[FEATURE_SOIL_GROUP]
soil_grp_data = soil_grp_data.to_numpy().reshape(1, -1)
result = model.predict(soil_grp_data)

CatBoostError: Input data must have at least one feature

In [53]:
soil_grp_data

array([], shape=(1, 0), dtype=object)

In [67]:
soil_gdf[soil_gdf['P_CODE'] == 'CHS']

Unnamed: 0,FID_Soil25,soilgroup,fertility,texture_to,soilserien,pH_top,soilseries,FID_wgs84_,AMPHOE_IDN,AMP_CODE,AMPHOE_T,AMPHOE_E,PROV_CODE,PROV_NAM_T,PROV_NAM_E,P_CODE,geometry
406,449,35b,ต่ำ,ดินร่วนปนทราย,ลาดหญ้า,กรดจัดมากถึงกรดปานกลาง,Ly,25,2410,10,อ.ท่าตะเกียบ,Amphoe Tha Takiap,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"MULTIPOLYGON Z (((101.59227 13.43116 0.00000, ..."
407,465,56B,ต่ำ,ดินร่วนปนทราย,ลาดหญ้า,กรดจัดมากถึงกรดปานกลาง,Ly,25,2410,10,อ.ท่าตะเกียบ,Amphoe Tha Takiap,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"MULTIPOLYGON Z (((101.72649 13.53508 0.00000, ..."
408,519,47D,ปานกลาง,ดินร่วนปนดินเหนียวปนกรวด,มวกเหล็ก,กรดเล็กน้อยถึงเป็นกลาง,Ml,25,2410,10,อ.ท่าตะเกียบ,Amphoe Tha Takiap,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"MULTIPOLYGON Z (((101.52284 13.42364 0.00000, ..."
409,793,SC,-,,พื้นที่ลาดชันเชิงซ้อน,,SC,25,2410,10,อ.ท่าตะเกียบ,Amphoe Tha Takiap,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"MULTIPOLYGON Z (((101.67420 13.58190 0.00000, ..."
410,978,48C,ปานกลาง,ดินร่วนปนดินเหนียวปนกรวด,ท่ายาง,กรดจัดถึงกรดเล็กน้อย,Ty,25,2410,10,อ.ท่าตะเกียบ,Amphoe Tha Takiap,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"MULTIPOLYGON Z (((101.77235 13.53999 0.00000, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622,987,48D,ปานกลาง,ดินร่วนปนดินเหนียวปนกรวด,ท่ายาง,กรดจัดถึงกรดเล็กน้อย,Ty,18,2408,08,อ.สนามชัยเขต,Amphoe Sanam Chaikhet,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"MULTIPOLYGON Z (((101.67674 13.58086 0.00000, ..."
623,992,48E,ปานกลาง,ดินร่วนปนทรายปนกรวด,ท่ายาง,กรดจัดถึงกรดเล็กน้อย,Ty,18,2408,08,อ.สนามชัยเขต,Amphoe Sanam Chaikhet,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"MULTIPOLYGON Z (((101.77378 13.54903 0.00000, ..."
624,245,46,ต่ำ,ดินร่วนปนดินเหนียวปนกรวด,กบินทร์บุรี,กรดเล็กน้อยถึงเป็นกลาง,Kb,18,2408,08,อ.สนามชัยเขต,Amphoe Sanam Chaikhet,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"MULTIPOLYGON Z (((101.86755 13.61477 0.00000, ..."
625,96,3,ปานกลาง,ดินเหนียว,ฉะเชิงเทรา,กรดจัดถึงด่างปานกลาง,Cc,16,2411,11,อ.คลองเขื่อน,Amphoe Khong Khuain,24,จ.ฉะเชิงเทรา,Changwat Chachoengsao,CHS,"POLYGON Z ((101.18165 13.81392 0.00000, 101.18..."


In [54]:
soil_gdf[soil_gdf.contains(point)].iloc[0]

FID_Soil25                                                   96
soilgroup                                                     3
fertility                                               ปานกลาง
texture_to                                            ดินเหนียว
soilserien                                           ฉะเชิงเทรา
pH_top                                     กรดจัดถึงด่างปานกลาง
soilseries                                                   Cc
FID_wgs84_                                                   20
AMPHOE_IDN                                                 2405
AMP_CODE                                                     05
AMPHOE_T                                            อ.บ้านโพธิ์
AMPHOE_E                                        Amphoe Ban Phoe
PROV_CODE                                                    24
PROV_NAM_T                                         จ.ฉะเชิงเทรา
PROV_NAM_E                                Changwat Chachoengsao
P_CODE                                  

In [68]:
predict(13.43116, 101.59227)

[[-1]]