# Python 종 분포 모델링(SDM: Species Distribution Modeling)

## 섹션 1 | 설정 (Set up)

In [None]:
import os

os.getcwd() # show the current working directory

In [None]:
os.chdir('D:\\GEODATA') # change the working directory (machine-specific Windows path)
os.getcwd() # show the current working directory to confirm the change

In [None]:
# Create the 'INPUT' and 'OUTPUT' folders in the working directory.
# exist_ok=True lets this cell be re-run without raising FileExistsError
# when the folders are already there.
os.makedirs("INPUT", exist_ok=True)
os.makedirs("OUTPUT", exist_ok=True)

In [None]:
import geopandas as gpd # GeoPandas(지오판다스)
import shutil # shutil(shell utility, 쉘 유틸리티)
import glob # glob(글로브)

pip install --upgrade pip
pip install wheel
pip install pipwin
pipwin refresh

pipwin install numpy
pipwin install pandas
pipwin install shapely
pipwin install gdal
pipwin install fiona
pipwin install pyproj
pipwin install six
pipwin install rtree
pipwin install geopandas
pipwin install rasterio

pip install matplotlib
pip install xgboost
pip install lightgbm
pip install pyimpute

In [None]:
# Copy the JTREE_TRAIN_VEC training file(s) from 'DATA/' into 'INPUT/'
# (shutil.copy leaves the originals in place).
train_files = sorted(glob.glob('DATA/JTREE_TRAIN_VEC*'))
for src_path in train_files:
    shutil.copy(src_path, 'INPUT/')

In [None]:
import pandas as pd

# Load the training table from the CSV staged in 'INPUT/'
train_vec = pd.read_csv("INPUT/JTREE_TRAIN_VEC.csv")
train_vec.head()  # preview the first rows

## 섹션 2 | 종 적합성(Species suitability) 매핑

In [None]:
# Bioclimatic features clipped to the Joshua tree study area:
# copy every 'bclim*.asc' raster from 'DATA/BIOCLIM/' into 'INPUT/'.
bioclim_files = sorted(glob.glob('DATA/BIOCLIM/bclim*.asc'))
for src_path in bioclim_files:
    shutil.copy(src_path, 'INPUT/')

In [None]:
# Raster features: collect the 'bclim*.asc' files staged in 'INPUT/',
# ordered by path so the feature order is reproducible.
raster_features = glob.glob('INPUT/bclim*.asc')
raster_features.sort()
print(raster_features)

In [None]:
# Report how many raster feature files were found
print(len(raster_features), '개 래스터 특징')

In [None]:
# pyimpute 모듈
from pyimpute import load_targets

In [None]:
# Training data: column 0 of the table is taken as the labels and
# columns 1-19 as the feature matrix.
train_y = train_vec.iloc[:, 0].values
train_xs = train_vec.iloc[:, 1:20].values

# Load the raster features into the data structure used for scikit-learn
target_xs, raster_info = load_targets(raster_features)

# Display both shapes to check that the observation counts match
train_xs.shape, train_y.shape

In [None]:
# ML 분류기 로드
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier 
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [None]:
# ML classifier dictionary: short name -> model instance.
# Every model is constructed with its library-default arguments.
CLASS_MAP = dict(
    RF=RandomForestClassifier(),
    ET=ExtraTreesClassifier(),
    XGB=XGBClassifier(),
    LGBM=LGBMClassifier(),
)

In [None]:
# pyimpute, sklearn 로드
from pyimpute import impute
from sklearn import model_selection

In [None]:
# Model fitting and spatial range prediction: for each classifier, report its
# k-fold cross-validation accuracy, refit it on the full training set, and
# write its spatial prediction into 'OUTPUT/<name>-IMAGES'.
k = 5  # number of cross-validation folds
# NOTE(review): KFold does not shuffle by default; if the training rows are
# ordered by class the folds will be unbalanced -- confirm against the CSV.
kf = model_selection.KFold(n_splits=k)

for name, model in CLASS_MAP.items():
    # Cross validation: printed as mean accuracy and two standard deviations,
    # both scaled to percent.
    accuracy_scores = model_selection.cross_val_score(
        model, train_xs, train_y, cv=kf, scoring='accuracy')
    print(name + " %d-fold 교차 검증 정확도: %0.2f (+/- %0.2f)"
          % (k, accuracy_scores.mean() * 100, accuracy_scores.std() * 200))

    # Spatial prediction
    model.fit(train_xs, train_y)
    outdir = 'OUTPUT/' + name + '-IMAGES'
    # exist_ok=True: re-running the cell must not fail with FileExistsError
    # after the (expensive) cross-validation and fit have already run.
    os.makedirs(outdir, exist_ok=True)
    impute(target_xs, model, raster_info, outdir=outdir,
           class_prob=True, certainty=True)

In [None]:
import rasterio


def _read_probability(model_name):
    """Return band 1 of 'OUTPUT/<model_name>-IMAGES/probability_1.0.tif'."""
    path = "OUTPUT/" + model_name + "-IMAGES/probability_1.0.tif"
    # The context manager closes the dataset as soon as the band has been
    # read, so no file handle is left open.
    with rasterio.open(path) as src:
        return src.read(1)


# Per-model rasters; presumably the class-1.0 probability written by the
# prediction step -- confirm against pyimpute's output naming.
distr_rf = _read_probability("RF")
distr_et = _read_probability("ET")
distr_xgb = _read_probability("XGB")
distr_lgbm = _read_probability("LGBM")

# Element-wise mean of the four model outputs
distr_averaged = (distr_rf + distr_et + distr_xgb + distr_lgbm) / 4

In [None]:
# Visualize the averaged species-suitability map
# pyplot is imported directly: the pylab namespace is a legacy interface that
# matplotlib discourages, and `plt` is only present in it incidentally.
import matplotlib.pyplot as plt


def plotit(x, title, cmap="Blues"):
    """Show an array as an image with a colorbar and a bold title.

    Args:
        x: Image data handed to ``plt.imshow``.
        title: Text used as the plot title.
        cmap: Colormap passed to ``plt.imshow``; defaults to "Blues".
    """
    # 'nearest' draws each cell as a flat block, with no smoothing between cells
    plt.imshow(x, cmap=cmap, interpolation='nearest')
    plt.colorbar()
    plt.title(title, fontweight='bold')


plotit(distr_averaged, "Joshua Tree Range, averaged", cmap="Greens")

In [None]:
# Visualize species suitability for Joshua Tree National Park
# NOTE(review): the [100:150, 100:150] window is hard-coded; presumably it covers the park in this raster grid -- confirm
plotit(distr_averaged[100:150, 100:150], "Joshua Tree National Park Suitability", cmap="Greens")