In [None]:
!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 4.2 MB/s 
[?25hCollecting fiona>=1.8
  Downloading Fiona-1.8.22-cp37-cp37m-manylinux2014_x86_64.whl (16.7 MB)
[K     |████████████████████████████████| 16.7 MB 28.2 MB/s 
Collecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 36.1 MB/s 
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: munch, cligj, click-plugins, pyproj, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.22 geopandas-0.10.2 munch-2.5.0 pyproj-3.2.1


In [None]:
import geopandas as gpd
import numpy as np
import pandas as pd
from google.colab import drive
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [None]:
# Attach Google Drive to the Colab filesystem; force_remount=True re-mounts
# even when a previous mount is still present.
mount_point = '/content/drive'
drive.mount(mount_point, force_remount=True)

Mounted at /content/drive


In [None]:
# Read the training and test sets (GeoJSON files on the mounted Drive) into
# GeoDataFrames.
train_path = "/content/drive/MyDrive/fundar_deforestacion_input/train_data_final.geojson"
test_path = "/content/drive/MyDrive/fundar_deforestacion_input/test_data_final.geojson"

train = gpd.read_file(train_path)
test = gpd.read_file(test_path)

In [None]:
# Features are every column from 'NDVI_2000' through 'NDVI_2019'; a label-based
# .loc slice includes both endpoints and whatever columns sit between them in
# the file's column order (presumably one NDVI column per year -- TODO confirm).
ndvi_columns = slice('NDVI_2000', 'NDVI_2019')

X = train.loc[:, ndvi_columns]
X_test = test.loc[:, ndvi_columns]

# Target: the class label stored in 'label_0'.
y = train['label_0']

In [None]:
# Baseline model: a random forest whose trees are capped at depth 15.
# random_state is fixed because the forest draws bootstrap samples and feature
# subsets at random; without a seed every Restart & Run All trains a different
# forest and the metrics reported further down cannot be reproduced.
clf = RandomForestClassifier(max_depth=15, random_state=42)
clf.fit(X, y)

RandomForestClassifier(max_depth=15)

In [None]:
# Predict a class for every test row with the baseline forest and store it in
# a 'pred' column, next to the true label already held in the test frame.
baseline_predictions = clf.predict(X_test)
test['pred'] = baseline_predictions

In [None]:
# Class balance check: number of training rows per label value.
train.label_0.value_counts()

1    2100
0    2048
2    1978
Name: label_0, dtype: int64

In [None]:
# Display names for the report rows. classification_report pairs them
# positionally with the sorted label values, i.e. 0, 1, 2 in this data.
class_names = ['no deforestado', 'deforestado', 'no deforestado (shrubland)']

# Per-class precision / recall / F1 of the baseline forest on the test set.
baseline_report = classification_report(
    test.label_0,
    test.pred,
    target_names=class_names,
)
print(baseline_report)


                            precision    recall  f1-score   support

            no deforestado       0.70      0.58      0.63       877
               deforestado       0.86      0.89      0.87       900
no deforestado (shrubland)       0.71      0.79      0.75       859

                  accuracy                           0.76      2636
                 macro avg       0.75      0.76      0.75      2636
              weighted avg       0.75      0.76      0.75      2636



In [None]:
#test.plot(c = test.label_0)

In [None]:
#test.plot(c = test.pred)

In [None]:
# Single-step pipeline around the forest; naming the step 'clf' lets a
# parameter grid address its hyper-parameters as 'clf__<name>'.
# random_state is fixed so that candidates evaluated in a hyper-parameter
# search differ only by their hyper-parameters, not by the random draws of
# each forest, and so that the search result is reproducible.
pipe = Pipeline([('clf', RandomForestClassifier(random_state=42))])

In [None]:
# Hyper-parameter grid for the 'clf' pipeline step:
# 3 forest sizes x 5 depth caps x 20 pruning strengths = 300 candidates.
# NOTE(review): the saved cv_results_ / best_params_ outputs further down list
# max_depth values 5/15/50/100 and no ccp_alpha, so they were produced by an
# earlier version of this grid -- re-run the search before relying on them.
parameters = {
    'clf__n_estimators': [100, 500, 1000],
    'clf__max_depth': [15, 50, 100, 500, 1000],
    'clf__ccp_alpha': np.arange(0, 1, 0.05),
}

# This cell rebinds `pipe` from the plain Pipeline to the search object.
# Re-running it as originally written wrapped the existing GridSearchCV in a
# second GridSearchCV, after which the 'clf__*' names no longer resolve and
# fit() fails. Unwrapping first makes the cell safe to run any number of times.
base_pipeline = pipe.estimator if isinstance(pipe, GridSearchCV) else pipe
pipe = GridSearchCV(base_pipeline, parameters)

In [None]:
# Run the cross-validated grid search on the training data. This is the slow
# cell of the notebook: every parameter combination is fitted once per fold.
pipe.fit(X, y=y)

GridSearchCV(estimator=Pipeline(steps=[('clf', RandomForestClassifier())]),
             param_grid={'clf__ccp_alpha': array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95]),
                         'clf__max_depth': [15, 50, 100, 500, 1000],
                         'clf__n_estimators': [100, 500, 1000]})

In [None]:
# Show the search results as a ranked table instead of dumping the raw
# cv_results_ dict, which holds one array entry per candidate for every key and
# floods the output. Only the ten best candidates and the columns needed to
# compare them are displayed; the full table stays available in `cv_summary`.
cv_summary = pd.DataFrame(pipe.cv_results_).sort_values('rank_test_score')
cv_summary[
    ['params', 'mean_test_score', 'std_test_score', 'rank_test_score', 'mean_fit_time']
].head(10)

{'mean_fit_time': array([ 0.7779232 ,  4.02555714,  7.54223747,  1.50491743,  7.7458015 ,
        15.07477078,  1.57552371,  7.89329867, 16.01299052,  1.57999578,
         7.92223272, 15.85303273]),
 'std_fit_time': array([0.01348061, 0.52247362, 0.02079901, 0.01071825, 0.46244831,
        0.02386411, 0.00604848, 0.04178084, 0.49764001, 0.00704673,
        0.02843248, 0.10021015]),
 'mean_score_time': array([0.02533889, 0.11431689, 0.22570238, 0.0389461 , 0.18497791,
        0.36822639, 0.04362059, 0.19447141, 0.39173203, 0.04280834,
        0.19937696, 0.39385629]),
 'std_score_time': array([0.00071879, 0.00118955, 0.00409441, 0.00124603, 0.00363609,
        0.00561101, 0.0040631 , 0.00439661, 0.00877598, 0.00268876,
        0.00691617, 0.00609279]),
 'param_clf__max_depth': masked_array(data=[5, 5, 5, 15, 15, 15, 50, 50, 50, 100, 100, 100],
              mask=[False, False, False, False, False, False, False, False,
                    False, False, False, False],
        fill_value='

In [None]:
# Pipeline configured with the best-scoring parameter combination found by the
# grid search.
pipe.best_estimator_

Pipeline(steps=[('clf',
                 RandomForestClassifier(max_depth=50, n_estimators=500))])

In [None]:
# Parameter combination (keys in 'clf__<name>' form) that achieved the best
# cross-validation score.
pipe.best_params_

{'clf__max_depth': 50, 'clf__n_estimators': 500}

In [None]:
# Cross-validation score of the best parameter combination. No `scoring` was
# passed to GridSearchCV, so this is the estimator's own score (accuracy for a
# classifier), measured on the training folds -- not on the test set.
pipe.best_score_

0.7546507307653894

In [None]:
# Score of the tuned model (the grid search's best estimator) on the held-out
# test set, for comparison with the cross-validation score above.
pipe.score(X_test, test['label_0'])

0.7268588770864947

In [None]:
# Write the test GeoDataFrame, including its 'pred' column, back to Drive.
# NOTE(review): 'pred' was filled from the baseline `clf`, not from the tuned
# grid-search model -- confirm that these are the predictions meant to be saved.
predictions_path = "/content/drive/MyDrive/fundar_deforestacion/data/predictions_sklearn.geojson"
test.to_file(predictions_path)
