<a href="https://colab.research.google.com/github/mohannashahrad/Borealis_AI_Plant_Tree_Project/blob/main/Final_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [None]:
# Import all the required libraries
import sys
!{sys.executable} -m pip install darts
from funcs import *
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
import seaborn as sns


ModuleNotFoundError: ignored

# Loading the Dataset

In [None]:
# Fetch the project CSV, discard its first column, and preview the result.
# NOTE(review): the dropped column is presumably a saved index — confirm against the CSV.
DATA_URL = 'https://raw.githubusercontent.com/mohannashahrad/Borealis_AI_Plant_Tree_Project/main/Final_Data/final_data2.csv'
df = load_DF(DATA_URL).iloc[:, 1:]
display(df.head(30))

NameError: ignored

# Preprocessing

This section consists of data normalization and one-hot encoding for discrete features.

In [None]:
# Drop every row that holds a NaN or +/-inf in any column.
# `axis` is given by keyword: pandas >= 2.0 makes DataFrame.any keyword-only,
# so the positional form `.any(1)` raises a TypeError there.
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)]

# Continuous columns handed to the project's `standardize` helper.
# The names must match the CSV headers exactly (including their spelling).
col_names = ['Land Area (m2)', 'Agriculture Land (m2)', 'Forest Land (m2)', 'Population', 'Pop Growth (%)', 'Urban Pop (%)',
                'GDP (US$)', 'GDP Growth (%)', 'Forest Rents (% GDP)', 'Coal Rents (% GDP)', 'Oil Rents (% GDP)', 'CO2 Emission (kt)',
                'GHG Emision (CO2 eqv)']

df = standardize(df,col_names)

# Keep a handle on the raw country labels before the column is passed to the
# project's `oneHotEncode` helper (which presumably replaces it — confirm in funcs).
discrete_columns = ["Country Name"]
country_names = df["Country Name"]
df = oneHotEncode(df,discrete_columns)

# Splitting the dataset into test and train sets

In [None]:
# Separate the target column from the features, then hold out 20% of the rows
# for testing with a fixed random_state so the split is repeatable.
target_col = 'Tree Loss (ha)'
y = df[target_col]
x = df.drop(columns=target_col)
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)
# One shape per line: training features first, then test features.
print(X_train.shape, X_test.shape, sep="\n")

# Linear Regression

In [None]:
# Baseline: ordinary least-squares fit on the training split, scored on the test split.
# `fit` returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Project helpers that report on predictions versus ground truth.
print_analysis(y_pred, y_test)
compare_results(y_pred, y_test)

# XGBoost

In [None]:
# Wrap the training split in XGBoost's native data container.
dtrain = xgb.DMatrix(data=X_train,label=y_train)

# Booster settings shared by the cross-validation runs in this notebook.
# 'n_estimators' is deliberately not listed: it is an argument of the
# scikit-learn wrapper only. The native xgb.cv / xgb.train API ignores it
# (emitting an "unused parameter" warning) and takes the number of trees
# from `num_boost_round` instead.
params = {
    'gamma':0,
    'learning_rate':0.07,
    'max_depth':5,
    'min_child_weight':1.5,
    'reg_alpha':0.75,
    'reg_lambda':0.45,
    'subsample':0.6,
    'seed':42
}

# 5-fold cross-validation on MAE; stops once the test MAE has not improved
# for 10 consecutive rounds, up to a maximum of 999 rounds.
cv_results = xgb.cv(
    params,
    dtrain,
    num_boost_round=999,
    seed=42,
    nfold=5,
    metrics={'mae'},
    early_stopping_rounds=10
)
cv_results

# Hyperparameter Tuning for XGBoost

In [None]:
# Exhaustive grid over the two tree-shape hyperparameters:
# max_depth in 4..9 and min_child_weight in 1..7.
gridsearch_params = [
    (max_depth, min_child_weight)
    for max_depth in range(4,10)
    for min_child_weight in range(1,8)
]

min_mae = float("Inf")
best_params = None
for max_depth, min_child_weight in gridsearch_params:
    print("CV with max_depth={}, min_child_weight={}".format(
                             max_depth,
                             min_child_weight))
    # Build a per-trial copy instead of mutating the shared `params` dict, so
    # `params` is not silently left on the last grid point once the loop ends.
    trial_params = dict(params, max_depth=max_depth, min_child_weight=min_child_weight)
    # Run CV
    cv_results = xgb.cv(
        trial_params,
        dtrain,
        num_boost_round=1000,
        seed=42,
        nfold=5,
        metrics={'mae'},
        early_stopping_rounds=10
    )
    # Update best MAE
    mae_per_round = cv_results['test-mae-mean']
    mean_mae = mae_per_round.min()
    # argmin() is a zero-based row position and xgb.cv returns one row per
    # boosting round, so the number of rounds is the position plus one.
    boost_rounds = mae_per_round.argmin() + 1
    print("\tMAE {} for {} rounds".format(mean_mae, boost_rounds))
    if mean_mae < min_mae:
        min_mae = mean_mae
        best_params = (max_depth,min_child_weight)
print("Best params: {}, {}, MAE: {}".format(best_params[0], best_params[1], min_mae))

In [None]:
# Native-API container over the full dataset (features `x`, target `y`).
data_dmatrix = xgb.DMatrix(data=x, label=y)

# Settings for the final scikit-learn-style regressor, gathered in one mapping
# so they can be read (and tweaked) at a glance.
xgb_settings = {
    'colsample_bytree': 0.4,
    'gamma': 0,
    'learning_rate': 0.07,
    'max_depth': 8,
    'min_child_weight': 3,
    'n_estimators': 1000,
    'reg_alpha': 0.75,
    'reg_lambda': 0.45,
    'subsample': 0.6,
    'seed': 42,
}
xg_reg = xgb.XGBRegressor(**xgb_settings)
xg_reg.fit(X_train, y_train)

# Score the fitted model on the held-out split with the project helpers.
y_pred = xg_reg.predict(X_test)
print_analysis(y_pred, y_test)
compare_results(y_pred, y_test)

# Feature Importance using XGBoost

In [None]:
import matplotlib.pyplot as plt

# Only the leading columns of `x` are plotted.
# NOTE(review): 14 presumably covers the non-one-hot features — confirm against x.columns.
N_PLOTTED_FEATURES = 14

# Slice once and derive every length from the slice itself, so the bar heights,
# tick positions and labels always agree even if the model has fewer features.
importances = xg_reg.feature_importances_[:N_PLOTTED_FEATURES]
LABELS = x.columns[:len(importances)]
positions = list(range(len(importances)))

fig, ax = plt.subplots()
ax.bar(positions, importances)
ax.set_xticks(positions)
ax.set_xticklabels(LABELS, rotation='vertical')
ax.set_xlabel('Feature')
ax.set_ylabel('Importance')
ax.set_title('XGBoost feature importance')
plt.show()