# Clustering using all features

In [1]:
 %matplotlib inline
import seaborn
import matplotlib.pyplot as plt
from kmeans import kmeans, randomsample
import numpy as np
import pandas as pd
import random
import os
from BorealWeights import BorealWeightedProblem
from pyomo.opt import SolverFactory
from gradutil import *
# Seed handed to both `random.seed` and `np.random.seed` right before the clustering run.
seed = 2

In [2]:
# Load the four tables used throughout the notebook (each exposes `.values` below).
# NOTE(review): init_boreal is not imported by name; presumably it comes from the
# `from gradutil import *` star import — confirm.
revenue, carbon, deadwood, ha = init_boreal()

In [3]:
# Feature matrix for clustering: the four tables side by side, in the column
# order carbon | ha | deadwood | revenue.
X = np.concatenate(
    [table.values for table in (carbon, ha, deadwood, revenue)],
    axis=1,
)
# Display (rows, columns) of the combined matrix.
X.shape

In [4]:
# Second, independent concatenation with the same column order as X.
# NOTE(review): X is later modified in place (NaNs replaced by a penalty), while Y
# is left untouched; presumably Y is meant as the pristine copy — confirm, since no
# visible cell reads it.
Y = np.concatenate(
    [carbon.values, ha.values, deadwood.values, revenue.values],
    1,
)

In [5]:
# Smallest non-NaN entry anywhere in the ha table (range sanity check).
np.nanmin(ha.values, axis=None)

In [6]:
# Largest non-NaN entry anywhere in the ha table (range sanity check).
np.nanmax(ha.values, axis=None)

In [7]:
# Same minimum taken from X, over columns 7..13 — the slice the rest of the
# notebook treats as the ha block.
np.nanmin(X[:, 7:14], axis=None)

In [8]:
%%time
random.seed(seed)
np.random.seed(seed)
# preprocessing to add penalty for Nan values
X[np.isnan(X)] = np.nanmin(X) - np.nanmax(X)
randomcenters = randomsample(X, 50)
centers, xtoc, dist = kmeans(X,
                             randomcenters,
                             delta=.00001,
                             maxiter=100,
                             metric='cosine',
                             verbose=1)

In [9]:
%%time
C = centers.copy()
weights = np.array([sum(xtoc==i) for i in range(len(C))])

In [10]:
# Cut the centre matrix into four column blocks at offsets 7, 14 and 21, following
# the carbon | ha | deadwood | revenue order in which X was concatenated.
Ccarbon, Cha, Cdeadwood, Crevenue = np.split(C, [7, 14, 21], axis=1)

In [11]:
%%time
opt = SolverFactory('glpk')

clustProblemCarbon = BorealWeightedProblem(Ccarbon,weights)
resCarbon = opt.solve(clustProblemCarbon.model, False)

clustProblemHa = BorealWeightedProblem(Cha,weights)
resHA = opt.solve(clustProblemHa.model, False)

clustProblemDeadwood = BorealWeightedProblem(Cdeadwood,weights)
resDeadwood = opt.solve(clustProblemDeadwood.model, False)

clustProblemRevenue = BorealWeightedProblem(Crevenue,weights)
resRevenue = opt.solve(clustProblemRevenue.model, False)

In [12]:
# Convert each solved model with res_to_list, in the order carbon, ha, deadwood,
# revenue. The resulting lists are consumed by cluster_to_value and
# clusters_to_origin in the following cells.
(carbonSurrogateList,
 haSurrogateList,
 deadwoodSurrogateList,
 revenueSurrogateList) = (
    res_to_list(problem.model)
    for problem in (clustProblemCarbon, clustProblemHa,
                    clustProblemDeadwood, clustProblemRevenue)
)

In [13]:
# Evaluate every objective on the cluster centres themselves: each call receives
# that objective's centre block, its surrogate solution list and the cluster weights.
(resultSurrogateCarbon,
 resultSurrogateHa,
 resultSurrogateDeadwood,
 resultSurrogateRev) = (
    cluster_to_value(center_block, surrogate_list, weights)
    for center_block, surrogate_list in (
        (Ccarbon, carbonSurrogateList),
        (Cha, haSurrogateList),
        (Cdeadwood, deadwoodSurrogateList),
        (Crevenue, revenueSurrogateList),
    )
)

In [14]:
# Report the centre-based objective values, one per line, scaled exactly as in the
# labels (revenue divided by 1000000 for M€, carbon divided by 100).
print(
    'Results straight from the surrogate values:',
    "(i) Harvest revenues {:.0f} M€".format(resultSurrogateRev/1000000),
    "(ii) Carbon storage {:.0f} x 100 MgC".format(resultSurrogateCarbon/100),
    "(iii) Deadwood index {:.0f} m3".format(resultSurrogateDeadwood),
    "(iv) Combined Habitat {:.0f}".format(resultSurrogateHa),
    sep='\n',
)

In [15]:
# Map each surrogate solution back onto the per-row data: every call gets the
# matching column block of X, the row-to-cluster assignment xtoc, and that
# objective's surrogate solution list.
# NOTE(review): X is the array whose NaNs were overwritten in place with the penalty
# value before clustering, so these slices contain that penalty; the untouched
# concatenation is held in Y. Confirm which one clusters_to_origin should receive.
(resultOriginCarbon,
 resultOriginHa,
 resultOriginDeadwood,
 resultOriginRev) = (
    clusters_to_origin(X[:, columns], xtoc, surrogate_list)
    for columns, surrogate_list in (
        (slice(None, 7), carbonSurrogateList),
        (slice(7, 14), haSurrogateList),
        (slice(14, 21), deadwoodSurrogateList),
        (slice(21, None), revenueSurrogateList),
    )
)

In [16]:
# Report the objective values obtained after mapping the surrogate solutions back to
# the per-row data, with the same scaling as the surrogate report.
print(
    'Results when surrogate mapped to real values:',
    "(i) Harvest revenues {:.0f} M€".format(resultOriginRev/1000000),
    "(ii) Carbon storage {:.0f} x 100 MgC".format(resultOriginCarbon/100),
    "(iii) Deadwood index {:.0f} m3".format(resultOriginDeadwood),
    "(iv) Combined Habitat {:.0f}".format(resultOriginHa),
    sep='\n',
)

These are the correct results from the original single-objective optimizations:
- (i) Harvest revenues 250 M€
- (ii) Carbon storage 44490 x 100 MgC
- (iii) Deadwood index 218153 m3
- (iv) Combined Habitat 10327


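The greatest difference is in the HA value. The values are so different that it is really worth considering whether there is a problem with the preprocessing or somewhere else. One thing to check: the NaN penalty written into `X` before clustering is still present in the `X` slices that are mapped back to the original values above.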