In [1]:
 %matplotlib inline
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn

from Boreal import BorealSolver

## Initialize data

In [2]:
# Directory holding the input CSV files, resolved against the current working directory.
data_dir = os.path.join(os.getcwd(), '../boreal_data')

# Read the four objective tables in the same order as before:
# carbon storage, combined habitat availability (HA), deadwood volume, timber revenues.
carbon, HA, deadwood, revenue = (
    pd.read_csv(os.path.join(data_dir, csv_name))
    for csv_name in (
        'Carbon_storage.csv',
        'Combined_HA.csv',
        'Deadwood_volume.csv',
        'Timber_revenues.csv',
    )
)

In [3]:
# For each table keep only the rows that have no missing value in any column.
carbon_clean, HA_clean, deadwood_clean, revenue_clean = (
    frame.dropna(axis=0, how='any')
    for frame in (carbon, HA, deadwood, revenue)
)

# Optimization

Problem formulation

### Carbon without Nans

Let's solve the problem with just one objective, using only the rows that contain no NaN values.

In [33]:
%%time
from Boreal import BorealSolver

bsolver = BorealSolver('glpk', False)
bsolver.solveBoreal(carbon_clean)

In [34]:
def print_solution(solver,data):
    """Print how many stands the solved model assigns to each handling.

    Parameters
    ----------
    solver : object exposing ``model`` with index sets ``I`` and ``J`` and
        decision variables ``x[i, j]`` that each carry a ``.value``.
    data : iterable whose iteration yields the handling labels (for a
        DataFrame, its column labels). Assumes each member of ``model.J`` is a
        0-based position into ``list(data)`` — TODO confirm against the solver.

    Prints a header line followed by one ``label count`` line per handling.
    """
    handling_names = list(data)  # build the label lookup once, not per output row
    stand_counts = dict()
    for i in solver.model.I:
        for j in solver.model.J:
            # Round rather than truncate: a solver may report a selected binary
            # as 0.9999999, which int() alone would silently count as 0.
            selected = int(round(solver.model.x[i,j].value))
            stand_counts[j] = stand_counts.get(j, 0) + selected
    print('Handling, # of stands')
    for key in stand_counts:
        print("{:8} {}".format(handling_names[key], stand_counts[key]))

In [35]:
# Per-handling stand counts for the solution obtained on the NaN-free carbon data.
print_solution(bsolver, carbon_clean)

So it looks like we really are able to solve this problem using the original data! (Without Nans)

### Carbon where Nan:s as Zeros

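Let's replace the NaNs with a placeholder value and try to solve the problem. (Note that the code below fills with one less than the smallest observed value rather than exactly 0.) All the single-objective optimization tasks are maximizations, so the placeholder should not appear in the final solution anyway. (We can check that later.)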

In [94]:
# Fill every missing entry with a sentinel one below the smallest observed value.
# Despite the "_zeros" name, the fill value is (min - 1), not 0.
carbon_zeros = carbon.fillna(np.nanmin(carbon.values) - 1)

In [95]:
%%time
from Boreal import BorealSolver

zero_bsolver = BorealSolver('glpk', False)
zero_bsolver.solveBoreal(carbon_zeros)

In [96]:
# Reuse the print_solution helper defined above instead of repeating its loop inline,
# and take the handling labels from the frame that was actually solved (carbon_zeros).
print_solution(zero_bsolver, carbon_zeros)

#### Verifying that solution looks reasonable

In [97]:
def values_to_list(solver, data):
    """Collect the data values of every (stand, handling) pair the solver selected.

    Parameters
    ----------
    solver : object exposing ``model`` with index sets ``I`` and ``J`` and
        decision variables ``x[i, j]`` that each carry a ``.value``.
    data : object supporting positional ``data.iloc[i, j]`` lookup (e.g. a
        pandas DataFrame). Assumes the members of ``model.I`` / ``model.J``
        are valid 0-based row/column positions — TODO confirm against the solver.

    Returns
    -------
    list
        ``data.iloc[i, j]`` for each selected pair, in ``I``-major, ``J``-minor order.
    """
    selected_values = []
    for i in solver.model.I:
        for j in solver.model.J:
            x_val = solver.model.x[i,j].value
            # Threshold instead of testing ``== 1`` exactly: solvers may report a
            # chosen binary as 0.9999999. An unset variable (None) is treated as
            # not selected, as it was under the exact comparison.
            if x_val is not None and x_val > 0.5:
                selected_values.append(data.iloc[i,j])
    return selected_values

In [98]:
# Data values at the (stand, handling) pairs selected by the NaN-filled carbon solution.
lst = values_to_list(zero_bsolver, carbon_zeros)

In [99]:
# Smallest selected value; it would equal the NaN fill value if a filled cell had been chosen.
min(lst)

There are no zeros at all, so at least in that respect the result looks reasonable.

In [100]:
# Largest selected value, for comparison with the range of the NaN-free data below.
max(lst)

In [101]:
# Smallest value among the rows of the carbon table that contain no NaN.
carbon.dropna(axis=0, how='any').values.min()

In [102]:
# Largest value among the rows of the carbon table that contain no NaN.
carbon.dropna(axis=0, how='any').values.max()

## Solving all single objective optimization tasks

#### Solving carbon storage

In [103]:
# Reuse the carbon model already solved on the NaN-filled table rather than solving again.
carbon_bsolver = zero_bsolver

In [104]:
# Selected carbon values, and their range as a quick sanity check on the solution.
carbon_values = values_to_list(carbon_bsolver, carbon_zeros)
print("Minimum: {}, maximum: {}".format(min(carbon_values), max(carbon_values)))

In [151]:
# Maximum of the raw carbon table (NaNs still present).
# NOTE(review): np.max on a DataFrame defers to pandas; whether this yields per-column
# maxima or one overall maximum depends on the pandas version — confirm which is intended.
np.max(carbon)

In [105]:
# Per-handling stand counts for the carbon-storage solution.
print_solution(carbon_bsolver, carbon_zeros)

In [160]:
# Sum of all decision-variable values for the chosen solver, i.e. the total number of
# (stand, handling) selections when every x[i, j] is 0 or 1.
this_bsolver = carbon_bsolver
sum(
    this_bsolver.model.x[i, j].value
    for i in this_bsolver.model.I
    for j in this_bsolver.model.J
)

In [122]:
# Total of the selected carbon values (the objective value of this solution).
sum(carbon_values)

#### Solving HA

In [106]:
# Fill every missing entry with a sentinel one below the smallest observed value.
# Despite the "_zeros" name, the fill value is (min - 1), not 0.
HA_zeros = HA.fillna(np.nanmin(HA.values) - 1)

In [107]:
%%time
# Solve the single-objective habitat-availability problem on the NaN-filled table.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible in this notebook — confirm in Boreal.py.
HA_bsolver = BorealSolver('glpk', False)
HA_bsolver.solveBoreal(HA_zeros)

In [108]:
# Selected habitat-availability values, and their range as a quick sanity check.
HA_values = values_to_list(HA_bsolver, HA_zeros)
print("Minimum: {}, maximum: {}".format(min(HA_values), max(HA_values)))

In [109]:
# Per-handling stand counts for the habitat-availability solution.
# Pass the frame the solver was actually run on (HA_zeros), as the other objectives do;
# only the column labels are read, so the printed output is unchanged.
print_solution(HA_bsolver, HA_zeros)

In [123]:
# Total of the selected habitat-availability values (the objective value of this solution).
sum(HA_values)

#### Solving deadwood

In [110]:
# Fill every missing entry with a sentinel one below the smallest observed value.
# Despite the "_zeros" name, the fill value is (min - 1), not 0.
deadwood_zeros = deadwood.fillna(np.nanmin(deadwood.values) - 1)

In [111]:
%%time
# Solve the single-objective deadwood problem on the NaN-filled table.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible in this notebook — confirm in Boreal.py.
deadwood_bsolver = BorealSolver('glpk', False)
deadwood_bsolver.solveBoreal(deadwood_zeros)

In [112]:
# Selected deadwood values, and their range as a quick sanity check.
deadwood_values = values_to_list(deadwood_bsolver, deadwood_zeros)
print("Minimum: {}, maximum: {}".format(min(deadwood_values), max(deadwood_values)))

In [113]:
# Per-handling stand counts for the deadwood solution.
print_solution(deadwood_bsolver, deadwood_zeros)

In [125]:
# Total of the selected deadwood values (the objective value of this solution).
sum(deadwood_values)

#### Solving Timber revenue

In [114]:
# Fill every missing entry with a sentinel one below the smallest observed value.
# Despite the "_zeros" name, the fill value is (min - 1), not 0.
revenue_zeros = revenue.fillna(np.nanmin(revenue.values) - 1)

In [115]:
%%time
# Solve the single-objective timber-revenue problem on the NaN-filled table.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible in this notebook — confirm in Boreal.py.
revenue_bsolver = BorealSolver('glpk', False)
revenue_bsolver.solveBoreal(revenue_zeros)

In [116]:
# Selected timber-revenue values, and their range as a quick sanity check.
revenue_values = values_to_list(revenue_bsolver, revenue_zeros)
print("Minimum: {}, maximum: {}".format(min(revenue_values), max(revenue_values)))

In [117]:
# Per-handling stand counts for the timber-revenue solution.
print_solution(revenue_bsolver, revenue_zeros)

There were some stands with a value of 0.0, so let's check them:

In [119]:
# Show the rows of the raw revenue table whose selected value is exactly 0.
# NOTE(review): the boolean mask is applied positionally, so this lines up only if
# revenue_values holds exactly one entry per row of `revenue`, in row order —
# confirm the solver picks exactly one handling per stand.
rv = np.array(revenue_values)
revenue.iloc[rv == 0]

Apparently on some stands there is no way to make any profit, so it is ok that those are only zeros.

In [126]:
# Total of the selected timber-revenue values (the objective value of this solution).
sum(revenue_values)

## Comparing optimization to ones in the papers

The single objective results are documented in http://onlinelibrary.wiley.com/doi/10.1111/1365-2664.12790/full
so it is meaningful to compare our results to that one.

Results in the paper:
"The maximum capacity of the landscape 
- (i) to provide harvest revenues (NPV) was 250 M€ (average 5800 € ha−1),
- (ii) to store carbon was 4459 × 103 MgC (average 103 MgC ha−1), 
- (iii) for deadwood index was 218 150 m3 (average 5·1 m3 ha−1) and 
- (iv) for the combined habitat availability was 20 211 (no units) (average 0·47 ha−1)."

For us the corresponding values are:

In [166]:
# Our single-objective optima, scaled to the units quoted from the paper.
# NOTE(review): carbon is divided by 100 here ("x 100 MgC"), whereas the per-hectare
# check further down uses 4459*1000 for the paper's figure — confirm the intended scale.
print("\n".join([
    "(i) Harvest revenues {:.0f} M€".format(sum(revenue_values)/1000000),
    "(ii) Carbon storage {:.0f} x 100 MgC".format(sum(carbon_values)/100),
    "(iii) Deadwood index {:.0f} m3".format(sum(deadwood_values)),
    "(iv) Combined Habitat {:.0f}".format(sum(HA_values)),
]))

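Assuming the " x 103 MgC" is just a typo and actually means "x 100 MgC", there are still differences in the stored carbon values and the combined habitat availability values. (Note: the "103" may instead be a superscript lost in copying, i.e. 10³; the per-hectare check below indeed multiplies by 1000.)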

There is still something weird with the data values given in the paper:


In [200]:
# Back out the landscape area implied by each (total, per-hectare average) pair
# quoted from the paper: total / average-per-hectare.
print("\n".join([
    'Total ha-1 calculated according to the values given in paper:',
    '-'*62,
    'Revenue/(average timber revenue/ha-1) = {}'.format(250000000/5800),
    'Carbon/(average carbon storage /ha-1) = {}'.format(4459*1000/103),
    'Deadwood/(average deadwood index/ha-1) = {}'.format(218150/5.1),
    'Combined habitat/(average habitat/ha-1) = {}'.format(20211/0.47),
]))

Now all these values indicate that there should be ~43000 hectares in total. The paper, however, states that there were 68 700 hectares. I don't really know if that is a real problem regarding the optimization task, but it's still odd.

# Clustering

### Feature selection

In [121]:
# Pairwise correlation between the columns of the raw carbon table, as input to feature selection.
carbon.corr()

### Clustering according to the features