## Generate clusterings and optimization results

Because the traditional python interpreter seems rather slow compared to notebooks, we use this instead for running some generating procedures.

In [1]:
import gradutil as gu
import numpy as np
import pandas as pd
import logging
import datetime
import simplejson as json
from time import time
from pyomo.opt import SolverFactory
from scipy.spatial.distance import euclidean
from BorealWeights import BorealWeightedProblem

In [2]:
def clustering(x, nclusts, seeds, logger=None, starttime=None):
    res = dict()
    for nclust in nclusts:
        res_clust = dict()
        for seedn in seeds:
            c, xtoc, dist = gu.cluster(x, nclust, seedn, verbose=0)
            res_clust[seedn] = {'c': c.tolist(),
                                'xtoc': xtoc.tolist(),
                                'dist': dist.tolist()}
            if logger:
                logger.info('Clustered to {} clusters. Seed {}'.format(nclust, seedn))
            if starttime:
                logger.info('Since start {}.'.format(str(datetime.timedelta(seconds=time()-starttime))))
        res[nclust] = res_clust
        if logger:
            logger.info('Clustered to {:2.0f} clusters'.format(nclust))
        if starttime:
            logger.info('Since start {}.'.format(str(datetime.timedelta(seconds=time()-starttime))))
        with open('clusterings/new_{}.json'.format(nclust), 'w') as file:
            json.dump(res_clust, file)
    return res

In [4]:
def clustering_to_dict(readfile):
    with open(readfile, 'r') as rfile:
        clustering = json.loads(rfile.read())

    new_clustering = dict()
    for seedn in clustering.keys():
        new_clustering[eval(seedn)] = dict()
        for key in clustering[seedn].keys():
            new_clustering[eval(seedn)][key] = np.array(clustering[seedn][key])
    return new_clustering

In [3]:
def clustering_to_optims(x_orig, x_clust, x_opt, names, clustering, opt, logger=None, starttime=None):
    #optims = dict()
    for nclust in sorted(clustering.keys()):
        n_optims = dict()
        for seedn in clustering[nclust].keys():
            xtoc = np.array(clustering[nclust][seedn]['xtoc'])
            if logger:
                logger.info('Assigning weights')
            if starttime:
                logger.info('Since start {}.'.format(str(datetime.timedelta(seconds=time()-starttime))))
            w = np.array([sum(xtoc == i)
                          for i in range(nclust)
                          if sum(xtoc == i) > 0])
            # Calculate the euclidian center of the cluster (mean)
            # and then the point closest to that center according to
            # euclidian distance, and then use the data format meant
            # for optimization
            if logger:
                logger.info('Assigning centers')
            if starttime:
                logger.info('Since start {}.'.format(str(datetime.timedelta(seconds=time()-starttime))))
            c_close = np.array([x_opt[min(np.array(range(len(xtoc)))[xtoc == i], 
                                          key=lambda index: euclidean(x_clust[index], 
                                                                      np.mean(x_clust[xtoc == i], 
                                                                              axis=0)))] 
                                for i in range(nclust) if sum(xtoc == i) > 0])
            problems = [BorealWeightedProblem(c_close[:, :, i], weights=w)
                        for i in range(np.shape(c_close)[-1])]
            if logger:
                logger.info('Solving problems')
            if starttime:
                logger.info('Since start {}.'.format(str(datetime.timedelta(seconds=time()-starttime))))
            for p in problems:
                opt.solve(p.model)
            n_optims[seedn] = dict()
            for ind, name in enumerate(names):
                n_optims[seedn][name] = dict()
                n_optims[seedn][name]['real'] = gu.model_to_real_values(
                    x_orig[:, :, ind],
                    problems[ind].model,
                    xtoc)
                n_optims[seedn][name]['surrogate'] = gu.cluster_to_value(
                    x_orig[:, :, ind], gu.res_to_list(problems[ind].model), w)
            if logger:
                logger.info('Optimized {} clusters with seed {}'.format(nclust, seedn))
            if starttime:
                logger.info('Since start {}.'.format(str(datetime.timedelta(seconds=time()-starttime))))
        #optims[nclust] = n_optims
        if logger:
            logger.info('Optimized {} clusters with every seed'.format(nclust))
        if starttime:
            logger.info('Since start {}.'.format(str(datetime.timedelta(seconds=time()-starttime))))
        with open('optimizations/new_{}.json'.format(nclust), 'w') as file:
            json.dump(n_optims, file)
    #return optims


In [5]:
revenue, carbon, deadwood, ha = gu.init_boreal()

n_revenue = gu.nan_to_bau(revenue)
n_carbon = gu.nan_to_bau(carbon)
n_deadwood = gu.nan_to_bau(deadwood)
n_ha = gu.nan_to_bau(ha)

revenue_norm = gu.new_normalize(n_revenue.values)
carbon_norm = gu.new_normalize(n_carbon.values)
deadwood_norm = gu.new_normalize(n_deadwood.values)
ha_norm = gu.new_normalize(n_ha.values)

ide = gu.ideal(False)
nad = gu.nadir(False)
opt = SolverFactory('cplex')

x = np.concatenate((n_revenue.values, n_carbon.values, n_deadwood.values, n_ha.values), axis=1)
x_stack = np.dstack((n_revenue, n_carbon, n_deadwood, n_ha))

x_norm = np.concatenate((revenue_norm, carbon_norm, deadwood_norm, ha_norm), axis=1)
x_norm_stack = np.dstack((revenue_norm, carbon_norm, deadwood_norm, ha_norm))

In [6]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

clustering_file = 'clusterings/new_all8301.json'
optim_file = 'optimizations/new_all8301.json'

In [9]:
start = time()
logger.info('Started clustering')
nclusts3 = range(1600, 1700, 50)
seeds = range(2, 12)

clustering(x_norm, nclusts3, seeds, logger, start)
logger.info('All clustered to 1600-1700-50. Time since start {}.'.format(str(datetime.timedelta(seconds=time()-start))))


INFO:__main__:Started clustering
INFO:__main__:Clustered to 1600 clusters. Seed 2
INFO:__main__:Since start 0:00:39.968162.
INFO:__main__:Clustered to 1600 clusters. Seed 3
INFO:__main__:Since start 0:01:27.474076.
INFO:__main__:Clustered to 1600 clusters. Seed 4
INFO:__main__:Since start 0:02:11.982396.
INFO:__main__:Clustered to 1600 clusters. Seed 5
INFO:__main__:Since start 0:02:53.430474.
INFO:__main__:Clustered to 1600 clusters. Seed 6
INFO:__main__:Since start 0:03:42.224782.
INFO:__main__:Clustered to 1600 clusters. Seed 7
INFO:__main__:Since start 0:04:26.090134.
INFO:__main__:Clustered to 1600 clusters. Seed 8
INFO:__main__:Since start 0:05:07.415333.
INFO:__main__:Clustered to 1600 clusters. Seed 9
INFO:__main__:Since start 0:05:47.800097.
INFO:__main__:Clustered to 1600 clusters. Seed 10
INFO:__main__:Since start 0:06:27.807558.
INFO:__main__:Clustered to 1600 clusters. Seed 11
INFO:__main__:Since start 0:07:06.482839.
INFO:__main__:Clustered to 1600 clusters
INFO:__main__:

In [7]:
start = time()
logger.info('Started optimizing')
names = ['revenue', 'carbon', 'deadwood', 'ha']
nclusts4 = range(1600, 1700, 50)
for nclust in nclusts4:
    clusters = clustering_to_dict('clusterings/new_{}.json'.format(nclust))
    clusteri = {nclust:clusters}
    clustering_to_optims(x_stack, x_norm, x_norm_stack, names, clusteri, opt, logger=logger, starttime=start)
logger.info('All optimized to 1550-1700-100. Since start {}'.format(str(datetime.timedelta(seconds=time()-start))))


INFO:__main__:Started optimizing
INFO:__main__:Assigning weights
INFO:__main__:Since start 0:00:04.258278.
INFO:__main__:Assigning centers
INFO:__main__:Since start 0:05:35.965548.
INFO:__main__:Solving problems
INFO:__main__:Since start 0:08:34.284960.
INFO:__main__:Optimized 1600 clusters with seed 2
INFO:__main__:Since start 0:08:38.234586.
INFO:__main__:Assigning weights
INFO:__main__:Since start 0:08:38.235894.
INFO:__main__:Assigning centers
INFO:__main__:Since start 0:14:20.047824.
INFO:__main__:Solving problems
INFO:__main__:Since start 0:17:25.739500.
INFO:__main__:Optimized 1600 clusters with seed 3
INFO:__main__:Since start 0:17:29.014357.
INFO:__main__:Assigning weights
INFO:__main__:Since start 0:17:29.015936.
INFO:__main__:Assigning centers
INFO:__main__:Since start 0:23:23.609925.
INFO:__main__:Solving problems
INFO:__main__:Since start 0:26:25.783004.
INFO:__main__:Optimized 1600 clusters with seed 4
INFO:__main__:Since start 0:26:29.112173.
INFO:__main__:Assigning weig

# In the interactive optimization the given reference point has to be scaled to 0-1 also!