In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch as T
from tqdm import tqdm
from doubleDQN_Agent import DDQNAgent
import cv2

In [None]:
from geosteering import Geosteering

In [None]:
def eval_func(runs, render, fault, single_plot, mean_plot, fig_num):
    """Evaluate the saved DDQN bit-depth agent on the Geosteering environment.

    Plays ``runs`` greedy episodes (``epsilon = 0``) with the network weights
    loaded from ``trained network/``, optionally records the rendered frames
    to ``Evaluation.mp4``, and optionally plots every individual well path
    and/or the mean path with a 95 % confidence band.

    Note: the agent name and the figure titles read the notebook-level
    globals ``bd`` and ``val``; both must be defined before calling this
    function.

    Parameters
    ----------
    runs : int
        Number of evaluation episodes to play.
    render : bool
        Forwarded to ``Geosteering(render_=...)``. When the environment
        reports ``render_`` as true, every step is rendered and written to
        the video file.
    fault : object
        Forwarded unchanged to ``Geosteering(fault=...)``.
    single_plot : bool
        If true, draw one figure per episode.
    mean_plot : bool
        If true, draw one figure with the across-episode mean and its
        confidence band.
    fig_num : int
        matplotlib figure number to use for the first figure drawn here.

    Returns
    -------
    tuple
        ``(mean episode score,
        (env.value - env.drill_cost) * (env.n_data - 1),
        next unused figure number)``.
    """
    env = Geosteering(render_=render, eval=True, fault=fault)
    n_games = runs
    gamma = 0.999

    scores = np.zeros([n_games])
    states = (2*(env.look_ahead)+8,)
    bd_actions = 2*env.bd_step + 2

    bd_agent = DDQNAgent(gamma=gamma, lr=0.0005, n_actions=bd_actions, n_states=states,
                batch_size=32, mem_size=25000, replace=1000, saved_dir='trained network/',
                env_name='Geosteering_bitdepth_BD=%s_Val=%s_v4'%(bd,val))

    # NOTE(review): the checkpoint path is fixed to BD=2/Val=2 regardless of the
    # current ``bd``/``val`` globals - confirm this is intended.
    bd_agent.Q_eval.load_state_dict(T.load('trained network/Geosteering_bitdepth_BD=2_Val=2_LookAhead=1_GYM.pth'))
    bd_agent.epsilon = 0  # always act greedily during evaluation

    # Per-episode records; one row per episode, one column per data point.
    UB_post = np.zeros([n_games,env.n_data])
    LB_post = np.zeros([n_games,env.n_data])
    bitdepth = np.zeros([n_games,env.n_data])
    cost = np.zeros(n_games)
    value = np.zeros(n_games)
    contact = np.zeros(n_games)

    num_exit = 0  # steps taken while env.exit == 1
    num_st = 0    # of those, steps where the agent chose the last action index

    result = None
    if env.render_:
        # Frame size presumably matches env.canvas - TODO confirm.
        result = cv2.VideoWriter('Evaluation.mp4',
                cv2.VideoWriter_fourcc(*'MP4V'),
                5, (1280,720))
    try:
        for i in range(n_games):
            done = False
            observation = env.reset()
            score = 0
            while not done:
                action = bd_agent.choose_action(observation)
                # Highest action index; presumably the sidetrack action,
                # judging by the num_st counter - TODO confirm.
                st_action = env.bd_step*2+1
                if env.exit == 0 and action == st_action:
                    # That action is only accepted while env.exit == 1;
                    # otherwise fall back to action index bd_step.
                    action = env.bd_step
                if env.exit == 1:
                    num_exit += 1
                    if action == st_action:
                        num_st += 1
                observation_, reward, done, info = env.step(action)
                if env.render_:
                    env.render()
                    result.write(env.canvas)
                score += reward
                observation = observation_

            if single_plot:
                fig = plt.figure(fig_num, figsize=(15,5))
                ax = fig.add_subplot()
                fig.subplots_adjust(top=0.85)
                fig.suptitle('RL for n=%s and reservoir value=%s'%(bd,val),fontsize=20,fontweight='bold')
                ax.set_title(r'Res. contact = {:.2%}, Cost (\$ Million) = {:.2f}, EMV(\$ Million) = {:.2f}'.format(
                    env.res_cont, env.total_cost, env.res_value-env.total_cost), fontsize=16)
                distance = np.arange(env.n_data)*env.decdist
                ii = 1
                # Walk the bit-depth columns from last to first so that earlier
                # columns are drawn later and end up on top.
                for kk in np.arange(9,-1,-1):
                    path = env.bitdepth[~np.isnan(env.bitdepth[:,kk]),kk]
                    if len(path) > 0:
                        path_dist = np.arange(len(path))*env.decdist
                        # Use the running counter ii (not the episode index)
                        # so every drawn path gets its own z-order.
                        ax.plot(path_dist, path, '-', zorder=ii)
                        ax.scatter(path_dist, path, zorder=ii)
                        ii = ii + 1
                ax.plot(distance, env.UB_sensor[:], 'b-')
                ax.plot(distance, env.UB_sensor[:]+env.h, 'b-')
                ax.set_ylim([1115,1145])
                ax.invert_yaxis()  # depth increases downwards

                fig_num += 1
            scores[i] = score

            UB_post[i,] = env.UB_post[:env.n_data]
            LB_post[i,] = env.UB_post[:env.n_data] + env.h
            # Keep the bit-depth column that spans the full well length; if
            # several do, the lowest column index wins (it is assigned last).
            for kk in np.arange(9,-1,-1):
                path = env.bitdepth[~np.isnan(env.bitdepth[:,kk]),kk]
                if len(path) == env.n_data:
                    bitdepth[i,] = path
            cost[i] = env.total_cost
            value[i] = env.res_value
            contact[i] = env.res_cont
    finally:
        # Release the video file even if an episode raised.
        if result is not None:
            result.release()
        cv2.destroyAllWindows()

    # Across-episode mean for every data point.
    UB_post_mean = UB_post.mean(axis=0)
    LB_post_mean = LB_post.mean(axis=0)
    bitdepth_mean = bitdepth.mean(axis=0)

    # 95 % confidence half-width of the mean: 1.96 * std / sqrt(sample size).
    # The sample is the set of episodes, so the size is n_games, not n_data.
    root_n = np.sqrt(n_games)
    ci_UB = 1.96 * UB_post.std(axis=0) / root_n
    ci_LB = 1.96 * LB_post.std(axis=0) / root_n
    ci_bd = 1.96 * bitdepth.std(axis=0) / root_n

    UB_post_lo, UB_post_hi = UB_post_mean - ci_UB, UB_post_mean + ci_UB
    LB_post_lo, LB_post_hi = LB_post_mean - ci_LB, LB_post_mean + ci_LB
    bitdepth_lo, bitdepth_hi = bitdepth_mean - ci_bd, bitdepth_mean + ci_bd

    if mean_plot:
        fig = plt.figure(fig_num, figsize=(15,5))
        ax = fig.add_subplot()
        distance = np.arange(env.n_data)*env.decdist
        ax.plot(distance, bitdepth_mean, 'r-')
        ax.plot(distance, bitdepth_mean, 'r.')
        ax.plot(distance, UB_post_mean, 'b-')
        ax.plot(distance, LB_post_mean, 'b-')
        ax.fill_between(distance, bitdepth_lo, bitdepth_hi, color='r', alpha=.2)
        ax.fill_between(distance, UB_post_lo, UB_post_hi, color='b', alpha=.1)
        ax.fill_between(distance, LB_post_lo, LB_post_hi, color='b', alpha=.1)
        fig.suptitle('RL for n=%s and reservoir value=%s'%(bd,val),fontsize=20,fontweight='bold')
        # fontsize belongs to set_title, not to str.format.
        ax.set_title(r'Res. contact = {:.2%}, Cost (\$ Million) = {:.2f}, EMV(\$ Million) = {:.2f}'.format(
            np.mean(contact), np.mean(cost), np.mean(value-cost)), fontsize=16)
        ax.set_ylim([1125,1140])
        ax.invert_yaxis()  # depth increases downwards
        fig_num += 1

    # Diagnostics: exit steps per episode, and the share of exit steps on
    # which the last action index was chosen. Guard both divisions.
    exits_per_game = num_exit/n_games if n_games else float('nan')
    st_share = num_st/num_exit if num_exit else float('nan')
    print(exits_per_game, st_share)
    return np.mean(scores) , ((env.value-env.drill_cost)*(env.n_data-1)), fig_num

In [None]:
# Evaluate the agent for every (bd, val, fault) combination.
# eval_func reads `bd` and `val` as notebook globals, so the loop variables
# must keep exactly these names.
fig_num = 0
for bd in (2,):
    # Reservoir values to sweep for this bd (currently the same list either way).
    val_list = [2] if bd == 2 else [2]
    for val in val_list:
        x = 0  # running sum of the mean scores over all faults
        for fault in ('4-3',):
            A = eval_func(
                20,
                render=False,
                fault=fault,
                single_plot=False,
                mean_plot=True,
                fig_num=fig_num,
            )
            # A = (mean score, second return value, next free figure number)
            x, fig_num = x + A[0], A[2]