# Modified Definition Test
This notebook tests the modified definition of disruptivity that I defined using chains of events. I am worried that this definition will perform poorly because of the variance of the Poisson distribution, but who knows! The new method is outlined in this blog post:

https://cfsenergy.atlassian.net/wiki/spaces/~6318cff19794410874c7744f/blog/2023/05/05/2788819001/Lecture+2+fr+Froude+3+Probabilities+3+7+05+2023

We start with imports.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
from scipy.optimize import minimize, LinearConstraint
from scipy.interpolate import interp1d, LinearNDInterpolator

# Move into the source directory for this notebook to work properly
# Probably want a better way of doing this.
import os
import importlib
os.chdir('../src/')

# Import whatever we need
import disruptivity as dis
import probability as prob
import vis.disruptivity_vis as dis_vis
import vis.probability_vis as prob_vis
from vis.plot_helpers import plot_subplot as plot
import data_loader

# Import tokamak configurations
from tokamaks.cmod import CONFIG as CMOD
from tokamaks.d3d import CONFIG as D3D

# Reload every local module imported above so that edits made to the source
# files after the kernel started are picked up when this cell is re-run.
# prob_vis and data_loader are included so that all local modules behave the
# same way; previously only dis, prob and dis_vis were refreshed.
# Note: `plot` was imported by name from vis.plot_helpers and is not refreshed here.
for _module in (dis, prob, dis_vis, prob_vis, data_loader):
    importlib.reload(_module)

# Bind the alias after the reload so it points at the freshly loaded function.
load_disruptions_mat = data_loader.load_disruptions_mat

Loading is the same as before: we use the premade functions for the disruptivity computations.

In [None]:
# Load the C-Mod disruption warning database together with its named index sets.
cmod_df, cmod_indices = load_disruptions_mat('../data/CMod_disruption_warning_db.mat')

# Count the distinct shot numbers overall and within the two index sets.
n_shots = np.unique(cmod_df['shot']).size
n_shots_no_disrupt = np.unique(
    cmod_df['shot'][cmod_indices['indices_no_disrupt']]
).size
n_shots_disrupt = np.unique(
    cmod_df['shot'][cmod_indices['indices_disrupt']]
).size

# Sanity check: the two index sets must account for every shot exactly once.
assert n_shots == n_shots_disrupt + n_shots_no_disrupt, \
    'Number of disrupts plus number of non disruptions does not equal the total shot number'
print(f'Total Shot Number: {n_shots}, Non-Disrupted Shots: {n_shots_no_disrupt}, Disrupted Shots: {n_shots_disrupt}')

'''
So my goal with this block of code is to find all the portions of flat top disrupted shots 
that are in flat tops. Should be simple enough.
'''

# Entry dictionaries: subsets of the C-Mod configuration's entry_dict,
# keyed by signal name and kept in the order listed here.
entry_dict_1D = {
    name: CMOD["entry_dict"][name] for name in ('kappa', 'ip')
}

entry_dict_2D = {
    name: CMOD['entry_dict'][name] for name in ('kappa', 'z_error')
}

Now we can reuse the histogram binning code for variable timesteps, which returns the indices of the data points that fall in each bin. Since this new method essentially tries to compute the dt between subsequent data points, it is mechanically the same as the dt calculation for variable timesteps as well!

In [None]:
# Step 1: histogram the flat-top indices over the 2D entry dictionary.
# Only the histogram of all the data (with its per-bin data entries) is needed;
# no numerator or denominator histograms are built.
hist = dis.indices_to_histogram(
    cmod_df,
    entry_dict_2D,
    cmod_indices['indices_flattop'],
    35,
)

In [None]:
# Step 2: run the dt-per-bin computation on the flat-top indices using the
# histogram from the previous cell.
entry_list, max_counter = prob.compute_dt_bin(
    cmod_df,
    hist,
    cmod_indices['indices_flattop'],
    tau=0,
    window=2,
)

In [None]:
# Step 3: convert the entry list into the two dt collections that
# prob.find_disruptivity takes as input.
dis_dt_list, non_dis_dt_list = prob.entry_list_to_arrays(
    entry_list,
    max_counter,
    hist,
)

In [None]:
# Step 4: solve for the disruptivity array, starting from an initial guess of 1.
d_array = prob.find_disruptivity(
    dis_dt_list,
    non_dis_dt_list,
    guess=1,
)

In [None]:
# Plot the 2D disruptivity map and save it.
# The file name is built from the keys of entry_dict_2D so that it always names
# the signals that are actually plotted (kappa and z_error here, not n/ncrit as
# the previous hard-coded name claimed).
# The second element of the argument tuple is an all-zero array with the same
# shape as d_array.
plot(
    f"cmod_{'_'.join(entry_dict_2D)}_disruptivity_kaloyannis.png",
    dis_vis.subplot_disruptivity2d,
    (d_array, np.zeros(d_array.shape), hist.bin_edges, entry_dict_2D),
)

In [None]:
# The big crunch: 1D disruptivity for a few selected signals, once with tau=0
# and once with tau=50.
#
# Every intermediate result in this cell uses a `*_1d` name on purpose. The
# names `hist`, `entry_list`, `max_counter`, `dis_dt_list`, `non_dis_dt_list`
# and `d_array` hold the 2D results computed in the cells above and are read
# again by the interpolation cell below; rebinding them here would silently
# feed a 1D map into the 2D analysis on a Restart & Run All.
cmod_vde_shotlist = np.loadtxt("../data/cmod_vde_shotlist.txt", dtype=int)

# Parameter setup
figtype = 'disruptivity_vde_kaloyannis'
shotlist = None # set to None for no shotlist
# NOTE(review): figtype says "vde" and the VDE shot list is loaded above, but
# shotlist is None, so no shot list is passed on — confirm this is intended.

# Signals to scan and the tau values to evaluate for each of them.
entries_of_interest = ("n_over_ncrit", "z_error")
tau_values = (0, 50)

# Compute indices of interest
indices_n_disrupt, indices_n_total = dis.get_indices_disruptivity(CMOD, cmod_df, cmod_indices, shotlist=shotlist)

for entry in CMOD['entry_dict']:
    # Only the selected signals are processed; iteration follows the
    # configuration's own ordering.
    if entry not in entries_of_interest:
        continue

    # Single-signal entry dictionary for this scan
    entry_dict = {entry: CMOD['entry_dict'][entry]}

    for tau in tau_values:
        # A fresh histogram is built for every tau so that the runs never share
        # a histogram object.
        # The trailing 25 is presumably the number of bins — TODO confirm.
        hist_1d = dis.indices_to_histogram(cmod_df, entry_dict, indices_n_total, 25)

        # Continuity checks
        entry_list_1d, max_counter_1d = prob.compute_dt_bin(
            cmod_df, hist_1d, indices_n_total, tau=tau, window=20)

        # More data prep
        dis_dt_list_1d, non_dis_dt_list_1d = prob.entry_list_to_arrays(
            entry_list_1d, max_counter_1d, hist_1d)

        # Minimize
        d_array_1d = prob.find_disruptivity(dis_dt_list_1d, non_dis_dt_list_1d, guess=1)

        # Plot the disruptivity and save the figure. The tau=0 figure keeps the
        # plain file name; any other tau is inserted before the figure type.
        tau_tag = f'{tau}_' if tau else ''
        args = (d_array_1d, np.zeros(d_array_1d.shape), hist_1d.bin_edges, entry_dict)
        fig, ax = plot(f'cmod_{entry}_{tau_tag}{figtype}.png', dis_vis.subplot_disruptivity1d, args)

### Boundary avoidance time

So, last time we did boundary avoidance we were using scipy's 1D interpolator. I want to move away from that since we have the ability to compute these maps in N dimensions. Now the question is how to do this. I think the way to do it is to create a mask value that fills the gaps in the data with a high disruptivity value. This of course implies that the disruptivity maps will push the control system away from unexplored regions. I don't know if we should do that in practice, but it is the first thing I thought of and it is what I want to do, so don't @ me.

In [None]:
# Take a disruptivity map of N dimensions and build an interpolator over it that
# returns a high disruptivity for unvisited regions.
# This means we need a separate mask for regions with no data and regions with
# effectively 0 disruptivity. We want to avoid the situation where the
# disruptivity is so low that there are functionally no disruptions and having
# the control system avoid these hyper stable scenarios.
# NOTE(review): fill_value below only applies to points outside the grid; bins
# inside the grid keep whatever value d_array holds, so the separate "no data"
# mask described above is still to be implemented.

# Disruptivity reported for query points outside the histogram grid.
OUT_OF_GRID_DISRUPTIVITY = 1e2

# Bin centers along each dimension: the midpoint of every pair of adjacent edges.
edges_nd = np.array(hist.bin_edges)
bin_centers = (edges_nd[:, 1:] + edges_nd[:, :-1]) / 2

# Fail early with a readable message if d_array and hist come from different
# computations (for example after running cells out of order).
assert tuple(np.shape(d_array)[:len(bin_centers)]) == tuple(len(c) for c in bin_centers), (
    f'd_array has shape {np.shape(d_array)} but hist describes a grid of '
    f'{tuple(len(c) for c in bin_centers)} bin centers; recompute them together'
)

# N dimensional meshgrid creation from the bin centers
xx = np.meshgrid(*bin_centers)

# N-dimensional linear interpolator over the bin centers. bounds_error=False
# makes out-of-grid queries return the fill value instead of raising.
interper = scipy.interpolate.RegularGridInterpolator(
    bin_centers,
    d_array,
    method='linear',
    bounds_error=False,
    fill_value=OUT_OF_GRID_DISRUPTIVITY,
)

In [None]:
# Display the disruptivity array currently bound to `d_array`
# (the cell's last expression is shown as its output).
d_array

In [None]:
# Probability of disruption over the course of a single pulse.
shot = 1120807032

# Select this shot's rows once and reuse the mask everywhere below.
shot_mask = cmod_df.shot == shot
assert shot_mask.any(), f'Shot {shot} is not present in cmod_df'

# Sample the signals in the order of the entry dictionary. list(...) hands
# pandas a plain list of column names instead of a dict_keys view.
data = np.array(cmod_df.loc[shot_mask, list(entry_dict_2D)])

# The interpolator must have been built over the same number of signals as the
# sampled columns; otherwise it was built from a different disruptivity map.
assert data.shape[1] == len(interper.grid), (
    f'interper is {len(interper.grid)}-dimensional but {data.shape[1]} signals '
    f'({list(entry_dict_2D)}) were sampled; rebuild interper from the matching map'
)

# NOTE(review): 0.1 and True are passed straight through to prob.p_data; their
# meaning is not visible from this notebook — confirm against prob.p_data.
y = prob.p_data(interper(data), 0.1, True)

# Assume the last data point is the disruption time.
# .loc makes the lookup explicitly label-based.
disr_index = cmod_df.index[shot_mask][-1]
disr_time = cmod_df.loc[disr_index, 'time']

fig, ax = plt.subplots()
ax.plot(cmod_df.loc[shot_mask, 'time'], y, label=str(shot))
ax.plot([disr_time, disr_time], [0, 1], '--', color='orange', label="Disruption Time")

ax.set_xlabel("Time (s)")
ax.set_ylabel("Disruption Probability")
ax.set_ylim(0, 1)
ax.grid()
ax.legend();

In [None]:
# Index label of the last row that belongs to `shot`.
cmod_df['time_until_disrupt'].index[cmod_df['shot'] == shot][-1]