# Emitting Spot Size measurement on 2020-02-07

## Load all the data

In [None]:
import numpy as np
import pandas as pd
import pymysql
import sqlalchemy as sql
import matplotlib.pyplot as plt
import matplotlib.dates as md
import seaborn as sns
sns.set()
import getopt
import sys
import datetime
from scipy.interpolate import interp1d
from NGDataObject import NGDataObject
from scipy.optimize import curve_fit
from scipy import optimize

#*******************************
# CONSTANTS
#*******************************
# connection to database
# CSV file with the database credentials; it is read with pandas further down
# and must provide a 'username' and a 'password' column.
CREDENTIALS_FILE = '/Users/hkromer/02_PhD/01.github/dash_NG/credentials.pw'
DB = "NG_twofast_DB" # name of the database
HOST = "twofast-RPi3-0"  # database host

# LOOKUP TABLES
# NOTE(review): not referenced anywhere in the visible part of this notebook.
LUT_PRESSURE_ION_SOURCE = "/Users/hkromer/02_PhD/01.github/phd/01_neutron_generator_contol/LUT_pressure_ion_source.txt"

### Load from database

In [None]:
# Pull dose, HV, pressure, reference-detector and microwave data from the
# database and store each table as a CSV file.
DAY = "2020-02-04"

# user name and password are taken from the first row of the credentials file
credentials = pd.read_csv(CREDENTIALS_FILE, header=0)
user, pw = (credentials[field].values[0] for field in ('username', 'password'))


#*******************************
# GET DATA
#*******************************

# connect to DB
con = NGDataObject(host=HOST, database=DB, user=user, password=pw)

# dose
query = f"SELECT * FROM data_dose WHERE DATE(time) = '{DAY}'"
data_dose = con.get_from_database(query=query)

# HV (plus the current scaled by 100)
query = f"SELECT * FROM data_HV WHERE DATE(time) = '{DAY}'"
data_hv = con.get_from_database(query=query)
data_hv['HV_current_x100'] = 100.0 * data_hv['HV_current']

# pressure
query = f"SELECT * FROM data_pressure WHERE DATE(time) = '{DAY}'"
data_pressure = con.get_from_database(query=query)

# reference detectors
# NOTE(review): unlike the other queries this one does not use DAY and compares
# DATE(time) against a full timestamp with '>' -- confirm that this is intended.
query = "SELECT * FROM data_referenceDetectors WHERE DATE(time) > '2020-02-03 20:00:00'"
data_reference_full = con.get_from_database(query=query)

# microwave power
query = f"SELECT * FROM microwave_generator_power WHERE DATE(time) = '{DAY}'"
data_microwave_power_full = con.get_from_database(query=query)

# microwave frequency
query = f"SELECT * FROM microwave_generator_frequency WHERE DATE(time) = '{DAY}'"
data_microwave_frequency_full = con.get_from_database(query=query)

# summed counts of the four reference detectors
data_reference_full['sum_counts'] = (
    data_reference_full['counts_D1']
    + data_reference_full['counts_D2']
    + data_reference_full['counts_D3']
    + data_reference_full['counts_D4']
)

# save to file
path = '/Users/hkromer/02_PhD/02_Data/13.new_chamber/DATA/emitting_spot_size_2020-02-04/'
frames_to_store = {
    'dose': data_dose,
    'hv': data_hv,
    'pressure': data_pressure,
    'reference_detectors': data_reference_full,
    'microwave_power': data_microwave_power_full,
    'microwave_frequency': data_microwave_frequency_full,
}
for label, frame in frames_to_store.items():
    frame.to_csv(f'{path}/2020-02-07_{label}.csv')

### Load from stored csv files

In [None]:
# Reload the tables stored by the database cell; the first CSV column becomes a
# datetime index.
path = '/Users/hkromer/02_PhD/02_Data/13.new_chamber/DATA/emitting_spot_size_2020-02-04/'
DAY = "2020-02-04"


def _read_stored(label):
    """Read one stored CSV file and convert its index to datetimes."""
    frame = pd.read_csv(f'{path}/2020-02-07_{label}.csv', index_col=0)
    frame.index = pd.to_datetime(frame.index)
    return frame


data_dose = _read_stored('dose')
data_hv = _read_stored('hv')
data_pressure = _read_stored('pressure')
data_reference_full = _read_stored('reference_detectors')
data_microwave_power_full = _read_stored('microwave_power')
data_microwave_frequency_full = _read_stored('microwave_frequency')


In [None]:
# One time-series figure per quantity over the full loaded data:
# (frame, column, line colour, y-axis label)
overview_plots = [
    (data_dose, 'dose_corrected', 'darkblue', 'Dose [muSv/hr]'),
    (data_hv, 'HV_voltage', 'darkred', 'HV V [kV]'),
    (data_hv, 'HV_current', 'darkorange', 'HV I [mA] (incl. leakage)'),
    (data_reference_full, 'sum_counts', 'darkgreen', 'Counts in reference \n detectors [1/(30s)]'),
]

for frame, column, colour, y_label in overview_plots:
    fig, ax = plt.subplots(figsize=(10,4))
    sns.set()
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    sns.lineplot(data=frame, x=frame.index, y=column, color=colour, ax=ax)

    # show only hour:minute on the time axis
    ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
    ax.set_ylabel(y_label)
    ax.set_xlabel(f'Time, {DAY}')
    plt.show()

#### Select only relevant time

In [None]:
# Restrict dose, HV and reference-detector data to the measurement window.
start = "2020-02-04 11:54:00"
end = "2020-02-04 20:00:00"

data_dose_s, data_hv_s, data_reference_s = (
    frame.loc[start:end] for frame in (data_dose, data_hv, data_reference_full)
)


### Counts in emitting spot size detector

In [None]:
# ESS detector readout recorded during the tungsten edge scan
fname = '/Users/hkromer/02_PhD/02_Data/13.new_chamber/2020-02-04.EmittingSpot_130kV/2020-02-04_readout.csv'
data_ess = pd.read_csv(fname, index_col='time')[['readtime', 'value']]
data_ess.index = pd.to_datetime(data_ess.index)

# background measurement: taken from the 2020-01-30/31 lead-shield readout file
fname = '/Users/hkromer/02_PhD/02_Data/13.new_chamber/2020-01-30.EmittingSpot.Leadshield/2020-01-31_readout.csv'
data_ess_BG = pd.read_csv(fname, index_col='time')[['readtime', 'value']]
data_ess_BG.index = pd.to_datetime(data_ess_BG.index)
data_ess_BG.info()

#### Background correction of emitting spot size detector and reference detector

In [None]:
# Background rate of the ESS detector, estimated from a fixed time window of the
# background readout.
start_BG = "2020-01-30 20:00:00"
end_BG = "2020-01-31 08:00:00"

data_ess_BG = data_ess_BG.loc[start_BG:end_BG, ]
fig, ax = plt.subplots(figsize=(10,4))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.lineplot(x=data_ess_BG.index, y='value', data=data_ess_BG, ax=ax)

ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
ax.set_ylabel('Counts in ess detector, BG')
ax.set_xlabel('Time, 2020-01-31')
plt.show()
print(f"Mean background counts per 30 s interval: {data_ess_BG.value.mean()} 1/(30s)")
# most frequent 'readtime' value divided by 1000 (presumably ms -> s; confirm)
counting_time = data_ess_BG['readtime'].value_counts().index[0]/1000
counts_per_s_BG = data_ess_BG.value.mean()/counting_time
print(f"Mean background counts per second: {counts_per_s_BG} 1/s")
print(f"Total counts: {data_ess_BG.value.sum()}")
timedelta = data_ess_BG.index[-1]-data_ess_BG.index[0]
# total_seconds() covers the whole span; the .seconds attribute would silently
# drop whole days if the window were ever longer than 24 h
bg_duration_s = timedelta.total_seconds()
print(f"Total time duration: {timedelta}")
print(f"Total time duration: {bg_duration_s} s")
print(f"Total meas positions expected in that time: {bg_duration_s/counting_time} ")
print(f"Total measurement points: {data_ess_BG.shape}")
# aggregate background rate: all counts divided by the covered time span
counts_per_s_BG_agg = data_ess_BG.value.sum()/bg_duration_s
print(f"Total counts per time duration: {counts_per_s_BG_agg} 1/s")

In [None]:
# Background rate of the reference detectors, estimated from a fixed time
# window of the stored background run.
data_reference_full_BG = pd.read_csv('/Users/hkromer/02_PhD/02_Data/13.new_chamber/DATA/emitting_spot_size_2020-01-31//2020-02-07_reference_detectors.csv', index_col=0)
data_reference_full_BG.index = pd.to_datetime(data_reference_full_BG.index)
start_BG = "2020-01-30 20:00:00"
end_BG = "2020-01-31 07:55:00"

data_ref_BG = data_reference_full_BG.loc[start_BG:end_BG, ]
fig, ax = plt.subplots(figsize=(10,4))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.lineplot(x=data_ref_BG.index, y='sum_counts', data=data_ref_BG, ax=ax)

ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
ax.set_ylabel('Counts in reference detector, BG')
ax.set_xlabel('Time, 2020-01-31')
plt.show()

print(f"Mean background counts per 30 s interval: {data_ref_BG.sum_counts.mean()} 1/(30s)")
# most frequent 'ard_time' value divided by 1000 (presumably ms -> s; confirm)
ref_counting_time = data_ref_BG['ard_time'].value_counts().index[0]/1000
# use the reference detectors' own counting time here, not the ESS detector's
# `counting_time` from the previous cell
ref_counts_per_s_BG = data_ref_BG.sum_counts.mean()/ref_counting_time
print(f"Mean background counts per second: {ref_counts_per_s_BG} 1/s")
print(f"Total counts: {data_ref_BG.sum_counts.sum()}")
timedelta = data_ref_BG.index[-1]-data_ref_BG.index[0]
# total_seconds() covers the whole span; .seconds would drop whole days
ref_bg_duration_s = timedelta.total_seconds()
print(f"Total time duration: {timedelta}")
print(f"Total time duration: {ref_bg_duration_s} s")
print(f"Total meas positions expected in that time: {ref_bg_duration_s/ref_counting_time} ")
print(f"Total measurement points: {data_ref_BG.shape}")
# aggregate background rate: all counts divided by the covered time span
ref_counts_per_s_BG_agg = data_ref_BG.sum_counts.sum()/ref_bg_duration_s
print(f"Total counts per time duration: {ref_counts_per_s_BG_agg} 1/s")

### Tungsten edge position

In [None]:
# Edge position log: one row per tungsten edge position with the start and
# stop time of the measurement at that position.
fname = '/Users/hkromer/02_PhD/02_Data/13.new_chamber/2020-02-04.EmittingSpotSize.xlsx'
data_pos = pd.read_excel(fname, header=4)
# keep only the needed columns and skip the first two rows of the sheet
data_pos = data_pos[['Time', 'Time.1', 'edge_pos']].iloc[2:, :]
# Drop incomplete rows BEFORE the times are turned into strings: once formatted,
# a missing time would read '2020-02-04 nan' and no longer count as missing.
# The copy avoids assigning into a slice of the sheet.
data_pos = data_pos.dropna().copy()
data_pos['edge_pos'] = data_pos['edge_pos'].astype(np.float64)
# the sheet stores times of day only; prepend the measurement date
for time_col in ('Time', 'Time.1'):
    data_pos[time_col] = pd.to_datetime(data_pos[time_col].apply(lambda x: f'2020-02-04 {x}'))

data_pos.reset_index(inplace=True, drop=True)
data_pos = data_pos.rename(columns={'Time': 'time_start', 'Time.1': 'time_stop'})


## Clean dose and ref counts

In [None]:
# Compare the selected dose data with a version restricted to values above 1200.
high_dose = data_dose_s['dose_corrected'] > 1200
data_dose_s_temp = data_dose_s[high_dose]

fig, ax = plt.subplots(figsize=(10,4))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

# filtered data first (dark blue), then the unfiltered selection on top
# (semi-transparent dark red)
data_plot_t = data_dose_s_temp
data_plot = data_dose_s
for frame, colour, extra in (
    (data_plot_t, 'darkblue', {}),
    (data_plot, 'darkred', {'alpha': 0.5}),
):
    sns.lineplot(data=frame, x=frame.index, y='dose_corrected', color=colour, ax=ax, **extra)

ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
ax.set(ylabel='Dose [muSv/hr]', xlabel=f'Time, {DAY}')
plt.show()


## Some plots - database

In [None]:

# Time series of HV voltage, HV current and reference counts, restricted to the
# selected measurement window: (frame, column, line colour, y-axis label)
window_plots = [
    (data_hv_s, 'HV_voltage', 'darkred', 'HV V [kV]'),
    (data_hv_s, 'HV_current', 'darkorange', 'HV I [mA] (incl. leakage)'),
    (data_reference_s, 'sum_counts', 'darkgreen', 'Counts in reference \n detectors [1/(30s)]'),
]

for frame, column, colour, y_label in window_plots:
    fig, ax = plt.subplots(figsize=(10,4))
    sns.set()
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    sns.lineplot(data=frame, x=frame.index, y=column, color=colour, ax=ax)

    ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
    ax.set_ylabel(y_label)
    ax.set_xlabel(f'Time, {DAY}')
    plt.show()

## ESS

In [None]:
# Raw ESS detector counts over time
fig, ax = plt.subplots(figsize=(10,4))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.lineplot(data=data_ess, x=data_ess.index, y='value', color='red', ax=ax)

ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
ax.set(
    ylabel='Counts in ess \n detectors [1/(30s)]',
    xlabel=f'Time, {DAY}',
)
plt.show()

# Extract data for each tungsten position

### Tungsten edge position

In [None]:
# Edge position log: one row per tungsten edge position with the start and
# stop time of the measurement at that position.
fname = '/Users/hkromer/02_PhD/02_Data/13.new_chamber/2020-02-04.EmittingSpotSize.xlsx'
data_pos = pd.read_excel(fname, header=4)
# keep only the needed columns and skip the first two rows of the sheet
data_pos = data_pos[['Time', 'Time.1', 'edge_pos']].iloc[2:, :]
# Drop incomplete rows BEFORE the times are turned into strings: once formatted,
# a missing time would read '2020-02-04 nan' and no longer count as missing.
# The copy avoids assigning into a slice of the sheet.
data_pos = data_pos.dropna().copy()
data_pos['edge_pos'] = data_pos['edge_pos'].astype(np.float64)
# the sheet stores times of day only; prepend the measurement date
for time_col in ('Time', 'Time.1'):
    data_pos[time_col] = pd.to_datetime(data_pos[time_col].apply(lambda x: f'2020-02-04 {x}'))

data_pos.reset_index(inplace=True, drop=True)
data_pos = data_pos.rename(columns={'Time': 'time_start', 'Time.1': 'time_stop'})



## Adjustments

In [None]:
# drop erroneous row: removes the entry with index label 55 and renumbers the
# remaining rows from 0
# data_pos.loc[data_pos['time_start'] > "2020-02-04 19:13:00"]
data_pos = data_pos.drop(55).reset_index(drop=True)
# show the last rows to check the result
data_pos.tail(10)

In [None]:
# # replace in t0
# dict_replace = {
#     pd.Timestamp('2020-01-31 13:52:00'): pd.Timestamp('2020-01-31 13:52:30'),
#     pd.Timestamp('2020-01-31 14:26:00'): pd.Timestamp('2020-01-31 14:28:00')
    
# }
# data_pos['t0'] = data_pos['t0'].replace(dict_replace)

# # replace in t1
# dict_replace = {
#     pd.Timestamp('2020-01-31 08:45:00'): pd.Timestamp('2020-01-31 08:43:00'),
#     pd.Timestamp('2020-01-31 13:36:00'): pd.Timestamp('2020-01-31 13:35:00')
# }
# data_pos['t1'] = data_pos['t1'].replace(dict_replace)

In [None]:
# Diagnostic pass over all edge positions: print max/min/std of the raw ESS
# counts inside each interval (both ends excluded) and plot them.
for ii, entry in enumerate(data_pos.itertuples(index=False)):

    # start time, stop time and edge position of this entry
    t0, t1 = entry.time_start, entry.time_stop
    pos = entry.edge_pos # mm
    inside = (data_ess.index > t0) & (data_ess.index < t1)
    ess_counts = data_ess.loc[inside, 'value']

    counts_max = ess_counts.max()
    counts_min = ess_counts.min()
    counts_std = ess_counts.std()

    if len(ess_counts) == 0:
        print(f"No data for ii {ii} t0 {t0} t1 {t1}")
        continue

    print(f"{counts_max:.2f}, {counts_min:.2f}, {counts_std:.2f}")
    # visual check that the counts do not change significantly within the interval
    fig, ax = plt.subplots(figsize=(8,3))
    sns.set()
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    sns.lineplot(x=ess_counts.index, y=ess_counts, ax=ax)
    ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
    ax.set_ylabel('Counts in ess \n detectors [1/(30s)]')
    ax.set_xlabel(f'Time, {DAY}')
    plt.title(f"{t0} \n {t1}")
    plt.show()



In [None]:
def get_total_counts(t0, t1, pos):
    """Aggregate ESS, reference-detector and dose data for one edge position.

    Reads the notebook-level frames ``data_ess``, ``data_reference_s`` and
    ``data_dose`` and the background rates ``counts_per_s_BG_agg`` and
    ``ref_counts_per_s_BG_agg``. Draws one plot of the reference counts when
    usable reference data exists.

    Parameters
    ----------
    t0, t1 : timestamp
        Start and stop of the interval; readings at both ends are included.
    pos : float
        Edge position; copied unchanged into the result.

    Returns
    -------
    pandas.Series or None
        Entries: 't0', 't1' (first/last ESS reading inside the interval), 'pos',
        'ess_total_counts', 'ess_total_time', 'ess_counts_bg_corr',
        'ref_total_counts', 'ref_total_time', 'ess_cps', 'ref_cps',
        'ess_cps_norm_ref', 'avg_dose', 'ess_cps_norm_dose'.
        When no usable reference data exists, 'ref_cps', 'ref_total_counts' and
        'ref_total_time' are all set to the placeholder value 1, which later
        cells test for.
        ``None`` (after printing a message) when the interval holds no ESS
        reading, or only readings at a single timestamp so that no rate can be
        formed.
    """
    in_ess = (data_ess.index >= t0) & (data_ess.index <= t1)
    data_ess_counts = data_ess.loc[in_ess, ['value']]
    if len(data_ess_counts) == 0:
        print(f"No data for t0 {t0}, t1 {t1}, pos {pos})")
        return None

    # From here on the interval is the span between the first and the last ESS
    # reading; reference and dose data are selected on that narrowed span.
    t0 = data_ess_counts.index[0]
    t1 = data_ess_counts.index[-1]

    # total_seconds() covers the whole span; .seconds would drop whole days
    ess_time = (t1 - t0).total_seconds()
    if ess_time <= 0:
        # a zero time span would turn every rate below into inf/nan
        print(f"No usable time span for t0 {t0}, t1 {t1}, pos {pos}")
        return None

    # NOTE(review): the time span is measured between reading timestamps while
    # the counts of every reading are summed -- confirm this matches how the
    # detector integrates its counts.
    ess_counts = data_ess_counts['value'].sum()
    # background counts expected during this time span
    ess_bg_counts = counts_per_s_BG_agg * ess_time
    ess_counts_bg_corr = ess_counts - ess_bg_counts
    # background corrected counts per second in the ESS detector
    ess_cps = ess_counts_bg_corr / ess_time

    # reference detector readings on the narrowed span
    in_ref = (data_reference_s.index >= t0) & (data_reference_s.index <= t1)
    ref_data = data_reference_s[in_ref]
    ref_total_time = 0
    if len(ref_data) > 0:
        ref_total_time = (ref_data.index[-1] - ref_data.index[0]).total_seconds()

    if ref_total_time > 0:
        ref_total_counts = ref_data.loc[:, 'sum_counts'].sum()
        # background counts expected during the reference time span
        ref_bg_counts = ref_counts_per_s_BG_agg * ref_total_time
        ref_counts_bg_corr = ref_total_counts - ref_bg_counts
        # background corrected counts per second in the reference detectors
        ref_cps = ref_counts_bg_corr / ref_total_time

        fig, ax = plt.subplots(figsize=(8,3))
        sns.set()
        sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
        sns.lineplot(x=ref_data.index, y=ref_data['sum_counts'], ax=ax)
        ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
        ax.set_ylabel('Counts in ref \n detectors [1/(30s)]')
        ax.set_xlabel(f'Time, {DAY}')
        plt.title(f"{t0} \n {t1}")
        plt.show()
    else:
        # placeholder marking "no reference data"; later cells look for == 1
        ref_cps = 1
        ref_total_counts = 1
        ref_total_time = 1

    # ESS rate normalized with the reference rate
    ess_cps_norm_ref = ess_cps / ref_cps

    # average dose on the narrowed span
    in_dose = (data_dose.index >= t0) & (data_dose.index <= t1)
    avg_dose = data_dose.loc[in_dose, 'dose_corrected'].mean()

    # ESS rate normalized with the average dose
    ess_cps_norm_dose = ess_cps / avg_dose

    return pd.Series(
        [t0, t1, pos, ess_counts, ess_time,
         ess_counts_bg_corr, ref_total_counts, ref_total_time, ess_cps, ref_cps,
         ess_cps_norm_ref, avg_dose, ess_cps_norm_dose],
        index=['t0', 't1', 'pos', 'ess_total_counts', 'ess_total_time',
               'ess_counts_bg_corr', 'ref_total_counts', 'ref_total_time', 'ess_cps', 'ref_cps',
               'ess_cps_norm_ref', 'avg_dose', 'ess_cps_norm_dose']
    )
# For every edge position interval: background corrected ESS counts, reference
# detector counts and average dose, one result row per position.
data_result = data_pos.apply(
    lambda row: get_total_counts(row['time_start'], row['time_stop'], row['edge_pos']),
    axis=1,
)

data_result

## Predict the reference counts from the dose where missing

In [None]:
def resample_30_seconds(df, cols, range_start, range_end):
    """Resample selected columns of a time-indexed frame to 30 s means.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime index.
    cols : list of str
        Columns of ``df`` to resample.
    range_start, range_end :
        Start and end of a regular 30 s grid that is outer-joined with the
        resampled data, so grid points without data appear as NaN rows.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``cols`` holding the 30 s means; the index is
        named 'key_0' (the name pandas gives the merge key).
    """
    # '30s' is the current spelling of the seconds alias; the upper-case 'S'
    # is deprecated in recent pandas releases
    grid = pd.date_range(start=range_start, end=range_end, freq='30s')
    df_out = pd.DataFrame(pd.Series(grid, name='time')).set_index('time')

    this_d = pd.DataFrame({col: df[col].resample('30s').mean() for col in cols})

    # outer join keeps both the grid points and every resampled bin
    df_out = df_out.merge(this_d, left_on=df_out.index, right_on=this_d.index, how='outer')
    df_out = df_out.set_index('key_0')

    return df_out

In [None]:
# Bring dose and reference counts onto 30 s bins spanning the selected dose data.
t0 = data_dose_s.index[0]
t1 = data_dose_s.index[-1]

data_res_dose = resample_30_seconds(data_dose_s, ['dose_corrected'], t0, t1)
data_res_ref = resample_30_seconds(data_reference_s, ['sum_counts'], t0, t1)
for resampled in (data_res_dose, data_res_ref):
    resampled.index.name = 'time'

# cut both frames at the same end time
cutoff = "2020-02-04 19:49:00"
data_res_dose = data_res_dose.loc[:cutoff]
data_res_ref = data_res_ref.loc[:cutoff]

# print every timestamp that is present in one frame but missing in the other
for ours, theirs in ((data_res_dose, data_res_ref), (data_res_ref, data_res_dose)):
    for time in ours.index:
        if time not in theirs.index:
            print(time)
        

In [None]:
# Reference counts against dose, paired by row position of the two resampled
# frames (this relies on both frames having the same length and row order).
fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
dose_values = data_res_dose['dose_corrected'].values
ref_values = data_res_ref['sum_counts'].values
sns.scatterplot(x=dose_values, y=ref_values, s=80, ax=ax)

ax.set(ylabel='ref detector counts', xlabel='dose')
ax.set_xlim(1300, 1750)
plt.show()

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Linear model: reference detector rate as a function of the dose.

# most frequent 'ard_time' value divided by 1000; used below to convert counts
# per readout into counts per second
ref_readout_time = data_reference_full['ard_time'].value_counts().index[0]/1000

X = data_res_dose['dose_corrected'].values
y = data_res_ref['sum_counts'].values

# keep only the rows where both dose and reference counts are present
# (rows are paired by position, so both arrays must have the same length)
both_present = ~np.isnan(X) & ~np.isnan(y)
X = X[both_present]
y = y[both_present]

# convert y to cps instead of cp(30s)
y = y / ref_readout_time

X_train = X.reshape(-1,1)
y_train = y.reshape(-1,1)

reg = LinearRegression().fit(X_train, y_train)
c = reg.intercept_[0]  # intercept
m = reg.coef_[0]  # slope (first row of the coefficient array)

# fitted line evaluated on an integer dose grid from 1350 to 1750
X_fit = np.arange(1350,1750+1,1).reshape(-1,1)
y_fit = reg.predict(X_fit).reshape(-1)
X_fit = X_fit.reshape(-1)

fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.scatterplot(x=X, y=y, s=80, ax=ax)
sns.lineplot(x=X_fit, y=y_fit, color='red', label='fit', ax=ax)

ax.set(ylabel='ref detector cps [1/s]', xlabel='Dose [muSv/hr]')
ax.set_xlim(1300, 1750)
plt.show()
# coefficient of determination R^2
reg.score(X_train, y_train)

In [None]:
# quick check: reference detector rate predicted by the fit for a dose of 1550
reg.predict([[1550]]).reshape(-1)[0]

### Predict the total number of counts, use t0 and t1 as the time

In [None]:
# results subset without reference detector data
# (get_total_counts stores the placeholder value 1 in 'ref_total_counts' when it
# finds no reference data for an interval)
# NOTE(review): data_wo_ref is not used in the visible remainder of the notebook.
data_wo_ref = data_result[data_result['ref_total_counts'] == 1]

def predict_ref_counts(row, reg):
    """Predict the reference detector rate of one result row from its average dose.

    Parameters
    ----------
    row : pandas.Series
        Result row providing 't0', 't1' and 'avg_dose'.
    reg :
        Fitted regressor whose ``predict`` accepts ``[[dose]]``.

    Returns
    -------
    pandas.Series
        'pred_ref_time'      : length of the interval t0..t1 in seconds,
        'pred_ref_cps'       : rate predicted by ``reg`` for the average dose,
        'pred_ref_cps_total' : predicted rate minus the notebook-level
                               background rate ``ref_counts_per_s_BG_agg``.
    """
    # total_seconds() covers the whole span; .seconds would drop whole days
    t_diff = (row['t1'] - row['t0']).total_seconds()
    avg_dose = row['avg_dose']

    # predicted cps in the reference detectors
    pred_ref_cps = reg.predict([[avg_dose]]).reshape(-1)[0]

    # background corrected predicted cps
    pred_ref_cps_total = pred_ref_cps - ref_counts_per_s_BG_agg

    return pd.Series(
        [t_diff, pred_ref_cps, pred_ref_cps_total],
        index=["pred_ref_time", "pred_ref_cps", "pred_ref_cps_total"],
    )

# Predict the reference rate for every result row and attach the predictions as
# extra columns; the merge is keyed on the row index of data_result.
a = data_result.apply(predict_ref_counts, axis=1, args=(reg,))
data_result_pred = data_result.merge(a, on=data_result.index).drop(columns='key_0')
data_result_pred.head()

## Visualize the ess

In [None]:
# Scale both normalized ESS rates by their mean value at edge position 24
# (stored as `edge_fully_out`).
for metric in ('ess_cps_norm_ref', 'ess_cps_norm_dose'):
    edge_fully_out = data_result[data_result['pos'] == 24].loc[:, metric].values.mean()
    data_result[f'{metric}_scaled'] = data_result[metric]/edge_fully_out

    # data_result_pred was derived from data_result before these columns
    # existed, yet later cells read them from data_result_pred; mirror them so
    # a top-to-bottom run does not fail with a KeyError. Values are copied by
    # row position: the merge that built data_result_pred keeps one row per row
    # of data_result, in the same order.
    if 'data_result_pred' in globals():
        data_result_pred[f'{metric}_scaled'] = data_result[f'{metric}_scaled'].values


In [None]:
# ESS rate divided by the predicted, background corrected reference rate
data_result_pred['ess_cps_norm_ref_pred'] = data_result_pred['ess_cps'].div(
    data_result_pred['pred_ref_cps_total']
)
# scale by the mean value at edge position 24
at_pos_24 = data_result_pred['pos'] == 24
edge_fully_out = data_result_pred.loc[at_pos_24, 'ess_cps_norm_ref_pred'].values.mean()
data_result_pred['ess_cps_norm_ref_pred_scaled'] = data_result_pred['ess_cps_norm_ref_pred']/edge_fully_out

### Use ONLY the predicted cps in the reference detectors

In [None]:
# Scaled ESS rate (normalized with the predicted reference rate) per edge position
fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.scatterplot(data=data_result_pred, x='pos', y='ess_cps_norm_ref_pred_scaled', s=80, ax=ax)

ax.set(
    ylabel='ESS cps normalized \n with predicted reference detectors',
    xlabel='Edge position [mm]',
)

plt.show()

### Use the predictions only where no reference detector data was available

In [None]:
# inspect the first rows of the merged result table
data_result_pred.head()

In [None]:
# Start from the measured reference rate and substitute the predicted rate in
# every row that carries the placeholder value 1.
data_result_pred['ref_cps_pred'] = data_result_pred['ref_cps']
is_placeholder = data_result_pred['ref_cps_pred'] == 1
data_result_pred.loc[is_placeholder, 'ref_cps_pred'] = data_result_pred.loc[is_placeholder, 'pred_ref_cps_total']

In [None]:
# ESS rate divided by the hybrid reference rate (measured where available,
# predicted where it was missing)
data_result_pred['ess_cps_norm_ref_pred_hybrid'] = data_result_pred['ess_cps'].div(
    data_result_pred['ref_cps_pred']
)
# scale by the mean value at edge position 24
at_pos_24 = data_result_pred['pos'] == 24
edge_fully_out = data_result_pred.loc[at_pos_24, 'ess_cps_norm_ref_pred_hybrid'].values.mean()
data_result_pred['ess_cps_norm_ref_pred_hybrid_scaled'] = data_result_pred['ess_cps_norm_ref_pred_hybrid']/edge_fully_out
data_result_pred.head()

In [None]:

# Scaled ESS rate (hybrid reference normalization) per edge position
fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.scatterplot(data=data_result_pred, x='pos', y='ess_cps_norm_ref_pred_hybrid_scaled', s=80, ax=ax)

ax.set(
    ylabel='ESS cps normalized \n with ref. detectors \n predicted where missing',
    xlabel='Edge position [mm]',
)

plt.show()

In [None]:
# ess_cps_norm_ref_scaled: plotted only for rows with a scaled value below 10
below_ten = data_result_pred['ess_cps_norm_ref_scaled'] < 10
data_result_pred_t = data_result_pred[below_ten]

fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.scatterplot(data=data_result_pred_t, x='pos', y='ess_cps_norm_ref_scaled', s=80, ax=ax)

ax.set(
    ylabel='ESS cps normalized \n with ref. detectors ',
    xlabel='Edge position [mm]',
)

plt.show()

## Use the average dose to normalize

In [None]:
# Scaled ESS rate (normalized with the average dose) per edge position
fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.scatterplot(data=data_result, x='pos', y='ess_cps_norm_dose_scaled', s=80, ax=ax)

ax.set(
    ylabel='ESS cps normalized \n with average dose',
    xlabel='Edge position [mm]',
)

plt.show()

# Continue with reference detector normed and fit ESF

In [None]:
def get_edge_center(dataframe, metric, thresh_out, thresh_in, center_estimated=18.5, pts_fit=1):
    """Estimate the edge position at the centre of the transition.

    A straight line is fitted to the points within ``pts_fit`` of
    ``center_estimated``. The positions where that line is closest to the two
    plateau means (edge fully in / fully out) are looked up on a 0.01 mm grid
    between ``thresh_in`` and ``thresh_out``; the centre is the mean of those
    two positions. The data, plateau means and fitted line are plotted.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Results with a 'pos' column and the column named by ``metric``.
    metric : str
        Column used for the centring.
    thresh_out : float
        Positions above this value count as "edge fully out".
    thresh_in : float
        Positions below this value count as "edge fully in".
    center_estimated : float, optional
        Rough centre of the transition in mm (default 18.5).
    pts_fit : float, optional
        Half-width in mm of the region used for the line fit (default 1).

    Returns
    -------
    float
        Estimated centre position in mm.
    """
    mu_out = np.mean(dataframe[metric][ dataframe.pos > thresh_out ])
    mu_in = np.mean(dataframe[metric][ dataframe.pos < thresh_in ])
    print(f'Mean of counts when edge is out: {mu_out}')
    print(f'Mean of counts when edge is in: {mu_in}')

    # linear fit around the middle region
    x_fit_range1 = center_estimated + pts_fit
    x_fit_range2 = center_estimated - pts_fit
    def fitfunc(p, x):
        return p[0] * x + p[1]
    def errfunc(p, x, y):
        return fitfunc(p, x) - y # Distance to the fit function

    p0 = [1, 1] # Initial guess for the parameters
    in_fit_region = (dataframe.pos <= x_fit_range1) & (dataframe.pos >= x_fit_range2)
    X_f = dataframe.pos[in_fit_region].values
    Y_f = dataframe[metric][in_fit_region].values

    p1, success = optimize.leastsq(errfunc, p0[:], args=(X_f, Y_f))
    # evaluate the line between the two plateau thresholds
    X_fit = np.arange(thresh_in, thresh_out+0.01, 0.01)
    Y_fit = fitfunc(p1,X_fit)

    # plot the data, the two plateau means and the fitted line
    fig, ax = plt.subplots(figsize=(10,5))
    sns.set()
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    plt.scatter(dataframe.pos, dataframe[metric])
    plt.plot([dataframe.pos.min(), dataframe.pos.max()], [mu_in, mu_in], c='red', label='mu edge in or out')
    plt.plot([dataframe.pos.min(), dataframe.pos.max()], [mu_out, mu_out], c='red')

    plt.plot(X_fit, Y_fit, c='green', label='linear fit')
    plt.xlabel('Edge position [mm]')
    plt.ylabel('ESS cps normalized \n with ref detectors')
    plt.grid(True)
    plt.legend(loc='best')
    plt.show()

    # find where the fitted line comes closest to each plateau mean
    def find_nearest(array,value):
        idx = (np.abs(array-value)).argmin()
        return idx

    # first entry: edge fully in, second entry: edge fully out (edge position in mm)
    near_mu = np.array([X_fit[find_nearest(Y_fit, mu)] for mu in (mu_in, mu_out)])

    # center is in between the two
    center = np.mean(near_mu)
    print(near_mu)
    print(f"Center point (symmetry) is {center} mm.")

    return center

#### Center for the hybrid approach (reference detectors and where not available predicted from dose)

In [None]:
# Centre the edge positions on the transition found for the hybrid metric.
thresh_out = 21    # positions above this count as "edge fully out"
thresh_in = 15.5   # positions below this count as "edge fully in"
metric = 'ess_cps_norm_ref_pred_hybrid_scaled'
center = get_edge_center(data_result_pred, metric, thresh_out, thresh_in)
# take the positions from the same frame the centre was computed from, so the
# result does not depend on data_result and data_result_pred sharing row labels
data_result_pred['pos_centered_hybrid'] = data_result_pred['pos'] - center


In [None]:
m = [1e-2, 5e-2, 1e-1, 5e-1, 0, 1, 5, 1e1]  # NOTE(review): not used by the fit in this cell
x = data_result_pred['pos_centered_hybrid'].values
y = data_result_pred['ess_cps_norm_ref_pred_hybrid_scaled'].values
# Drop incomplete (position, value) pairs with ONE shared mask. Filtering x and
# y separately would shift the pairing, or give arrays of different length,
# whenever a NaN occurs in only one of the two columns.
complete = ~(np.isnan(x) | np.isnan(y))
x = x[complete]
y = y[complete]
def fitfunc(x, *p):
    """Logistic step: ``a / (1 + exp(-(x - b) / c)) + d`` with ``p = (a, b, c, d)``."""
    amplitude, midpoint, width, offset = p
    decay = np.exp(-((x - midpoint) / width))
    return amplitude / (1 + decay) + offset

# Fit the logistic edge function to the centred hybrid data and plot the result.
p0 = [0.5,0.5,0.5,0.5]
popt, pcov = curve_fit(fitfunc, x, y, p0=p0, maxfev=1000000)
FWHM = 3.53*popt[2]  # 3.53 * c in fermi function

# fitted curve on a fine grid
X = np.arange(-10, 10+0.01, 0.01)
Y_fit = fitfunc(X, *popt)

fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.scatterplot(
    x=data_result_pred['pos_centered_hybrid'],
    y=data_result_pred['ess_cps_norm_ref_pred_hybrid_scaled'],
    color='darkblue', s=100, label='measurement', ax=ax,
)
sns.lineplot(x=X, y=Y_fit, color='darkred', label='logistic fit', ax=ax)
ax.set(
    ylabel='ESS cps normalized \n with ref. detectors \n and pred. from avg dose',
    xlabel='Edge position [mm]',
)
ax.text(2, 0.9, f'FWHM {FWHM:.2f} mm')
ax.set_xlim(-10, 10)
plt.show()

In [None]:
# Coefficient of determination R^2 = 1 - SS_res / SS_tot of the fit.

def l_SS_tot(x):
    """Total sum of squares: squared deviations of x from its mean."""
    return ((x-np.mean(x))**2).sum()


def l_SS_res(y_pred, y_true):
    """Residual sum of squares: squared differences between data and fit.

    The previous version summed (y_pred - mean(y_true))**2, which is the
    explained sum of squares and not the residual one.
    """
    return ((y_true-y_pred)**2).sum()


def l_R2(SS_tot, SS_res):
    """Coefficient of determination from the two sums of squares."""
    return 1 - (SS_res/SS_tot)


y_true = y
y_pred = fitfunc(x, *popt)

SS_tot = l_SS_tot(y_true) # observed data
SS_res = l_SS_res(y_pred, y_true)

R2 = l_R2(SS_tot, SS_res)
SS_tot, SS_res, R2

In [None]:
# chi-square of the fit; the standard deviation of the observed values is used
# as the uncertainty of every point
chisq = np.sum((y_true - y_pred)**2)/(np.std(y_true))**2
# reduced chi-square: 4 is the number of fitted parameters (a, b, c, d).
# It used to be evaluated as a bare expression and discarded; keep and report it.
chisq_red = chisq/(len(y_pred)-4)
print(f"Reduced chi-square: {chisq_red}")

# residuals of the fit against the centred edge position
residuals = (y_pred - y_true)
fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.scatterplot(x=x,y=residuals, color='darkblue', ax=ax, s=100)
# the x axis carries the position and the y axis the residuals
ax.set_ylabel('Residuals')
ax.set_xlabel('Edge position [mm]')
plt.show()

#### Center for the dose approach

In [None]:
# Centre the edge positions on the transition found for the dose-normalized metric.
thresh_out = 21    # positions above this count as "edge fully out"
thresh_in = 15.5   # positions below this count as "edge fully in"
metric = 'ess_cps_norm_dose_scaled'
center = get_edge_center(data_result_pred, metric, thresh_out, thresh_in)
# take the positions from the same frame the centre was computed from, so the
# result does not depend on data_result and data_result_pred sharing row labels
data_result_pred['pos_centered_dose'] = data_result_pred['pos'] - center


In [None]:
# inspect the first rows, now including the centred position column
data_result_pred.head()

In [None]:
m = [1e-2, 5e-2, 1e-1, 5e-1, 0, 1, 5, 1e1]  # NOTE(review): not used by the fit in this cell
x = data_result_pred['pos_centered_dose'].values
y = data_result_pred['ess_cps_norm_dose_scaled'].values
# Drop incomplete (position, value) pairs with ONE shared mask. Filtering x and
# y separately would shift the pairing, or give arrays of different length,
# whenever a NaN occurs in only one of the two columns.
complete = ~(np.isnan(x) | np.isnan(y))
x = x[complete]
y = y[complete]


def fitfunc(x, *p):
    """Logistic step: ``a / (1 + exp(-(x - b) / c)) + d`` with ``p = (a, b, c, d)``."""
    amplitude, midpoint, width, offset = p
    decay = np.exp(-((x - midpoint) / width))
    return amplitude / (1 + decay) + offset

# Fit the logistic edge function to the centred dose-normalized data and plot it.
p0 = [1,1,1,1]
popt, pcov = curve_fit(fitfunc, x, y, p0=p0, maxfev=1000000)
FWHM = 3.53*popt[2]  # 3.53 * c in fermi function

# fitted curve on a fine grid
X = np.arange(-6, 6+0.01, 0.01)
Y_fit = fitfunc(X, *popt)

fig, ax = plt.subplots(figsize=(10,5))
sns.set()
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sns.scatterplot(
    x=data_result_pred['pos_centered_dose'],
    y=data_result_pred['ess_cps_norm_dose_scaled'],
    color='darkblue', s=100, label='measurement', ax=ax,
)
sns.lineplot(x=X, y=Y_fit, color='darkred', label='logistic fit', ax=ax)
ax.set(
    ylabel='ESS cps normalized \n with average dose',
    xlabel='Edge position [mm]',
)
ax.text(2, 0.8, f'FWHM {FWHM:.2f} mm')
ax.set_xlim(-7, 7)
plt.show()

#### Get R squared


In [None]:
# Coefficient of determination R^2 = 1 - SS_res / SS_tot of the fit.

def l_SS_tot(x):
    """Total sum of squares: squared deviations of x from its mean."""
    return ((x-np.mean(x))**2).sum()


def l_SS_res(y_pred, y_true):
    """Residual sum of squares: squared differences between data and fit.

    The previous version summed (y_pred - mean(y_true))**2, which is the
    explained sum of squares and not the residual one.
    """
    return ((y_true-y_pred)**2).sum()


def l_R2(SS_tot, SS_res):
    """Coefficient of determination from the two sums of squares."""
    return 1 - (SS_res/SS_tot)


y_true = y
y_pred = fitfunc(x, *popt)

SS_tot = l_SS_tot(y_true) # observed data
SS_res = l_SS_res(y_pred, y_true)

R2 = l_R2(SS_tot, SS_res)
SS_tot, SS_res, R2

### Limit the range for the fit

In [None]:

def fit_esf_to_data(x, y, limit_range, m):
    """Fit the logistic edge function to the points with ``|x| <= limit_range``.

    The fitted curve is plotted on top of all dose-normalized measurement
    points taken from the notebook-level ``data_result_pred`` frame.

    Parameters
    ----------
    x, y : numpy.ndarray
        Centred edge positions and the matching values; same length.
    limit_range : float
        Half-width of the position window used for the fit.
    m :
        Unused; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(popt, pcov, FWHM)``: the fitted parameters (a, b, c, d), their
        covariance as returned by ``curve_fit``, and ``3.53 * c``. Earlier
        versions returned nothing, so the fit result could only be read off the
        plot.
    """
    mask_x = ((x >= -limit_range) & (x <= limit_range))
    x = x[mask_x]
    y = y[mask_x]
    print(x,y)
    def fitfunc(x, *p):
        a, b, c, d = p
        z = np.exp( -( (x-b)/(c) ) )
        return (a / ( 1 + z )) + d

    p0 = [1,1,1,1]
    popt, pcov = curve_fit(fitfunc, x, y, p0=p0, maxfev=1000000)
    FWHM = 3.53*popt[2]  # 3.53 * c in fermi function
    # fitted curve on a fine grid
    X = np.arange(-10, 10+0.01, 0.01)
    Y_fit = fitfunc(X, *popt)

    fig, ax = plt.subplots(figsize=(10,5))
    sns.set()
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    # all measurement points are shown, not only those used for the fit
    sns.scatterplot(x=data_result_pred['pos_centered_dose'], y=data_result_pred['ess_cps_norm_dose_scaled'], color='darkblue', ax=ax, s=100, label='measurement')
    sns.lineplot(x=X,y=Y_fit, color='darkred', ax=ax, label='logistic fit')
    ax.set_ylabel('ESS cps normalized \n with average dose')
    ax.set_xlabel('Edge position [mm]')
    plt.text(2, 0.8, f'FWHM {FWHM:.2f} mm')
    plt.xlim(-7,7)
    plt.show()

    return popt, pcov, FWHM

In [None]:
m = [1e-2, 5e-2, 1e-1, 5e-1, 0, 1, 5, 1e1]
# limit to some ranges
x = data_result_pred['pos_centered_dose'].values
y = data_result_pred['ess_cps_norm_dose_scaled'].values
# Drop incomplete (position, value) pairs with ONE shared mask. Filtering x and
# y separately would shift the pairing, or give arrays of different length,
# whenever a NaN occurs in only one of the two columns.
complete = ~(np.isnan(x) | np.isnan(y))
x = x[complete]
y = y[complete]

limit_range = 1 # plus minus in mm
fit_esf_to_data(x, y, limit_range, m)

In [None]:

# grid from 0.3 to 2 in steps of 0.01, then scaled by 0.1 (mm -> cm according to
# the inline comments); what the values stand for is not stated in this notebook
arr_r = np.arange(0.3, 2+0.01, 0.01) # mm
arr_r = 0.1 * arr_r # cm
arr_r

In [None]:
# scratch check: number of points on a grid from 0.3 to 0.5 in steps of 0.005
np.arange(0.3, 0.5+0.005, 0.005).shape

In [None]:
# scratch value left in the last cell; it is only displayed, not assigned
0.06