# Load ALL the data in the database. Requires > 20 GB of RAM!!!!

In [None]:
import numpy as np
import pandas as pd
import pymysql
import sqlalchemy as sql
import datetime 
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d

In [None]:


# Load the database user name and password from a local credentials file
# (read as CSV with a header row; the columns `username` and `password` are used).
credentials_file = '/Users/hkromer/02_PhD/01.github/dash_NG/credentials.pw'

credentials = pd.read_csv(credentials_file, header=0)
user = credentials['username'].iloc[0]  # username
pw = credentials['password'].iloc[0]


# Connection settings for the MySQL server
host = "twofast-RPi3-0"  # your host
passwd = pw  # password
db = "NG_twofast_DB"  # name of the database

# NOTE(review): user and pw are inserted into the URL unescaped; characters
# such as '@' or '/' in either of them would need URL-escaping - confirm the
# stored credentials do not contain any.
connect_string = f'mysql+pymysql://{user}:{pw}@{host}/{db}'
sql_engine = sql.create_engine(connect_string)

def readDoseOverTime(sql_engine, startTime):
    """
    Read the dose entries recorded after ``startTime`` from the database.

    sql_engine: SQLAlchemy engine (or connection) that is handed to ``pd.read_sql``.
    startTime: lower bound (exclusive) for the ``time`` column, e.g. "2017-01-01".
    returns: DataFrame with all columns of the table ``data_dose``, ordered by
        descending ``id``.
    """
    # Bind startTime as a query parameter instead of pasting it into the SQL
    # text: pasted unquoted, a date such as 2017-01-01 is parsed by the server
    # as the arithmetic expression 2017 - 01 - 01 and the filter is lost.
    # %(t)s is the pyformat placeholder style understood by the PyMySQL driver.
    query = "SELECT * FROM data_dose WHERE time > %(t)s ORDER BY id DESC"
    df = pd.read_sql(query, sql_engine, params={"t": startTime})

    return df

def readHVOverTime(sql_engine, startTime):
    """
    Read the high voltage entries recorded after ``startTime`` from the database.

    sql_engine: SQLAlchemy engine (or connection) that is handed to ``pd.read_sql``.
    startTime: lower bound (exclusive) for the ``time`` column, e.g. "2017-01-01".
    returns: DataFrame with all columns of the table ``data_HV``, ordered by
        descending ``id``.
    """
    # Bind startTime as a query parameter instead of pasting it into the SQL
    # text: pasted unquoted, a date such as 2017-01-01 is parsed by the server
    # as the arithmetic expression 2017 - 01 - 01 and the filter is lost.
    # %(t)s is the pyformat placeholder style understood by the PyMySQL driver.
    query = "SELECT * FROM data_HV WHERE time > %(t)s ORDER BY id DESC"
    df = pd.read_sql(query, sql_engine, params={"t": startTime})

    return df

def extract_day(row):
    """
    Build the day label 'year-month-day' (numbers are not zero-padded) from an
    object that offers the attributes ``year``, ``month`` and ``day``.
    """
    return f'{row.year}-{row.month}-{row.day}'

def remove_nonsense_days(df, threshold):
    """
    Keep or discard the data of a single day, depending on the HV it reached.

    df: entries of one day with the columns 'HV_voltage' and 'day'.
    threshold: the largest 'HV_voltage' entry of the day has to reach this value.
    returns: df itself if the day qualifies, otherwise None.
    """
    peak_voltage = df['HV_voltage'].max()
    current_day = df['day'].unique()[0]
    # log the day together with its peak voltage
    print(current_day, peak_voltage)
    return df if peak_voltage >= threshold else None

## Dose

In [None]:
# Fetch all dose entries after startTime. The query returns the highest id
# first, so the row order is reversed before the frame is cached on disk.
startTime = "2017-01-01"
df_dose = readDoseOverTime(sql_engine, startTime).iloc[::-1]
# df_dose.set_index('time', inplace=True)
df_dose.to_csv('/Users/hkromer/02_PhD/tmp/df_dose.csv')

In [None]:
# Reload the cached dose data and turn the 'time' column into timestamps.
df_dose = pd.read_csv(
    '/Users/hkromer/02_PhD/tmp/df_dose.csv', index_col=0
).assign(time=lambda frame: pd.to_datetime(frame['time']))

In [None]:
# Summarise the dose frame (info() prints the summary itself) and preview its first rows.
print(df_dose.info())
df_dose.head()

## HV

In [None]:
# Fetch all HV entries after startTime. The query returns the highest id
# first, so the row order is reversed BEFORE the frame is cached on disk
# (same handling as for the dose data); the cached file is what gets reloaded.
startTime = "2017-01-01"
df_hv = readHVOverTime(sql_engine, startTime)
df_hv = df_hv.iloc[::-1]
# df_hv.set_index('time', inplace=True)
df_hv.to_csv('/Users/hkromer/02_PhD/tmp/df_hv.csv')

In [None]:
# Reload the cached HV data and turn the 'time' column into timestamps.
df_hv = pd.read_csv(
    '/Users/hkromer/02_PhD/tmp/df_hv.csv', index_col=0
).assign(time=lambda frame: pd.to_datetime(frame['time']))

In [None]:
# Summarise the HV frame (info() prints the summary itself) and preview its first rows.
print(df_hv.info())
df_hv.head()

### Drop all days whose maximum HV voltage stays below the threshold

- threshold is chosen as 50 kV

In [None]:
# A day is kept only if its largest HV_voltage entry reaches this value.
threshold = 50
df_hv['day'] = df_hv['time'].apply(extract_day)
hv_by_day = df_hv.groupby(['day'], as_index=False)
# rejected days come back as None from remove_nonsense_days
a = hv_by_day.apply(
    lambda day_frame: remove_nonsense_days(day_frame, threshold)
).dropna(how='all')
df_hv = a
df_hv.to_csv('/Users/hkromer/02_PhD/tmp/df_hv_nonsense_days_removed.csv')

### Resample to have data every 10 seconds

In [None]:
# Index the HV data by time stamp (needed for the time-based resampling) and preview it.
df_hv.set_index('time', inplace=True)
# df_hv.reset_index(inplace=True)
df_hv.head()

In [None]:
def resample_10_seconds(df, cols):
    """
    Resample the data of a single day onto a complete 10 s grid.

    df: entries of one day, indexed by time stamp, with a 'day' column that
        holds the date as 'year-month-day'.
    cols: names of the columns that are averaged within each 10 s bin.
    returns: DataFrame with the column 'key_0' (start of the 10 s bin) and one
        column per entry of cols; bins without data hold NaN.
    """
    day = df['day'].unique()[0]
    # Full 24 h grid: the last 10 s bin of a day starts at 23:59:50. Ending the
    # grid at 23:59:00 would leave out five bins and give days different
    # lengths depending on whether data exists in the last minute.
    # '10s' is used because the upper-case alias is deprecated in recent pandas.
    grid = pd.date_range(start=f'{day} 00:00:00', end=f'{day} 23:59:50', freq='10s')
    df_out = pd.DataFrame(index=pd.Index(grid, name='time'))

    # mean of every requested column within each 10 s bin
    this_d = pd.DataFrame({col: df[col].resample('10s').mean() for col in cols})

    # merging on the index values puts the time stamps into a column named 'key_0'
    df_out = df_out.merge(this_d, left_on=df_out.index, right_on=this_d.index, how='outer')
    return df_out

# Resample every day separately, then flatten the per-day results into one frame.
hv_columns = ['HV_voltage', 'HV_current']
df_hv_resampled = (
    df_hv.groupby(['day'], as_index=False)
    .apply(lambda day_frame: resample_10_seconds(day_frame, hv_columns))
    .reset_index()
    .rename(columns={'key_0': 'time'})
    .drop(columns=['level_0', 'level_1'])
)
# the day label is rebuilt from the time stamp of each 10 s bin
df_hv_resampled['day'] = df_hv_resampled['time'].apply(extract_day)
# bins without any measurement are set to 0
df_hv_resampled = df_hv_resampled.set_index('time').fillna(0)
df_hv_resampled.to_csv('/Users/hkromer/02_PhD/tmp/df_hv_nonsense_days_removed_resampled.csv')

### Go through the dose dataframe, drop those dates that have no HV data

In [None]:
# Preview the dose frame; the commented lines would reload it from the cached CSV instead.
# df_dose = pd.read_csv('/Users/hkromer/02_PhD/tmp/df_dose.csv', index_col=0)
# df_dose = df_dose.iloc[::-1]
# df_dose['time'] = pd.to_datetime(df_dose['time'])
# df_dose.reset_index(inplace=True)
# df_dose.set_index('time', inplace=True)
df_dose.head()

In [None]:
# Day labels that occur in the resampled HV frame; used below to decide which dose days are kept.
useful_days = df_hv_resampled['day'].unique().tolist()
print(useful_days)
def remove_nonsense_days_from_dose(df):
    """
    Keep the dose data of a single day only if that day is a useful day.

    df: dose entries of one day with a 'day' column.
    returns: df itself if its day is contained in the module-level list
        ``useful_days``, otherwise None.
    """
    current_day = df['day'].unique()[0]
    return df if current_day in useful_days else None

# Label every dose entry with its day, then keep only the days listed in
# useful_days; the row count is printed before and after the filtering.
# df_dose_sample.reset_index(inplace=True)
df_dose['day'] = df_dose['time'].apply(extract_day)
df_dose = df_dose.dropna(how='all')
print(len(df_dose))
a = df_dose.groupby(['day']).apply(remove_nonsense_days_from_dose)
df_dose = a.dropna(how='all')
print(len(df_dose))

In [None]:
# Preview the dose frame after the day filtering.
df_dose.head()

In [None]:
# Cache the day-filtered dose data on disk.
df_dose.to_csv('/Users/hkromer/02_PhD/tmp/df_dose_nonsense_days_removed.csv')

### Resample the remaining dose the same way as the HV

In [None]:
# Resample every day of the dose data separately, then flatten the per-day
# results into one frame.
df_dose.set_index('time', inplace=True)
dose_columns = ['dose', 'dose_voltage', 'dose_corrected']
a = (
    df_dose.groupby(['day'], as_index=False)
    .apply(lambda day_frame: resample_10_seconds(day_frame, dose_columns))
    .reset_index()
    .rename(columns={'key_0': 'time'})
    .drop(columns=['level_0', 'level_1'])
)

# the day label is rebuilt from the time stamp of each 10 s bin
a['day'] = a['time'].apply(extract_day)
a = a.set_index('time')
# bins without any measurement are set to 0
df_dose_resampled = a.fillna(0)
df_dose_resampled.to_csv('/Users/hkromer/02_PhD/tmp/df_dose_nonsense_days_removed_resampled.csv')

## Combine dose and hv

In [None]:
# Both frames have to cover the same number of days.
assert len(df_hv_resampled.day.unique()) == len(df_dose_resampled.day.unique())
# Outer merge on the time stamps: every 10 s bin of either frame is kept.
df_out = df_hv_resampled.merge(df_dose_resampled, left_on=df_hv_resampled.index, right_on=df_dose_resampled.index, how='outer')
# A time stamp that only exists in the dose frame has no day_x after the outer
# merge; take its day from day_y so that no row ends up without a day label.
df_out['day_x'] = df_out['day_x'].fillna(df_out['day_y'])
df_out = df_out.drop(columns = 'day_y')
df_out = df_out.rename(columns={'key_0': 'time', 'day_x': 'day'})
df_out.set_index('time', inplace=True)
df_out.to_csv('/Users/hkromer/02_PhD/tmp/df_COMBINED_nonsense_days_removed_resampled.csv')
df_out.head()

## Write the data of each day to a separate CSV file


In [None]:
# Folder that receives one CSV file per day.
outfolder = '/Users/hkromer/02_PhD/tmp/data_robert_2/'
def output_daily_data(df, outfolder):
    """
    Write the data of a single day to '<outfolder>/<day>.csv'.

    df: entries of one day with a 'day' column; the first day label names the file.
    outfolder: folder the CSV file is written to.
    """
    current_day = df['day'].unique()[0]
    print(f'Saving day {current_day}...')
    target_file = f'{outfolder}/{current_day}.csv'
    df.to_csv(target_file)
    
# Call output_daily_data once per day group; it is used for its side effect (writing the file) only.
df_out.groupby(['day'], as_index=False).apply(lambda x: output_daily_data(x, outfolder)).reset_index()

# Include the output

- compute from the dose


In [None]:
# 02_PhD/01.github/phd/05_MCNP/02.output_determination/MCNP_neutron_output/fun_getNeutronOutputPer100muSv.py
import pandas as pd
import numpy as np
import os, glob
from shutil import copyfile
import re
import sys


# # copy the MCNP files
# lst_ID = np.arange(126,135,1)
# path = '//fs03//LTH_Neutimag//hkromer//10_Experiments//02_MCNP//'

# for ID in lst_ID:
# 	this_path = '{}CurrentTarget{}/CurrentTarget{}_normal/'.format(path, ID, ID)
# 	for fname in glob.glob('{}df_neutron_output_for_Edeut_*.csv'.format(this_path)):
# 		newfname = fname.replace('df_', 'df_ID{}_'.format(ID))
# 		_ = re.findall(r'(df.+)', newfname)
# 		if len(_)>0:
# 			newfname = _[0]
# 		newfname = '//fs03/LTH_Neutimag/hkromer/02_Simulations/01_Python/MCNP_neutron_output/MCNP_results_oldTarget/{}'.format(newfname)
# 		copyfile(fname, newfname)

def getNeutronOutputPer100muSv(HV=100, LB6411_distance=70, newTarget=1, master_path=None):
    """
    Retrieves the neutron output per 100 µSv/h as determined from MCNP.

    HV: High voltage. This determines which MCNP run is taken to load the data:
        the file 'df_ID<id>_neutron_output_for_Edeut_<HV>.csv' is read. Default is 100.
    LB6411_distance: Distance between the source and LB6411 position, looked up
        in the 'distance' column of that file. Default is 70 cm.
    newTarget: if 1, the files in 'MCNP_results_newTarget' are used, otherwise
        the files in 'MCNP_results_oldTarget'.
    master_path: folder that contains the two result folders. None (default)
        selects the original hard-coded location.
    returns: numpy array with the entries of column 'W' whose 'distance' equals
        LB6411_distance (empty if the distance is not listed in the file).

    Prints the available HV settings and exits via sys.exit(1) if no result
    file exists for HV.
    """
    if master_path is None:
        master_path = '/Users/hkromer/02_PhD/01.github/phd/05_MCNP/02.output_determination/MCNP_neutron_output/'
    subfolder = 'MCNP_results_newTarget' if newTarget == 1 else 'MCNP_results_oldTarget'
    # the trailing '' keeps the path separator at the end of the folder name
    path_to_MCNP_OutputPer100muSv = os.path.join(master_path, subfolder, '')

    # One pass over the result files: remember for each simulated HV the file
    # of its MCNP run. Files that do not follow the naming scheme are skipped.
    name_pattern = re.compile(r'df_ID(\d+)_neutron_output_for_Edeut_(\d+)\.csv')
    csv_by_HV = {}
    for fname in glob.glob('{}*.csv'.format(path_to_MCNP_OutputPer100muSv)):
        match = name_pattern.fullmatch(os.path.basename(fname))
        if match is None:
            continue
        # setdefault: the first file found for an HV wins
        csv_by_HV.setdefault(int(match.group(2)), fname)

    if HV not in csv_by_HV:
        print('--- Available high voltage settings: {}'.format(list(csv_by_HV)))
        print('--- High voltage value of ' + str(HV) + ' is not in an MCNP run. sys.exit(). --- ')
        # non-zero status: a bare sys.exit() would report success
        sys.exit(1)

    df = pd.read_csv(csv_by_HV[HV], header=0)
    neutronOutputPer100muSv = df.W[df.distance == LB6411_distance].values

    return neutronOutputPer100muSv

# print(getNeutronOutputPer100muSv(HV=85, LB6411_distance=50, newTarget=0))

## Get a relation between muSv/hr and total neutron output for 50, 55, and 70 cm as position of the LB6411

- returns a df with rows as different distances, cols are the HV values and the values are the outputs in n/s per 100 muSv/hr

In [None]:
# Look up the MCNP result for every HV at each of the three LB6411 distances
# and collect them in one table (rows: distance, columns: HV).
HVs = [105, 80, 95, 110, 100, 90, 85]

relation_rows = {}
for distance_cm in (50, 55, 70):
    relation_rows[str(distance_cm)] = pd.Series({
        HV: getNeutronOutputPer100muSv(HV=HV, LB6411_distance=distance_cm, newTarget=1)[0]
        for HV in HVs
    })

s_50 = relation_rows['50']
s_55 = relation_rows['55']
s_70 = relation_rows['70']
df_relation = pd.DataFrame([s_50, s_55, s_70], index = ['50', '55', '70'])

df_relation.to_csv('/Users/hkromer/02_PhD/tmp/relation_dose_output.csv')
df_relation

In [None]:
# Show the values of the row labelled '50', sorted in ascending order.
df_relation.loc['50'].sort_values().values

## Add to the dataframe the neutron output


In [None]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

# Reload the combined HV/dose frame from disk and index it by time stamp.
df = pd.read_csv('/Users/hkromer/02_PhD/tmp/df_COMBINED_nonsense_days_removed_resampled.csv')
df['time'] = pd.to_datetime(df['time'])
df = df.set_index('time')

In [None]:
# Position of the LB6411 for the days listed here. Only on 2017-12-1 the
# sphere was at 50 cm.
positions = {'2017-12-1': 50}

# Cubic interpolation of the table rows for 50 and 70 (HV -> table value);
# values outside the tabulated HV range are extrapolated.
relation_50 = df_relation.loc['50'].sort_values()
relation_70 = df_relation.loc['70'].sort_values()
interp_50 = interp1d(relation_50.index, relation_50.values, kind='cubic', fill_value="extrapolate")
interp_70 = interp1d(relation_70.index, relation_70.values, kind='cubic', fill_value="extrapolate")


# interpolations keyed by the LB6411 distance as string
interpolation_output = {'50': interp_50, '70': interp_70}

def compute_neutron_output(row):
    """
    Convert the corrected dose of one row into a neutron output.

    row: one row of the combined frame that provides 'HV_voltage',
        'LB6411_distance' and 'dose_corrected'.
    returns: 'dose_corrected' multiplied by 1/100 of the value that the
        interpolation for the row's LB6411 distance yields at 'HV_voltage'.
    raises: KeyError if no interpolation exists for the row's distance.
    """
    # get the correct interpolation for HV at that distance of LB6411 sphere;
    # interpolation_output is keyed by the distance as string, e.g. '70'
    interp = interpolation_output[str(int(row['LB6411_distance']))]

    # the interpolated value refers to 100 muSv/hr, hence the factor 1/100
    factor = (1/100) * interp(row['HV_voltage'])

    return factor * row['dose_corrected']
    
# days that are not listed in positions get the distance 70
df['LB6411_distance'] = df['day'].map(positions).fillna(70)
# row-wise conversion of the corrected dose into the neutron output
df['neutron_output'] = df.apply(compute_neutron_output, axis=1)

In [None]:
# List the columns of the frame.
df.columns

## Save to csv for Robert

In [None]:
# Cache the complete frame including the neutron output on disk.
df.to_csv('/Users/hkromer/02_PhD/tmp/df_with_neutron_output.csv')

# Folder for the per-day CSV files and the columns written to them.
outfolder = '/Users/hkromer/02_PhD/tmp/data_robert_2/'
cols= ['HV_voltage', 'HV_current', 'dose_corrected', 'neutron_output'] # cols to save

def output_daily_data(df, outfolder, cols):
    """
    Write selected columns of a single day to '<outfolder>/<day>.csv'.

    df: entries of one day with a 'day' column; the first day label names the file.
    outfolder: folder the CSV file is written to.
    cols: names of the columns that are saved.
    """
    current_day = df['day'].unique()[0]
    print(f'Saving day {current_day}...')
    selection = df[cols]
    selection.to_csv(f'{outfolder}/{current_day}.csv')
    
# Call output_daily_data once per day group; it is used for its side effect (writing the file) only.
df.groupby(['day'], as_index=False).apply(lambda x: output_daily_data(x, outfolder, cols)).reset_index()

## Detect if there was operation

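- determined by: (criterion not yet written down; the cell below plots the neutron output of one day between 08:00 and 18:00 for visual inspection)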

In [None]:
# Plot the neutron output of one day between 08:00 and 18:00.
my_date = '2019-5-7'
my_range1 = f'{my_date} 08:00:00'
my_range2 = f'{my_date} 18:00:00'
t = df[ (df['day'] == my_date) ].loc[my_range1:my_range2]

plt.figure(figsize=(15,5))
plt.plot(t['neutron_output'], color='blue', label='raw')


# ax = plt.gca()
# ticks = ax.get_xticks()
# plt.plot(resampled_data['HHMM'], resampled_data['HV_current'], color='red', label='resampled 10s', linewidth=1)

# ax.set_xticks(ticks[::60])
# plt.ylim(0, 3000)
plt.ylabel('neutron_output')
plt.xlabel('time')


plt.legend(loc='best')
# plt.show has to be called; the bare name only references the function
plt.show()

### Resampling comparison: some plots

In [None]:
# Raw and 10 s resampled HV data of one day for the comparison plots.
# NOTE(review): this cell used to read from `df_hv_sample` and `testplot`,
# neither of which is defined anywhere in this notebook. The time-indexed
# frames df_hv (raw) and df_hv_resampled are assumed to be the intended
# sources - confirm.
comparison_cols = ['HV_voltage', 'HV_current']
raw_data = df_hv.loc['2017-11-29', comparison_cols].reset_index()
resampled_data = df_hv_resampled.loc['2017-11-29', comparison_cols].reset_index()

In [None]:
from datetime import datetime
def compute_HHMM(row):
    """
    Format a datetime as zero-padded 'hours:minutes', e.g. '08:05'.
    """
    hhmm_format = "%H:%M"
    return datetime.strftime(row, hhmm_format)

# add the time of day as 'HH:MM' text, used as x values in the plots
resampled_data['HHMM'] = resampled_data['time'].apply(compute_HHMM)
raw_data['HHMM'] = raw_data['time'].apply(compute_HHMM)

In [None]:
# Compare raw and resampled HV_voltage over the time of day.
plt.figure(figsize=(15,5))


plt.plot(raw_data['HHMM'], raw_data['HV_voltage'], color='blue', label='raw')

# the tick positions are read after the first plot and thinned out to every 60th below
ax = plt.gca()
ticks = ax.get_xticks()
plt.plot(resampled_data['HHMM'], resampled_data['HV_voltage'], color='red', label='resampled 10s')

ax.set_xticks(ticks[::60])
plt.ylabel('HV_voltage')
plt.xlabel('time')


plt.legend(loc='best')
# plt.show has to be called; the bare name only references the function
plt.show()

In [None]:
# Compare raw and resampled HV_current over the time of day.
plt.figure(figsize=(15,5))
plt.plot(raw_data['HHMM'], raw_data['HV_current'], color='blue', label='raw')

# the tick positions are read after the first plot and thinned out to every 60th below
ax = plt.gca()
ticks = ax.get_xticks()
plt.plot(resampled_data['HHMM'], resampled_data['HV_current'], color='red', label='resampled 10s', linewidth=1)

ax.set_xticks(ticks[::60])
plt.ylabel('HV_current')
plt.xlabel('time')


plt.legend(loc='best')
# plt.show has to be called; the bare name only references the function
plt.show()

In [None]:
# Preview the current selection t.
t.head()

In [None]:
# Compare the raw dose_corrected of one day (08:00 to 18:00) with the
# resampled dose frame.
my_date = '2019-2-19'
my_range1 = f'{my_date} 08:00:00'
my_range2 = f'{my_date} 18:00:00'
t = df_dose_resampled[ (df_dose_resampled['day'] == my_date) ].loc[my_range1:my_range2]
t_raw = df_dose[ df_dose['day'] == my_date ].loc[my_range1:my_range2]
plt.figure(figsize=(15,5))
plt.plot(t_raw['dose_corrected'], color='blue', label='raw')
plt.plot(t['dose_corrected'], color='red', label='resampled 10s', linewidth=1)

# ax = plt.gca()
# ticks = ax.get_xticks()
# plt.plot(resampled_data['HHMM'], resampled_data['HV_current'], color='red', label='resampled 10s', linewidth=1)

# ax.set_xticks(ticks[::60])
plt.ylabel('dose_corrected')
plt.xlabel('time')


plt.legend(loc='best')
# plt.show has to be called; the bare name only references the function
plt.show()

In [None]:
# Compare the raw dose_corrected of one day (08:00 to 18:00) with the
# combined frame df_out.
my_date = '2019-2-19'
my_range1 = f'{my_date} 08:00:00'
my_range2 = f'{my_date} 18:00:00'
t = df_out[ (df_out['day'] == my_date) ].loc[my_range1:my_range2]
t_raw = df_dose[ df_dose['day'] == my_date ].loc[my_range1:my_range2]
plt.figure(figsize=(15,5))
plt.plot(t_raw['dose_corrected'], color='blue', label='raw')
plt.plot(t['dose_corrected'], color='red', label='resampled 10s', linewidth=1)

# ax = plt.gca()
# ticks = ax.get_xticks()
# plt.plot(resampled_data['HHMM'], resampled_data['HV_current'], color='red', label='resampled 10s', linewidth=1)

# ax.set_xticks(ticks[::60])
plt.ylabel('dose_corrected')
plt.xlabel('time')


plt.legend(loc='best')
# plt.show has to be called; the bare name only references the function
plt.show()

In [None]:
# Preview the combined frame.
df_out.head()