In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import importlib
import dask.dataframe as dd
import gc

gc.enable()


import matplotlib.pyplot as plt



# Local imports
#------------------------------------------------
import WireDAQ.PandasPlus           # Make sure this import is after pandas
import WireDAQ.Constants as cst
import WireDAQ.NXCALS as nx
import WireDAQ.Parser as parser
import WireDAQ.Efficiency as eff

main = __import__('000_Efficiency_per_fill')


# NXCALS variable containers shared by every cell below
#------------------------------------------------
# One list of wire containers per beam, keyed by beam name
wires     = {beam_name: [nx.NXCALSWire(loc = loc) for loc in wire_locs]
             for beam_name, wire_locs in {'B1': ['L1B1','L5B1'],
                                          'B2': ['R1B2','R5B2']}.items()}
# Beam containers, in the order B1, B2
beams     = [nx.NXCALSBeam(name) for name in ('B1','B2')]
# Machine-level container
LHC       = nx.NXCALSLHC()
# Integer indices 0..3563
b_slots   = np.arange(3564)
#------------------------------------------------


# Setting default values
#------------------------------------------------
# Default figure dimensions (units are not stated in this notebook — TODO confirm)
_default_fig_width  = 2000
_default_fig_height = 400

# Default device label
_default_device = 'DBLM'

# Default import mode and data/output locations
_default_import = 'local'
_default_path   = '/home/lumimod/work/run/data/2023/rawdata/'
_default_out    = '/eos/user/p/phbelang/www/Monitoring_BBCW/'


# NOTE(review): this second assignment overrides the `_default_path` set above,
# so every cell below reads its data from this location instead.
_default_path = '/home/phbelang/002mount'

#------------------------------------------------

# display(pd.DataFrame(gc.get_stats()))



2023-05-26 17:21:34,062 [INFO] 
Limited Total Variation Regularization Support Detected! 
---> CVXPY is not installed. 
---> Many Total Variation Methods require CVXPY including: 
---> velocity, acceleration, jerk, jerk_sliding, smooth_acceleration
---> Please install CVXPY to use these methods.
---> Recommended to also install MOSEK and obtain a MOSEK license.
You can still use: total_variation_regularization.iterative_velocity

2023-05-26 17:21:34,064 [INFO] 
Limited Linear Model Support Detected! 
---> PYCHEBFUN is not installed. 
---> Install pychebfun to use chebfun derivatives (https://github.com/pychebfun/pychebfun/) 
You can still use other methods 

2023-05-26 17:21:34,064 [INFO] 
Limited Linear Model Support Detected! 
---> CVXPY is not installed. 
---> Install CVXPY to use lineardiff derivatives 
You can still use other methods 




>>> Loading nx2pd.py version of 24.10.2022 @ 03:17PM



---
# Dask, computing efficiency
---

In [None]:
# psutil provides the system-wide memory counters printed below
import psutil

# Memory footprint before any data is loaded
print(40*'-' + '\nBefore')
# Percentage of virtual memory in use
print('RAM memory % used:', psutil.virtual_memory().percent)
# Virtual memory in use, converted from bytes to GB
print('RAM Used (GB):', psutil.virtual_memory().used/1000000000)


# Fill selection and binning parameters
FILL        = 8773
import_from = 'local'

baseline  = None
dt        = 10
fill      = FILL
data_path = _default_path



# Start/end of the fill and the bin edges covering it in steps of dt/1e-9
unix_s,unix_e = parser.fill_unix_times(fill,data_path=data_path)
unix_bins     = np.arange(unix_s,unix_e,dt/1e-9)

# STARTS HERE
#=================================================

def evaluate():
    """Load, bin and assemble the dBLM data of every beam for the selected fill.

    Reads the notebook-level names `beams`, `data_path`, `fill` and `unix_bins`.
    For each beam, every sub-directory of ``<data_path>/HX:FILLN=<fill>`` is read
    with dask (only the 'V' and 'H-V-S' dBLM columns), sorted by its index,
    materialised with ``.compute()`` and binned on `unix_bins` with `bin_unix`.
    The per-type results are concatenated column-wise, the per-directory results
    row-wise, and the per-beam results column-wise.

    Side effects: displays the garbage-collector statistics and prints the
    system memory usage once the frame is assembled.

    Returns
    -------
    pandas.DataFrame
        The binned data, sorted by index (named 'unix'), with 'Timestamp'
        (index converted to `cst.TZONE`) and 'Time' (1e-9 times the offset
        from the first index value) inserted as the first two columns.
    """
    # Kept local so the function carries its own dependency when profiled
    import psutil

    per_beam_list = []
    for beam in beams:
        # Variables for this beam
        variables = [beam['dBLM_Amp']['V'],
                    beam['dBLM_Amp']['H-V-S']]

        # Iterate through the sub-directories of the fill
        per_mode_list = []
        for bmode_path in Path(data_path + f'/HX:FILLN={fill}').glob("*"):

            # Lazy read, then order the rows by their original index
            _partition = dd.read_parquet(bmode_path,columns=variables)
            _partition['unix'] = _partition.index
            _partition = _partition.sort_values(by='unix')
            _partition = _partition.set_index('unix')

            # Materialise this sub-directory in memory
            _df = _partition.compute()

            # One binned frame per dBLM type
            per_type_list = []
            for dblmType in ['V','H-V-S']:
                observable = beam.dBLM_Amp[dblmType]
                per_type_list.append(_df.bin_unix(observable,bins=unix_bins))

            # Appending
            per_mode_list.append(pd.concat(per_type_list,axis=1))

            # Release the raw data before reading the next sub-directory
            del _partition
            del _df
            gc.collect()

        # Appending
        per_beam_list.append(pd.concat(per_mode_list,axis=0))

    # Appending
    df = pd.concat(per_beam_list,axis=1)

    # Adding proper timestamp
    #============================================
    df = df.sort_index()
    df.index.name = 'unix'
    df.insert(0,'Timestamp',df.index)
    df.insert(1,'Time',1e-9*(df.index - df.index[0]))
    df['Timestamp'] = df['Timestamp'].apply(lambda t: pd.Timestamp(t).tz_localize('UTC').tz_convert(cst.TZONE))
    #============================================

    display(pd.DataFrame(gc.get_stats()))

    # Getting % usage of virtual_memory ( 3rd field)
    print(40*'-' + '\nAfter')
    print('RAM memory % used:', psutil.virtual_memory()[2])
    # Getting usage of virtual_memory in GB ( 4th field)
    print('RAM Used (GB):', psutil.virtual_memory()[3]/1000000000)

    # Hand the result back: the following cells work on `df`
    return df

from memory_profiler import profile
evaluate = profile(evaluate)
# Keep the binned frame at notebook scope so the plotting cell below can use it
df = evaluate()


In [None]:
# Time evolution of one array entry of the second beam's 'V' dBLM signal
# (index 222 is presumably a bunch slot — TODO confirm)
plt.figure()
data = (df[beams[1]['dBLM_Amp']['V']]
        .dropna()
        .apply(lambda line:line[222]))
plt.plot(data.index,data.values)

In [None]:
# Fill selection and binning parameters
FILL        = 8773
import_from = 'local'

data_path= _default_path
fill     = FILL
dt       = 10
baseline = None

# 'V' and 'H-V-S' dBLM variables of both beams
variables = [beams[0]['dBLM_Amp']['V'],
            beams[0]['dBLM_Amp']['H-V-S'],
            beams[1]['dBLM_Amp']['V'],
            beams[1]['dBLM_Amp']['H-V-S']]

# Use a dedicated name for the profiler object: rebinding `parser` would hide the
# imported WireDAQ.Parser module, which other cells still need
# (e.g. `parser.fill_unix_times`).
mem_profiler = parser.Memory_profiler()
df = mem_profiler.load_and_bin(fill=FILL,variables = variables,dt = 10,beamMode = None,data_path= _default_path)

In [None]:
df

In [None]:
# Fill selection and binning parameters
FILL        = 8773
import_from = 'local'

data_path= _default_path
fill     = FILL
dt       = 10
baseline = None

# No explicit variable list is passed here (variables=None); the list used by the
# non-sequential variant is kept for reference:
# variables = [beams[0]['dBLM_Amp']['V'],
#             beams[0]['dBLM_Amp']['H-V-S'],
#             beams[1]['dBLM_Amp']['V'],
#             beams[1]['dBLM_Amp']['H-V-S']]

# Use a dedicated name for the profiler object: rebinding `parser` would hide the
# imported WireDAQ.Parser module, which other cells still need
# (e.g. `parser.fill_unix_times`).
mem_profiler = parser.Memory_profiler()
df = mem_profiler.load_and_bin_sequential(fill=FILL,variables = None,dt = 10,beamMode = None,data_path= _default_path)

In [2]:
# psutil provides the system-wide memory counters printed before and after
import psutil

# Memory footprint before loading
print(40*'-' + '\nBefore')
print('RAM memory % used:', psutil.virtual_memory().percent)
print('RAM Used (GB):', psutil.virtual_memory().used/1000000000)


# Fill selection and binning parameters
FILL        = 8773
import_from = 'local'

baseline  = None
dt        = 10
fill      = FILL
data_path = _default_path



# Start/end of the fill and the bin edges covering it in steps of dt/1e-9
unix_s,unix_e = parser.fill_unix_times(fill,data_path=data_path)
unix_bins     = np.arange(unix_s,unix_e,dt/1e-9)

# STARTS HERE
#=================================================

# 'V' and 'H-V-S' dBLM variables, first beam then second beam
variables = [beam['dBLM_Amp'][dblm_type]
             for beam in (beams[0],beams[1])
             for dblm_type in ('V','H-V-S')]

# Read the whole fill in one go (all sub-directories at once) and order it by index
_partition = dd.read_parquet(data_path + f'/HX:FILLN={fill}',columns=variables)
_df        = _partition.compute().sort_index()

# The lazy dask object is no longer needed once the data is in memory
del _partition
gc.collect()

# Bin every column separately, then put the binned columns side by side
df = pd.concat([_df.bin_unix(column,bins=unix_bins) for column in _df.columns],axis=1)

# Adding proper timestamp
#============================================
df = df.sort_index()
df.index.name = 'unix'
df.insert(0,'Timestamp',df.index)
df.insert(1,'Time',1e-9*(df.index - df.index[0]))
df['Timestamp'] = df['Timestamp'].apply(lambda t: pd.Timestamp(t).tz_localize('UTC').tz_convert(cst.TZONE))
#============================================

# display(pd.DataFrame(gc.get_stats()))


# Memory footprint after loading and binning
print(40*'-' + '\nAfter')
print('RAM memory % used:', psutil.virtual_memory().percent)
print('RAM Used (GB):', psutil.virtual_memory().used/1000000000)

----------------------------------------
Before
RAM memory % used: 31.2
RAM Used (GB): 9.2929024
Filename: /home/phbelang/abp/WireDAQ/WireDAQ/PandasPlus.py

Line #    Mem usage    Increment  Occurrences   Line Contents
   113   6557.6 MiB   6557.6 MiB           1   @profile
   114                                         def bin_unix(self,_var,bins=None,keeptype = True):
   115                                             # GROUPING DATA IN TIME WINDOWS
   116   6442.7 MiB   -114.9 MiB           1       sub     = self.dropna(subset=[_var])
   117   5302.2 MiB  -1140.5 MiB           1       grouped = sub.groupby(pd.cut(sub.index,bins=bins))
   118                                         
   119                                             # AVG in each time window
   120   5302.2 MiB      0.0 MiB           1       if keeptype:
   121   5302.2 MiB      0.0 MiB           1           _type  = sub.iloc[0][_var].dtype
   122   4658.6 MiB   -643.6 MiB       13169           values = grouped[_var]

In [13]:
print(40*'-' + '\nAfter')
print('RAM memory % used:', psutil.virtual_memory()[2])
# Getting usage of virtual_memory in GB ( 4th field)
print('RAM Used (GB):', psutil.virtual_memory()[3]/1000000000)

----------------------------------------
After
RAM memory % used: 47.1
RAM Used (GB): 14.158155776


In [7]:
gc.collect(generation=2)

28

In [8]:
gc.collect(generation=1)

0

In [9]:
gc.collect(generation=0)

0

In [12]:
_partition

NameError: name '_partition' is not defined

In [3]:
df.memory_usage(deep=True)/1e6,_df.memory_usage(deep=True)/1e6

(Index                                                    0.052672
 Timestamp                                                0.052672
 Time                                                     0.052672
 HC.TZ76.BLMDIAMOND2.3:AcquisitionIntegral:intSumBuf1    94.651584
 HC.TZ76.BLMDIAMOND3.3:AcquisitionIntegral:intSumBuf1    94.651584
 HC.TZ76.BLMDIAMOND2.5:AcquisitionIntegral:intSumBuf1    94.651584
 HC.TZ76.BLMDIAMOND3.5:AcquisitionIntegral:intSumBuf1    94.651584
 dtype: float64,
 Index                                                   18.547384
 HC.TZ76.BLMDIAMOND2.3:AcquisitionIntegral:intSumBuf1    61.962120
 HC.TZ76.BLMDIAMOND3.3:AcquisitionIntegral:intSumBuf1    61.962216
 HC.TZ76.BLMDIAMOND2.5:AcquisitionIntegral:intSumBuf1    61.962024
 HC.TZ76.BLMDIAMOND3.5:AcquisitionIntegral:intSumBuf1    61.962120
 dtype: float64)

In [11]:
_df.memory_usage(deep=True)/1e6

Index                                                   18.547384
HC.TZ76.BLMDIAMOND2.3:AcquisitionIntegral:intSumBuf1    61.962120
HC.TZ76.BLMDIAMOND3.3:AcquisitionIntegral:intSumBuf1    61.962216
HC.TZ76.BLMDIAMOND2.5:AcquisitionIntegral:intSumBuf1    61.962024
HC.TZ76.BLMDIAMOND3.5:AcquisitionIntegral:intSumBuf1    61.962120
dtype: float64

In [None]:
# Adding 'Timestamp' and 'Time' columns to the raw frame (safe to re-run)
#============================================
_df = _df.sort_index()
_df.index.name = 'unix'
# Remove the columns left by a previous execution of this cell: `insert`
# raises a ValueError when the column already exists.
_df = _df.drop(columns=['Timestamp','Time'],errors='ignore')
# Vectorised conversion of the index (read as UTC epoch values) to cst.TZONE,
# instead of building one pd.Timestamp per row
_df.insert(0,'Timestamp',pd.to_datetime(_df.index,utc=True).tz_convert(cst.TZONE))
# Offset from the first index value, scaled by 1e-9
_df.insert(1,'Time',1e-9*(_df.index - _df.index[0]))
#============================================

In [None]:
_df.set_index('Time')[variables[0]].dropna()

In [None]:
df.set_index('Time')[variables[1]].dropna()

In [None]:
_df.set_index('Time')[variables[1]].dropna()

---
# Testing binning
---

In [None]:
unix_s,unix_e = parser.fill_unix_times(fill,data_path=data_path)
unix_bins     = np.arange(unix_s,unix_e,dt/1e-9)

In [None]:
# Manual, step-by-step version of the time binning for a single variable,
# used to inspect the intermediate objects.
bins=unix_bins
_var=variables[0]
# GROUPING DATA IN TIME WINDOWS
# Keep only the rows where this variable is present, then group the rows by
# the bin (interval) their index value falls into.
sub     = _df.dropna(subset=[_var])
grouped = sub.groupby(pd.cut(sub.index,bins=bins))

# AVG in each time window
values   = np.array(grouped[_var].mean())
# Mid-point of each group's interval, used as the index of the binned frame
# NOTE(review): assumes `grouped.groups` and the `.mean()` result list the same
# bins in the same order — TODO confirm when some bins are empty.
unix     = np.array(pd.Series(grouped.groups.keys()).apply(lambda line:line.mid))

# Binned result: one row per time window
pd.DataFrame({_var:values},index=unix)

In [None]:
pd.Series(grouped.groups.keys()).apply(lambda line:line.mid).values.dtype

In [None]:
_type = sub.iloc[0][_var].dtype
grouped[_var].mean().apply(lambda line: line.astype(_type)).values

In [None]:
int(sub.index.astype('float32').values[0])/1e9

In [None]:
sub.index.astype('int64').values[0]/1e9

In [None]:
_type = sub.index.dtype

In [None]:
_type

In [None]:
_type = sub.index.dtype
pd.Series(grouped.groups.keys()).apply(lambda line:line.mid.astype(_type)).values

In [None]:
pd.Series(grouped.groups.keys()).apply(lambda line:line.mid).values[0]/1e9

In [None]:
np.array(grouped[_var].mean())

In [None]:
_df.index[0]/1*1e-9

In [None]:
df.iloc[:100]

In [None]:
gc.collect()

In [None]:
# First non-null entry of the binned frame for the first variable
# (the bare `sample1 =` was an incomplete statement, i.e. a SyntaxError)
sample1 = df[variables[0]].dropna().iloc[:1]

In [None]:
for i in range(15):
    print(df[variables[0]].dropna().iloc[:i].memory_usage(deep=True)/1e6)

In [None]:
i=1
sample1 = df[variables[0]].dropna().iloc[:i]
sample2 = _df[variables[0]].dropna().iloc[:i]

In [None]:
gc.collect()
display(pd.DataFrame({'data':sample1.values}).memory_usage(deep=True)/1e6)

gc.collect()
display(pd.DataFrame({'data':sample2.values}).memory_usage(deep=True)/1e6)

In [None]:
df[variables[0]].memory_usage(deep=True)/1e6

In [None]:
_df[variables[0]].dropna().memory_usage(deep=True)/1e6

In [None]:
_df[variables[0]].dropna()

In [None]:
_df[variables[0]].dropna().iloc[0].nbytes

In [None]:
gc.collect()
display(pd.DataFrame({'data':sample1.values.copy()}).memory_usage(deep=True)/1e6)

gc.collect()
display(pd.DataFrame({'data':sample2.values.copy()}).memory_usage(deep=True)/1e6)

In [None]:
# Memory footprint (MB) of the samples once rebuilt as Series
# (the first two from plain Python lists)
display(pd.Series(sample1.values.tolist()).memory_usage(deep=True)/1e6)
display(pd.Series(sample2.values.tolist()).memory_usage(deep=True)/1e6)
# NOTE(review): `sample3` is not defined in any cell visible in this notebook;
# this line raises NameError on a fresh kernel — confirm what it should refer to.
display(pd.Series(sample3.copy(deep=True)).memory_usage(deep=True)/1e6)

In [None]:
_df.dropna(subset=[variables[0]]).iloc[0].nbytes

In [None]:
display(sample1.apply(lambda line: line.astype('int32')).memory_usage(deep=True)/1e6)
display(sample2.apply(lambda line: line.astype('int32')).memory_usage(deep=True)/1e6)

In [None]:
sample2.values.tolist(),sample1.values.tolist()

In [None]:
pd.Series(sample1.values)

In [None]:
pd.Series(sample2.values)

In [None]:
sample2.apply(lambda line: [type(i) for i in line])

In [None]:
plt.figure()
plt.plot(sample1.apply(lambda line: [type(i) if not isinstance(i,np.int32) else 0  for i in line]).values[0])

In [None]:
plt.figure()
plt.plot(sample2.apply(lambda line: [type(i) if not isinstance(i,np.int32) else 0  for i in line]).values[0])

In [None]:
isinstance(sample1.values[0][0],np.int32)

In [None]:
sample2

In [None]:
sample1.values[0]

In [None]:
type(sample1.values)

In [None]:
sample2.values[0].nbytes/1e6

In [None]:
pd.DataFrame({'data':sample1.values}).astype('int32')

In [None]:
sample1.values[0].dtype

In [None]:
type(sample1.values[0]),type(sample2.values[0])

In [None]:
plt.figure()
plt.plot(sample1.values[0]/1e6,sample2.values[0]/1e6,'o')
plt.axis('equal')

In [None]:
sample1,sample2

In [None]:
for i in range(15):
    print(_df[variables[0]].dropna().iloc[:i].memory_usage(deep=True)/1e6)

In [None]:
_df[variables[0]].dropna().iloc[:1].memory_usage(deep=True)/1e6

In [None]:
np.shape(df[variables[0]].dropna().iloc[:1].values[0])

In [None]:
plt.figure()
plt.plot(df[variables[0]].dropna().iloc[:1].values[0],'o')

In [None]:
plt.figure()
plt.plot(_df[variables[0]].dropna().iloc[:1].values[0],'o')

In [None]:
np.shape(_df[variables[0]].dropna().iloc[:1].values[0])

In [None]:
_df[variables[0]].dropna().iloc[:1].values

In [None]:
df[variables[0]].dropna().iloc[:1]

In [None]:
_df[variables[0]].iloc[:1]

In [None]:
len(_df.index)

In [None]:
len(df.index)

In [None]:
df.iloc[:100].index.values.dtype,_df.iloc[:100].index.values.dtype

In [None]:
_df.iloc[:100].index.memory_usage(deep=True)

In [None]:
np.array([_df.index[0]],dtype='int64')

In [None]:
np.array([_df.index[0]],dtype='int32')

In [None]:
pd.Series(grouped.groups.keys()).apply(lambda line:line.mid)

In [None]:
np.array(grouped[_var].mean())

In [None]:
# Mid-point of every bin of the averaged result.
# pandas Index objects have no `.apply`; `.map` is the element-wise equivalent.
grouped[_var].mean().index.map(lambda line:line.mid)

In [None]:
grouped[_var].mean().values

In [None]:
import sys

In [None]:
sys.getsizeof(df)/1e6

In [None]:
sys.getsizeof(_df)/1e6

In [None]:
df.info(memory_usage="deep")

In [None]:
_df.info(memory_usage="deep")

In [None]:
df.memory_usage(deep=True)/1e6

In [None]:
_df.memory_usage(deep=True)/1e6

In [None]:
np.shape(df[variables[0]].dropna())

In [None]:
np.shape(_df[variables[0]].dropna())

In [None]:
_df[variables[0]].dropna().memory_usage(deep=True)/1e6

In [None]:
df[variables[0]].dropna().memory_usage(deep=True)/1e6

In [None]:
df[variables[0]].memory_usage(deep=True)/1e6

In [None]:
_df[variables[0]].memory_usage(deep=True)/1e6,df[variables[0]].memory_usage(deep=True)/1e6

In [None]:
_df[variables[0]].dropna().iloc[0].dtype,df[variables[0]].iloc[0].dtype

In [None]:
pd.Series([1,2,3],dtype='int32').dtype,pd.Series([1,2,3],dtype='float64').dtype

In [None]:
pd.DataFrame({'Data':[1,2,3,4,5,6,7,8]},dtype='int32').memory_usage(deep=True),pd.DataFrame({'Data':[1,2,3,4,5,6,7,8]},dtype='float64').memory_usage(deep=True)

In [None]:
pd.DataFrame({'Data':[1,2,3,4,5,6,7,8]},dtype='int32').memory_usage(deep=True),pd.DataFrame({'Data':[1,2,3,4,5,6,7,8]},dtype='float64').memory_usage(deep=True)

In [None]:
_df[variables[0]].dropna()

In [None]:

df[variables[0]].dropna().iloc[0].dtype

In [None]:

_df[variables[0]].dropna().iloc[0].dtype

In [None]:
# Rename the 'V' dBLM column of `beam` to the short label 'DBLM.<beam name>.V'.
# NOTE(review): `beam` is not bound at notebook scope by any visible cell (it only
# exists as a loop variable inside `evaluate`), so this raises NameError on a
# fresh kernel — set `beam` explicitly (e.g. from `beams`) before running.
# NOTE(review): the rename is in place, so later `_df[variables[...]]` look-ups of
# the renamed column will fail.
_df.rename(columns={beam.dBLM_Amp['V']:f'DBLM.{beam.name}.V'},inplace=True)

In [None]:
display(pd.DataFrame(gc.get_stats()))

In [None]:
plt.figure()
plt.plot(_df.index)

In [None]:
_partition.compute()

In [None]:
(unix_e-unix_s)*1e-9/3600

In [None]:
display(pd.DataFrame(gc.get_stats()))

In [None]:
LHC.Fill

In [None]:
test = dd.read_parquet(data_path + f'/HX:FILLN={fill}',columns=[LHC.Fill]).compute()
test.sort_index()

In [None]:
test.index[0]

In [None]:
# Print every entry of the fill directory whose path mentions 'STABLE'
for bmode_path in Path(data_path + f'/HX:FILLN={fill}').glob("*"):
    if 'STABLE' not in str(bmode_path):
        continue
    print(bmode_path)

In [None]:
# pathlib.Path has no `contains` method; test the substring on the path string,
# as done in the loop above.
'FIL' in str(bmode_path)