# Combine the Energy Info with Hits DF

1. Read HD5 files
2. Files are: `mc_info` and `mc_hits`
3. Combine `energy` from `mc_info` with `mc_hits`
4. Save new hits df

## Combining Energy with Hits
The combination is based on index positions in `mc_hits`.

In [1]:
import pandas as pd
import numpy as np
import matplotlib 

In [2]:
raw_events_path = '../../data/raw/events.h5'

In [3]:
event_info = pd.read_hdf(raw_events_path, key='/data/mc_info')
event_hits =  pd.read_hdf(raw_events_path, key='/data/mc_hits')

In [4]:
event_info.head(20)

Unnamed: 0,nu_E,type,nu_dir.x,nu.dir.y,nu.dir.z,nu.pos.x,nu.pos.y,nu.pos.z,nu.hits.start,nu.hits.end
0,15.54,-14,-0.630831,0.436518,0.641486,-85.337,154.304,35.735,0,5
1,11.458,14,0.070157,0.942066,-0.328009,19.081,169.073,-92.435,5,6
2,11.485,14,0.367514,0.049922,-0.928677,-61.314,130.036,-64.078,6,11
3,11.571,14,-0.177418,0.344584,0.921838,48.457,76.398,112.71,11,15
4,13.148,-14,-0.997387,0.065683,0.030071,138.488,-166.544,52.433,15,17
5,91.444,14,-0.155981,-0.027443,0.987379,170.351,112.86,-171.454,17,34
6,25.02,-14,-0.222299,0.871988,-0.436142,-29.961,-139.823,-39.763,34,259
7,14.765,-14,-0.454631,-0.145056,-0.878789,212.293,-7.85,172.99,259,259
8,70.096,-14,-0.574247,0.15226,-0.804399,10.753,-177.158,52.685,259,290
9,29.825,14,0.930999,-0.362261,-0.044802,85.201,146.34,-105.288,290,291


In [5]:
event_hits.head()

Unnamed: 0,h.dom_id,h.pmt_id,h.pos.x,h.pos.y,h.pos.z,h.dir.x,h.dir.y,h.dir.z,h.tot,h.t
0,40,1231,-55.897,101.8,169.059,-0.478,0.827,0.296,28,27286567.0
1,93,2879,-26.344,86.85,178.511,0.0,0.83,0.558,27,27287009.0
2,187,5769,-74.918,65.363,139.831,0.0,0.531,-0.847,25,27289060.0
3,196,6071,-74.774,65.34,56.111,0.719,0.415,0.558,30,27287235.0
4,221,6842,-65.186,50.797,160.359,-0.478,0.827,0.296,27,27286697.0


In [6]:
event_info.nu_E.min()

10.002

In [7]:
event_hits = event_hits.rename_axis('pos').reset_index()
event_hits['energy'] =''

In [8]:
event_hits

Unnamed: 0,pos,h.dom_id,h.pmt_id,h.pos.x,h.pos.y,h.pos.z,h.dir.x,h.dir.y,h.dir.z,h.tot,h.t,energy
0,0,40,1231,-55.897,101.800,169.059,-0.478,0.827,0.296,28,27286567.0,
1,1,93,2879,-26.344,86.850,178.511,0.000,0.830,0.558,27,27287009.0,
2,2,187,5769,-74.918,65.363,139.831,0.000,0.531,-0.847,25,27289060.0,
3,3,196,6071,-74.774,65.340,56.111,0.719,0.415,0.558,30,27287235.0,
4,4,221,6842,-65.186,50.797,160.359,-0.478,0.827,0.296,27,27286697.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
489901,489901,1934,59941,77.988,-40.148,130.541,0.000,-0.955,-0.296,18,72056356.0,
489902,489902,1981,61393,70.237,-59.962,196.389,0.415,-0.720,-0.556,29,72056427.0,
489903,489903,1995,61845,70.298,-59.901,65.511,0.719,-0.415,0.558,25,72056374.0,
489904,489904,2012,62366,88.385,-60.262,74.159,0.478,-0.827,0.296,24,72056345.0,


In [9]:
pd.Series(list(range(0, 1000000000, 15000)))

0                0
1            15000
2            30000
3            45000
4            60000
           ...    
66662    999930000
66663    999945000
66664    999960000
66665    999975000
66666    999990000
Length: 66667, dtype: int64

In [18]:
event_info['event_range'] = list(event_info['nu.hits.start'] 

nu_E             False
type             False
nu_dir.x         False
nu.dir.y         False
nu.dir.z         False
nu.pos.x         False
nu.pos.y         False
nu.pos.z         False
nu.hits.start    False
nu.hits.end      False
event_range      False
dtype: bool

In [37]:
event_info['event_range'] = event_info['nu.hits.start'].apply(lambda x: [x]) + event_info['nu.hits.end'].apply(lambda x: [x])


In [38]:
event_info

Unnamed: 0,nu_E,type,nu_dir.x,nu.dir.y,nu.dir.z,nu.pos.x,nu.pos.y,nu.pos.z,nu.hits.start,nu.hits.end,event_range
0,15.540,-14,-0.630831,0.436518,0.641486,-85.337,154.304,35.735,0,5,"[0, 5]"
1,11.458,14,0.070157,0.942066,-0.328009,19.081,169.073,-92.435,5,6,"[5, 6]"
2,11.485,14,0.367514,0.049922,-0.928677,-61.314,130.036,-64.078,6,11,"[6, 11]"
3,11.571,14,-0.177418,0.344584,0.921838,48.457,76.398,112.710,11,15,"[11, 15]"
4,13.148,-14,-0.997387,0.065683,0.030071,138.488,-166.544,52.433,15,17,"[15, 17]"
...,...,...,...,...,...,...,...,...,...,...,...
5730,73.038,-14,-0.119695,-0.113570,-0.986294,-102.301,10.814,259.625,489453,489455,"[489453, 489455]"
5731,79.484,14,-0.220923,0.896138,-0.384876,51.252,56.680,72.604,489455,489817,"[489455, 489817]"
5732,35.602,14,-0.665612,0.267922,0.696548,191.109,-60.389,-7.956,489817,489820,"[489817, 489820]"
5733,10.022,14,0.868500,-0.195664,0.455438,-126.753,61.647,-71.618,489820,489897,"[489820, 489897]"


In [56]:
event_hits['energy'] = event_hits.loc[event_hits['pos'].isin(range(0,5)), ]

In [57]:
event_hits

Unnamed: 0,pos,h.dom_id,h.pmt_id,h.pos.x,h.pos.y,h.pos.z,h.dir.x,h.dir.y,h.dir.z,h.tot,h.t,energy
0,0,40,1231,-55.897,101.800,169.059,-0.478,0.827,0.296,28,27286567.0,0
1,1,93,2879,-26.344,86.850,178.511,0.000,0.830,0.558,27,27287009.0,1
2,2,187,5769,-74.918,65.363,139.831,0.000,0.531,-0.847,25,27289060.0,2
3,3,196,6071,-74.774,65.340,56.111,0.719,0.415,0.558,30,27287235.0,3
4,4,221,6842,-65.186,50.797,160.359,-0.478,0.827,0.296,27,27286697.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...
489901,489901,1934,59941,77.988,-40.148,130.541,0.000,-0.955,-0.296,18,72056356.0,
489902,489902,1981,61393,70.237,-59.962,196.389,0.415,-0.720,-0.556,29,72056427.0,
489903,489903,1995,61845,70.298,-59.901,65.511,0.719,-0.415,0.558,25,72056374.0,
489904,489904,2012,62366,88.385,-60.262,74.159,0.478,-0.827,0.296,24,72056345.0,


In [64]:
event_info.head()

Unnamed: 0,nu_E,type,nu_dir.x,nu.dir.y,nu.dir.z,nu.pos.x,nu.pos.y,nu.pos.z,nu.hits.start,nu.hits.end,event_range
0,15.54,-14,-0.630831,0.436518,0.641486,-85.337,154.304,35.735,0,5,"[0, 5]"
1,11.458,14,0.070157,0.942066,-0.328009,19.081,169.073,-92.435,5,6,"[5, 6]"
2,11.485,14,0.367514,0.049922,-0.928677,-61.314,130.036,-64.078,6,11,"[6, 11]"
3,11.571,14,-0.177418,0.344584,0.921838,48.457,76.398,112.71,11,15,"[11, 15]"
4,13.148,-14,-0.997387,0.065683,0.030071,138.488,-166.544,52.433,15,17,"[15, 17]"


In [87]:
df = pd.DataFrame()

for idx, row in event_info.iterrows():
#     print("{0} \n  {1}".format(idx, row))
    slice = event_hits.iloc[row['nu.hits.start']: row['nu.hits.end']]
#     print(slice)
    slice['energy'] = row['nu_E']
    print(slice)
    df = pd.concat([df, slice])
    break

   pos  h.dom_id  h.pmt_id  h.pos.x  h.pos.y  h.pos.z  h.dir.x  h.dir.y  \
0    0        40      1231  -55.897  101.800  169.059   -0.478    0.827   
1    1        93      2879  -26.344   86.850  178.511    0.000    0.830   
2    2       187      5769  -74.918   65.363  139.831    0.000    0.531   
3    3       196      6071  -74.774   65.340   56.111    0.719    0.415   
4    4       221      6842  -65.186   50.797  160.359   -0.478    0.827   

   h.dir.z  h.tot         h.t  energy  
0    0.296     28  27286567.0   15.54  
1    0.558     27  27287009.0   15.54  
2   -0.847     25  27289060.0   15.54  
3    0.558     30  27287235.0   15.54  
4    0.296     27  27286697.0   15.54  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [88]:
df

Unnamed: 0,pos,h.dom_id,h.pmt_id,h.pos.x,h.pos.y,h.pos.z,h.dir.x,h.dir.y,h.dir.z,h.tot,h.t,energy
0,0,40,1231,-55.897,101.8,169.059,-0.478,0.827,0.296,28,27286567.0,15.54
1,1,93,2879,-26.344,86.85,178.511,0.0,0.83,0.558,27,27287009.0,15.54
2,2,187,5769,-74.918,65.363,139.831,0.0,0.531,-0.847,25,27289060.0,15.54
3,3,196,6071,-74.774,65.34,56.111,0.719,0.415,0.558,30,27287235.0,15.54
4,4,221,6842,-65.186,50.797,160.359,-0.478,0.827,0.296,27,27286697.0,15.54


In [80]:
for i in event_info[['nu_E', 'event_range']].values:
    print(i[0])
    slice = event_hits.loc[event_hits['pos'].isin(range(0,5))]
    slice['energy'] = 15.5
    print(slicea)
#     print(event_hits.head())
    break

15.54
   pos  h.dom_id  h.pmt_id  h.pos.x  h.pos.y  h.pos.z  h.dir.x  h.dir.y  \
0    0        40      1231  -55.897  101.800  169.059   -0.478    0.827   
1    1        93      2879  -26.344   86.850  178.511    0.000    0.830   
2    2       187      5769  -74.918   65.363  139.831    0.000    0.531   
3    3       196      6071  -74.774   65.340   56.111    0.719    0.415   
4    4       221      6842  -65.186   50.797  160.359   -0.478    0.827   

   h.dir.z  h.tot         h.t  energy  
0    0.296     28  27286567.0    15.5  
1    0.558     27  27287009.0    15.5  
2   -0.847     25  27289060.0    15.5  
3    0.558     30  27287235.0    15.5  
4    0.296     27  27286697.0    15.5  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [85]:
data = zip(event_info['nu.hits.start'],
           event_info['nu.hits.end'],
           event_info['nu_E'])
a
for a, b, c in data:
    event_hits['energy'] = np.where(event_hits.pos.between(a, b, inclusive=True), c, "Unfilled")  
    

KeyboardInterrupt: 