In [None]:
import pandas as pd
import numpy as np
import torch


In [None]:
# Root directory holding the simulation inputs and the converted outputs
data_loc = '/Users/majdghrear/data/e_dir_fit'

# Input is split across 100 pickle files (indices 0-99), each containing
# 10k electron recoil simulations
n_input_files = 100
files_e = [
	f"{data_loc}/3D_processed_data/processed_recoils_{file_idx}.pk"
	for file_idx in range(n_input_files)
]

In [None]:
# Half-width of the cubic fiducial region: a recoil is kept only if every one of
# its points satisfies -eff_l <= coordinate < eff_l on all three axes
eff_l = 3.0

# NOTE(review): `dim` (shape of the dense voxel grid) and `vox_l` (voxel edge
# length) are used below but are not defined in any cell visible here, so this
# cell raises NameError on a fresh kernel. Define them in a configuration cell
# before running. The grid is indexed as [ix, iy, iz, 0], so `dim` must be 4-D
# with a trailing axis of length >= 1, and the first three axes must be long
# enough to hold index int((coordinate + eff_l) / vox_l) for coordinate < eff_l.

# Highest file index, used only for the progress message
last_ind = len(files_e) - 1

# Loop through files; `ind` is the file index used in the output file names
for ind, file in enumerate(files_e):

	# Read the pickled DataFrame of recoils (one recoil per row)
	# NOTE: unpickling can execute arbitrary code -- only load trusted files
	df = pd.read_pickle(file)

	# Lists to store sparse tensors and corresponding labels
	labels = []
	sparse_tensors = []

	# Convert each recoil (its 'x', 'y', 'z' point coordinates) into a sparse
	# voxel-count tensor, keeping row['dir'] as the label
	for index, row in df.iterrows():

		# If recoil escapes fiducial area, skip it
		if any(np.max(row[axis]) >= eff_l or np.min(row[axis]) < -eff_l for axis in ('x', 'y', 'z')):
			continue

		# Initialize empty dense tensor
		# NOTE(review): uint8 cannot hold counts above 255 -- confirm that no voxel
		# ever receives more points than that
		voxelgrid = np.zeros(dim).astype('uint8')

		# Loop the x, y, z positions in the recoil and fill in the dense tensor:
		# shift each coordinate into [0, 2*eff_l) and divide by the voxel size to
		# get the voxel index, then count the point in that voxel
		for x, y, z in zip(row['x'], row['y'], row['z']):
			ix = int((x + eff_l) / vox_l)
			iy = int((y + eff_l) / vox_l)
			iz = int((z + eff_l) / vox_l)
			voxelgrid[ix, iy, iz, 0] += 1

		# Convert to pytorch tensor, then to sparse pytorch tensor
		vg = torch.tensor(voxelgrid).to_sparse()

		# Store sparse tensor and corresponding label
		sparse_tensors.append(vg)
		labels.append(row['dir'])

	# Save sparse tensors and labels (one output pair per input file)
	torch.save(sparse_tensors, data_loc+'/3D_sparse_tensors/sparse_recoils_'+str(ind)+'.pt')
	# NOTE: np.savetxt writes plain text, so despite the '.pt' extension this file
	# must be read back with np.loadtxt, not torch.load
	np.savetxt(data_loc+'/3D_sparse_tensors/labels_'+str(ind)+'.pt', labels)

	print("Progress: ", ind, '/' + str(last_ind))
