# In [None]:
### Filter the lightcurves and save to a new directory which will be input into MicroLIA's training_set module ###

import os
import numpy as np

original_path = '/Users/daniel/Downloads/ELASTICC2_i_Constant/' # The directory initially provided by Karen containing all the lightcurves for every class
new_path = '/Users/daniel/Downloads/ELASTICC2_i_filtered/' # A new directory where the filtered lightcurves will be saved (one subdirectory per category below, created by the loop if missing)

# All compiled classes, which are the folder names. NOTE: None of the Mdwarf-flare have has_peak and has_variability!
categories = ['Cepheid', 'd-Sct', 'uLens-Single_PyLIMA', 'EB', 'RRL', 'uLens-Binary', 'uLens-Single-GenLens', 'Constant'] #'dwarf-nova', 'Mdwarf-flare'

num_to_save = 1000 # Will only save 1000 lightcurves per class 
sigma_depth = 5 # Will compute the 5σ depth to remove measurements 1σ below this limit
saturation_limit_i = 15.8 # LSST saturation limit for i-band (https://www.lsst.org/sites/default/files/docs/sciencebook/SB_3.pdf)
min_points = 10 # A filtered lightcurve must retain at least this many measurements to be saved

for category in categories:
	if category != 'Constant':
		# Load the lightcurve names and filtering criteria as saved by Karen
		# ndmin=2 keeps the table two-dimensional even if it only holds a single row, so the column indexing below is always valid
		filter_results = np.loadtxt(original_path+'filters_results_'+category+'.txt', skiprows=1, dtype=str, ndmin=2)
		#
		# Set the four recommended criteria
		valid = high_chi2 = has_peak = has_variability = 'True' #columns 2,3,4,5
		#
		# Index according to the above criteria 
		indices = np.where((filter_results[:,2] == valid) & (filter_results[:,3] == high_chi2) & (filter_results[:,4] == has_peak) & (filter_results[:,5] == has_variability))[0]
		print(f'Class: {category} | No. of Good Lightcurves: {len(indices)}')
		# These are the names of the good, filtered lightcurves
		lc_file_names = np.array(['/'+file.split('/')[-1] for file in filter_results[:,0]])[indices]
	else:
		# No need to filter the constant lightcurves, all are assumed to be good
		# os.listdir returns entries in arbitrary order, so sort them to make the saved subset reproducible
		lc_file_names = np.array(['/'+file.split('/')[-1] for file in sorted(os.listdir(original_path+category)) if '.dat' in file])
	#
	# Make sure the output subdirectory for this class exists before writing into it
	os.makedirs(new_path+category, exist_ok=True)
	#
	# Loop through the lightcurve files and save to the new directory
	counter = 0 # To determine how many good lightcurves are being saved
	for lc_file_name in lc_file_names:
		# Load the lightcurve data, ndmin=2 so that a one-row file still loads as a table
		data = np.loadtxt(original_path+category+lc_file_name, ndmin=2)
		#
		# A file with fewer rows than min_points can never pass the cut below, skip it before indexing its columns
		if data.shape[0] < min_points:
			continue
		mag, magerr = data[:,1], data[:,2]
		#
		# Mask out bad data points
		five_sigma_depth = np.mean(mag) + sigma_depth * np.mean(magerr) # The 5σ depth computed with means instead of medians
		#
		# Select only the measurements brighter than the 5σ depth and dimmer than the saturation limit
		mask = np.where(((mag + magerr) < five_sigma_depth) & ((mag - magerr) > saturation_limit_i))[0]
		#
		# Save the masked lightcurve if it contains at least min_points points
		if len(mask) >= min_points:
			np.savetxt(new_path+category+lc_file_name, data[mask])
			counter += 1
			if counter == num_to_save:
				break


### Generate the training set using the training_set.load_all() function, therefore will use the actual saved lightcurves in lieu of simulations ### 

from MicroLIA import training_set 

# Directory holding the filtered lightcurves that were saved as outlined above
new_path = '/Users/daniel/Downloads/ELASTICC2_i_filtered/'

# Photometry settings
zeropoint_i = 27.85 # Instrumental zeropoint for LSST (https://smtn-002.lsst.io)
convert = True # Convert from apparent magnitudes to flux
apply_weights = True # Consider the magnitude errors when calculating the lightcurve statistics

# Output settings
save_file = True # Save the training set instead of only returning it (note that this will always save to the local home directory)
filename = 'Filtered_Training_Set_1000' # Pandas dataframe will be saved with the following name: 'MicroLIA_Training_Set_'+filename+'.csv'

# Gather the keyword options once and hand them to load_all in a single call
load_all_options = dict(
	convert=convert,
	zp=zeropoint_i,
	filename=filename,
	apply_weights=apply_weights,
	save_file=save_file,
)
data_x, data_y = training_set.load_all(new_path, **load_all_options)


### Train and optimize the classifiers ###

import os
import pandas as pd 
from MicroLIA import ensemble_model

# Load the training set file generated by the training_set module
filename = 'Filtered_Training_Set_1000'
file_path = os.path.expanduser('~/MicroLIA_Training_Set_'+filename+'.csv') # Pandas dataframe that was saved, by design it is always saved in local home!
# Bound to training_data (not training_set) so the MicroLIA training_set module imported earlier in this file is not shadowed
training_data = pd.read_csv(file_path)

# Will only use five classes
classes_to_use = ['Constant', 'd-Sct', 'EB', 'RRL', 'uLens-Single_PyLIMA']
training_data = training_data[training_data['label'].isin(classes_to_use)]

# Classifier set up
optimize = True # Whether to run the optimization routine
n_iter = 250 # Will run the model hyperparameter routine for 250 trials
impute = True # Whether to impute NaN values
opt_cv = 5 # Will optimize using 5-fold cross-validation
boruta_trials = 250 # Will run the feature selection routine for 250 trials

# The feature importances will be ranked using an XGBoost model (more conservative than the 'rf' option, both are worth exploring)
boruta_model = 'xgb' # Note that this 'xgb' method fails with large datasets, bug is fixed and pushed to GitHub

clf = 'xgb' # The model to train, this is the XGBoost

model = ensemble_model.Classifier(clf=clf, training_data=training_data, optimize=optimize, 
	opt_cv=opt_cv, boruta_trials=boruta_trials, boruta_model=boruta_model, n_iter=n_iter, impute=impute)

model.create() 
model.save(dirname=f'Filtered_Training_Set_1000_{clf}_Model')

# Now optimize and save a Random Forest model

# Rebind clf as well as model.clf: the save directory name is built from clf, so leaving it as 'xgb'
# would point the RF save at the XGBoost directory instead of 'Filtered_Training_Set_1000_rf_Model'
clf = 'rf'
model.clf = clf

model.create() 
model.save(dirname=f'Filtered_Training_Set_1000_{clf}_Model')


### Load the saved classification models and review performance ###

import os
import numpy as np 
import pandas as pd
from MicroLIA import ensemble_model

# Load the training set file generated by the training_set module
filename = 'Filtered_Training_Set_1000'
file_path = os.path.expanduser('~/MicroLIA_Training_Set_'+filename+'.csv') # Pandas dataframe that was saved, by design it is always saved in local home!
# Bound to training_data (not training_set) so the MicroLIA training_set module imported earlier in this file is not shadowed
training_data = pd.read_csv(file_path)

# Will only use five classes
classes_to_use = ['Constant', 'd-Sct', 'EB', 'RRL', 'uLens-Single_PyLIMA']
training_data = training_data[training_data['label'].isin(classes_to_use)]

# Classifier set up -- will investigate the saved XGBoost model for the below example
clf = 'xgb'
opt_cv = 5 # Will optimize using 5-fold cross-validation
impute = True # Whether to impute NaN values

# Instantiate the Classifier and use the load method, note that the load path corresponds to the dirname used when saving the model
model = ensemble_model.Classifier(clf=clf, training_data=training_data, opt_cv=opt_cv, impute=impute)
model.load(path=f'Filtered_Training_Set_1000_{clf}_Model')

# Can output the following plots
model.plot_hyper_opt(ylim=(0.84,0.9))
model.plot_conf_matrix(k_fold=opt_cv, title='XGBoost Model (5-Fold CV)', savefig=False)
model.plot_roc_curve(k_fold=opt_cv)
model.plot_feature_opt(feat_names='default', flip_axes=True, savefig=False)
model.plot_tsne()

## ## 
## ##
## ## 

### Below I show how to make new predictions, we will predict the simulated best-score lightcurves ###

# Let's classify all the single lens events in the best scored dataset shared by Karen 
all_single_lenses = '/Users/daniel/Downloads/ELAsTiCC_datasets/ELASTICC2_i_Valid_Chi_Constant/uLens-Single_PyLIMA/'
# The binary and single GenLens lenses were not used for training, will check how many of them are classified as single lenses
all_binary_lenses = '/Users/daniel/Downloads/ELAsTiCC_datasets/ELASTICC2_i_Valid_Chi_Constant/uLens-Binary/'
all_single_gen_lens = '/Users/daniel/Downloads/ELAsTiCC_datasets/ELASTICC2_i_Valid_Chi_Constant/uLens-Single-GenLens/'

zeropoint_i = 27.85 # Instrumental zeropoint for LSST (https://smtn-002.lsst.io)
convert = True # To convert from apparent magnitudes to flux
apply_weights = True # Will consider the magnitude errors when calculating the lightcurve statistics 

# The classes used to generate the above model were: ['Constant', 'd-Sct', 'EB', 'RRL', 'uLens-Single_PyLIMA']
# These are converted to numerical labels for the XGBoost model by design
# Furthermore these are sorted in alphabetical order therefore: Constant=0, EB=1, RRL=2, d-Sct=3, uLens-Single_PyLIMA=4
ulens_row = 4 # Row of every prediction that corresponds to uLens-Single_PyLIMA

# The same predict/count/report steps are applied to each microlensing directory, in this order
lens_directories = (
	('uLens-Single_PyLIMA', all_single_lenses),
	('uLens-Binary', all_binary_lenses),
	('uLens-Single-GenLens', all_single_gen_lens),
)

predictions_by_class = {}
positive_detections_by_class = {}

for class_name, directory in lens_directories:
	fnames = [fname for fname in os.listdir(directory) if '.dat' in fname]
	#
	# We can store all the predictions in a single 3-D array
	predictions = np.zeros((len(fnames), len(classes_to_use), 2)) # The last axis is 2 because the prediction method returns two columns, the class label and corresponding probability prediction
	#
	for i, fname in enumerate(fnames):
		lightcurve = np.loadtxt(directory+fname)
		timestamps, mag, magerr = lightcurve[:,0], lightcurve[:,1], lightcurve[:,2]
		#
		# The model's built-in prediction method, outputs 2-D array (class label and corresponding probability prediction)
		predictions[i] = model.predict(timestamps, mag, magerr, convert=convert, apply_weights=apply_weights, zp=zeropoint_i)
	#
	# Count the lightcurves whose highest probability (second column) falls in the uLens-Single_PyLIMA row
	positive_detections = int(np.count_nonzero(np.argmax(predictions[:,:,1], axis=1) == ulens_row))
	#
	if len(predictions) == 0:
		# Nothing to summarize, and the percentage below would divide by zero
		print(f"No .dat lightcurves found in {directory}, skipping the {class_name} summary")
	else:
		print(f"Correctly classified {positive_detections} out of {len(predictions)} {class_name} ({np.round(100*(positive_detections/len(predictions)),4)}%)")
	#
	predictions_by_class[class_name] = predictions
	positive_detections_by_class[class_name] = positive_detections

# Expose the per-class results under individual names
predictions_single_lens = predictions_by_class['uLens-Single_PyLIMA']
predictions_binary_lens = predictions_by_class['uLens-Binary']
predictions_single_gen_lens = predictions_by_class['uLens-Single-GenLens']

positive_detections_single = positive_detections_by_class['uLens-Single_PyLIMA']
positive_detections_binary = positive_detections_by_class['uLens-Binary']
positive_detections_gen_lens = positive_detections_by_class['uLens-Single-GenLens']

## ## 

# Plot a histogram to visualize the probability predictions of all three microlensing classes

import matplotlib.pyplot as plt

# Extract the microlensing probabilities only for all predictions (column 1, row 4)
single_lens_probas = predictions_single_lens[:, 4, 1]
binary_lens_probas = predictions_binary_lens[:, 4, 1]
single_gen_lens_probas = predictions_single_gen_lens[:, 4, 1]

# One entry per histogram, in drawing order: (probabilities, number classified as uLens-Single_PyLIMA, legend name, bar opacity)
histogram_specs = [
	(single_lens_probas, positive_detections_single, 'uLens-Single_PyLIMA', 0.5),
	(single_gen_lens_probas, positive_detections_gen_lens, 'uLens-Single-GenLens', 0.7),
	(binary_lens_probas, positive_detections_binary, 'uLens-Binary', 0.55),
]

for probas, n_positive, class_name, alpha in histogram_specs:
	counts, bin_edges = np.histogram(probas, bins=120)
	#
	# Normalize the histogram by dividing by the total number of samples
	normalized_counts = counts / len(probas)
	#
	# The x positions are the left bin edges, so the bars must be aligned on their edge;
	# with the default center alignment every bar would be drawn half a bin width too far left
	plt.bar(bin_edges[:-1], normalized_counts, width=(bin_edges[1] - bin_edges[0]), align='edge', alpha=alpha, label=f'{class_name} (n={len(probas)}, Tot. Acc: {np.round(100*(n_positive/len(probas)),2)}%)')

plt.title('Classification of Microlensing Classes (Optimized XGBoost Model)')
plt.xlabel('Probability Prediction Lightcurve is a uLens-Single_PyLIMA'); plt.ylabel('Normalized Counts')
plt.ylim(0, 1); plt.xlim(0.8, 1); plt.legend()
plt.show()


### The exact same analysis as above but now examining the RF model ###
# Main difference is that, unlike XGBoost, the RF model can have non-numerical class labels #
# If the class labels are strings, the prediction method returns every entry as a string, including the probability predictions, instead of floats #

import os
import numpy as np 
import pandas as pd
from MicroLIA import ensemble_model

# Load the training set file generated by the training_set module
filename = 'Filtered_Training_Set_1000'
file_path = os.path.expanduser('~/MicroLIA_Training_Set_'+filename+'.csv') # Pandas dataframe that was saved, by design it is always saved in local home!
# Bound to training_data (not training_set) so the MicroLIA training_set module imported earlier in this file is not shadowed
training_data = pd.read_csv(file_path)

# Will only use five classes
classes_to_use = ['Constant', 'd-Sct', 'EB', 'RRL', 'uLens-Single_PyLIMA']
training_data = training_data[training_data['label'].isin(classes_to_use)]

# Classifier set up -- will investigate the saved Random Forest model for the below example
clf = 'rf'
opt_cv = 5 # Will optimize using 5-fold cross-validation
impute = True # Whether to impute NaN values

# Instantiate the Classifier and use the load method, note that the load path corresponds to the dirname used when saving the model
model = ensemble_model.Classifier(clf=clf, training_data=training_data, opt_cv=opt_cv, impute=impute)
model.load(path=f'Filtered_Training_Set_1000_{clf}_Model')

# Can output the following plots
model.plot_hyper_opt(ylim=(0.8,0.9))
model.plot_conf_matrix(k_fold=opt_cv, title='RF Model (5-Fold CV)', savefig=False)
model.plot_roc_curve(k_fold=opt_cv)
model.plot_feature_opt(feat_names='default', flip_axes=True, savefig=False) #Same as the first XGBoost model since the feature selection was the same
model.plot_tsne() #Same as the first XGBoost model since the feature selection was the same

## ## 
## ##
## ## 

### Below I show how to make new predictions, we will predict the simulated best-score lightcurves ###

# Let's classify all the single lens events in the best scored dataset shared by Karen 
all_single_lenses = '/Users/daniel/Downloads/ELAsTiCC_datasets/ELASTICC2_i_Valid_Chi_Constant/uLens-Single_PyLIMA/'
# The binary and single GenLens lenses were not used for training, will check how many of them are classified as single lenses
all_binary_lenses = '/Users/daniel/Downloads/ELAsTiCC_datasets/ELASTICC2_i_Valid_Chi_Constant/uLens-Binary/'
all_single_gen_lens = '/Users/daniel/Downloads/ELAsTiCC_datasets/ELASTICC2_i_Valid_Chi_Constant/uLens-Single-GenLens/'

zeropoint_i = 27.85 # Instrumental zeropoint for LSST (https://smtn-002.lsst.io)
convert = True # To convert from apparent magnitudes to flux
apply_weights = True # Will consider the magnitude errors when calculating the lightcurve statistics 

# The same predict/count/report steps are applied to each microlensing directory, in this order
lens_directories = (
	('uLens-Single_PyLIMA', all_single_lenses),
	('uLens-Binary', all_binary_lenses),
	('uLens-Single-GenLens', all_single_gen_lens),
)

predictions_by_class = {}
positive_detections_by_class = {}

for class_name, directory in lens_directories:
	fnames = [fname for fname in os.listdir(directory) if '.dat' in fname]
	#
	# We can store all the predictions in a single 3-D array but in this case will use np.empty instead of np.zeros since we will be storing strings since the RF model was trained with string class labels
	# The last axis is 2 because the prediction method returns two columns, the class label and corresponding probability prediction
	predictions = np.empty((len(fnames), len(classes_to_use), 2), dtype=np.dtype('U50'))  # 'U50' specifies a Unicode string with a maximum length of 50 characters
	#
	for i, fname in enumerate(fnames):
		lightcurve = np.loadtxt(directory+fname)
		timestamps, mag, magerr = lightcurve[:,0], lightcurve[:,1], lightcurve[:,2]
		#
		# The model's built-in prediction method, outputs 2-D array (class label and corresponding probability prediction)
		predictions[i] = model.predict(timestamps, mag, magerr, convert=convert, apply_weights=apply_weights, zp=zeropoint_i)
	#
	# For every lightcurve find the row with the highest probability (second column, stored as strings so cast to float first)
	index_highest_proba = np.argmax(predictions[:,:,1].astype('float'), axis=1)
	# The first column holds the labels, pick the label sitting in that highest-probability row
	predicted_labels = predictions[np.arange(len(predictions)), index_highest_proba, 0]
	positive_detections = int(np.count_nonzero(predicted_labels == 'uLens-Single_PyLIMA'))
	#
	if len(predictions) == 0:
		# Nothing to summarize, and the percentage below would divide by zero
		print(f"No .dat lightcurves found in {directory}, skipping the {class_name} summary")
	else:
		print(f"Correctly classified {positive_detections} out of {len(predictions)} {class_name} ({np.round(100*(positive_detections/len(predictions)),4)}%)")
	#
	predictions_by_class[class_name] = predictions
	positive_detections_by_class[class_name] = positive_detections

# Expose the per-class results under individual names
predictions_single_lens = predictions_by_class['uLens-Single_PyLIMA']
predictions_binary_lens = predictions_by_class['uLens-Binary']
predictions_single_gen_lens = predictions_by_class['uLens-Single-GenLens']

positive_detections_single = positive_detections_by_class['uLens-Single_PyLIMA']
positive_detections_binary = positive_detections_by_class['uLens-Binary']
positive_detections_gen_lens = positive_detections_by_class['uLens-Single-GenLens']

## ##

# Plot a histogram to visualize the probability predictions of all three microlensing classes

import matplotlib.pyplot as plt

# Extract the microlensing probabilities only for all predictions (column 1, row 4), cast to float since the RF predictions are stored as strings
single_lens_probas = predictions_single_lens[:, 4, 1].astype('float')
binary_lens_probas = predictions_binary_lens[:, 4, 1].astype('float')
single_gen_lens_probas = predictions_single_gen_lens[:, 4, 1].astype('float')

# One entry per histogram, in drawing order: (probabilities, number classified as uLens-Single_PyLIMA, legend name, bar opacity)
histogram_specs = [
	(single_lens_probas, positive_detections_single, 'uLens-Single_PyLIMA', 0.5),
	(single_gen_lens_probas, positive_detections_gen_lens, 'uLens-Single-GenLens', 0.7),
	(binary_lens_probas, positive_detections_binary, 'uLens-Binary', 0.55),
]

for probas, n_positive, class_name, alpha in histogram_specs:
	counts, bin_edges = np.histogram(probas, bins=120)
	#
	# Normalize the histogram by dividing by the total number of samples
	normalized_counts = counts / len(probas)
	#
	# The x positions are the left bin edges, so the bars must be aligned on their edge;
	# with the default center alignment every bar would be drawn half a bin width too far left
	plt.bar(bin_edges[:-1], normalized_counts, width=(bin_edges[1] - bin_edges[0]), align='edge', alpha=alpha, label=f'{class_name} (n={len(probas)}, Tot. Acc: {np.round(100*(n_positive/len(probas)),2)}%)')

plt.title('Classification of Microlensing Classes (Optimized RF Model)')
plt.xlabel('Probability Prediction Lightcurve is a uLens-Single_PyLIMA'); plt.ylabel('Normalized Counts')
plt.ylim(0, 1); plt.xlim(0.8, 1); plt.legend()
plt.show()








