### Intro

In this notebook, we model the light curves using the model trained in the previous notebook.

### Import

In [1]:
import PREDICTION as por #Importing PREDICTION_onePDF module from the package
from PREDICTION import * #Importing importing all packages from PREDICTION_onePDF module
#The functions plot_function2, back_x and back_y must be imported separately
from PREDICTION import plot_function2, back_x, back_y, find_LC_transform 

  from .autonotebook import tqdm as notebook_tqdm


### Model Hyperparameters

In [4]:
#Model hyperparameters

#Encoder / latent sizes
encoding_size = 128     #Encoder MLP layer size
latent_size = 128       #Latent dimension size
no_latent_samples = 10  #The number of samples of the latent space to take

#Attention configuration
attention_type = 'scaledot'  #'multihead' can also be used, but scaledot works better
cross_attention = True       #Include cross-attention in the deterministic path
self_attention = True        #Include self-attention in both paths

#Recurrent pre-encoding
lstm_layers = 0                #The number of LSTM layers to use for pre-encoding
lstm_size = 32                 #The size of the LSTM layer
replace_lstm_with_gru = False  #Use a GRU instead of an LSTM
bidirectional = False          #Use bidirectional LSTM/GRU layers
lstm_agg = False               #Aggregate the latent space representations via an LSTM instead of mean pooling

#Non-linearity
activation = 'relu'  #'leaky' selects LeakyReLU, but ReLU seems to work better

#Device used for testing: CUDA when it is available, otherwise the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

### TF and Param Hyperparameters

In [7]:
#Paths and settings for the transfer functions and parameters, if they exist
tf_and_param = False #Set False if using real data without transfer function and parameter
if tf_and_param:
    #NOTE: the save paths below are built from full_folder and suffix, which this notebook
    #assigns in the "Creating Folders" section; make sure both are defined before enabling this branch
    TF_PATH = 'Transfer_Functions/'
    param_df_path = 'Parameters.csv' #If there is no parameters dataframe, it should be None
    #The data loaders are called with param_df=param_df, so the dataframe must be bound to that name
    param_df = pd.read_csv(param_df_path)
    parameters_df = param_df #Previous name of the dataframe, kept as an alias
    param_columns = ['Log_Mass','Inclination','Log_Tau','z','Eddington_Ratio','SFinf'] #Change to the names of your columns
    param_length = len(param_columns)
    #Where the predicted transfer functions are saved, one folder per split
    TF_SAVE_PATH_TRAIN = f'{full_folder}/output_{suffix}/predictions/Transfer_Functions/train/'
    TF_SAVE_PATH_TEST = f'{full_folder}/output_{suffix}/predictions/Transfer_Functions/test/'
    TF_SAVE_PATH_VAL = f'{full_folder}/output_{suffix}/predictions/Transfer_Functions/val/'
    #Where the predicted parameters are saved, one folder per split
    PARAM_SAVE_PATH_TRAIN = f'{full_folder}/output_{suffix}/predictions/Parameter_Predictions/train/'
    PARAM_SAVE_PATH_VAL = f'{full_folder}/output_{suffix}/predictions/Parameter_Predictions/val/'
    PARAM_SAVE_PATH_TEST = f'{full_folder}/output_{suffix}/predictions/Parameter_Predictions/test/'
    beta_param = 0.1 #Change to the value you trained
    beta_tf = 0.1 #Change to the value you trained
    transfer_function_length = 1000 #Change to the appropriate tf_length
else:
    #No transfer functions or parameters: disable the corresponding inputs and loss weights
    TF_PATH = None
    param_df = None
    param_columns = []
    param_length = 0
    beta_param = 0
    beta_tf = 0
    transfer_function_length = 0

### Creating Folders

The cells below remove the padding from the light-curve files in the dataset folder. If you would like to train the model further afterwards, please save a copy of the dataset folder first.

In [11]:
#The folder that all the files are in.
#No trailing slash: every path in this notebook is built as f'{full_folder}/...',
#so a trailing slash here would produce a doubled separator ('.//output_...').
full_folder = '.'
suffix = 'LCs' #Whatever suffix you gave to the output files

In [13]:
#Creating the predictions folder tree (folders that already exist are left as they are)
predictions_dir = f'{full_folder}/output_{suffix}/predictions'
os.makedirs(predictions_dir, exist_ok=True)

#One sub-folder per split for the transfer functions and for the parameter predictions
for output_kind in ('Transfer_Functions', 'Parameter_Predictions'):
    for split in ('test', 'train', 'val'):
        os.makedirs(f'{predictions_dir}/{output_kind}/{split}', exist_ok=True)

#The package helper sets up the remaining folders under the predictions directory
por.create_prediction_folders(predictions_dir)

Folder already exists: .//output_LCs/predictions\train
Folder already exists: .//output_LCs/predictions\train\plots
Folder already exists: .//output_LCs/predictions\train\data
Folder already exists: .//output_LCs/predictions\test
Folder already exists: .//output_LCs/predictions\test\plots
Folder already exists: .//output_LCs/predictions\test\data
Folder already exists: .//output_LCs/predictions\val
Folder already exists: .//output_LCs/predictions\val\plots
Folder already exists: .//output_LCs/predictions\val\data


In [15]:
#Deleting the padded values from every split, in the order test, train, val
for split in ('test', 'train', 'val'):
    folder_path = f'{full_folder}/dataset_{suffix}/{split}/'
    por.remove_padded_values_and_filter(folder_path)

Removed padding in file: 35_split49.csv
Removed padding in file: 58_split41.csv
Removed padding in file: 5_split96.csv
Removed padding in file: 60_split75.csv
Removed padding in file: 11_split48.csv
Removed padding in file: 14_split32.csv
Removed padding in file: 15_split6.csv
Removed padding in file: 16_split65.csv
Removed padding in file: 17_split26.csv
Removed padding in file: 18_split83.csv
Removed padding in file: 1_split82.csv
Removed padding in file: 20_split36.csv
Removed padding in file: 21_split16.csv
Removed padding in file: 22_split21.csv
Removed padding in file: 23_split88.csv
Removed padding in file: 24_split10.csv
Removed padding in file: 25_split5.csv
Removed padding in file: 27_split78.csv
Removed padding in file: 28_split67.csv
Removed padding in file: 29_split24.csv
Removed padding in file: 2_split46.csv
Removed padding in file: 30_split80.csv
Removed padding in file: 32_split47.csv
Removed padding in file: 34_split76.csv
Removed padding in file: 36_split99.csv
Remov

In [16]:
#Path to the saved model
MODEL_PATH = f"{full_folder}/output_{suffix}/model_{suffix}.pth"

#Paths to the data, one folder per split
dataset_dir = f"{full_folder}/dataset_{suffix}"
DATA_PATH_TRAIN = f"{dataset_dir}/train"
DATA_PATH_VAL = f"{dataset_dir}/val"
DATA_PATH_TEST = f"{dataset_dir}/test"

#Path where the plots and data are saved
OUTPUT_PATH = f'{full_folder}/output_{suffix}/predictions/'

In [19]:
#Clear the output directory if you want: uncomment the next line
#clear_output_dir=por.prepare_output_dir(OUTPUT_PATH)

In [21]:
#Load the saved model, passing the hyperparameters defined earlier in the notebook.
#Note that attention_type is not forwarded in this call.
model = por.load_trained_model(
    MODEL_PATH,
    device,
    encoding_size,
    latent_size,
    latent_mlp_size=encoding_size,
    attention=cross_attention,
    self_attention=self_attention,
    no_latent_space_sample=no_latent_samples,
    lstm_layers=lstm_layers,
    lstm_agg=lstm_agg,
    lstm_size=lstm_size,
    transfer_function_length=transfer_function_length,
    parameters_length=param_length,
    classes=0,
    replace_lstm_with_gru=replace_lstm_with_gru,
    activation=activation,
    bidirectional=bidirectional,
)

#Loading the criterion and the MSE metric
criterion, mseMetric = por.get_criteria()

In [23]:
#Loading the coefficients used to transform back from [-2,2] to the real values
tr = por.load_trcoeff(f'{full_folder}/TR_Coeffs/trcoeff_{suffix}.pickle')

In [25]:
#The number of target points you want to model
num_target_smooth = 100

#Keyword arguments shared by the three loaders
loader_kwargs = dict(
    num_target_smooth=num_target_smooth,
    tf_dir=TF_PATH,
    param_df=param_df,
    param_columns=param_columns,
    class_labels_df=None,
)

#Load the train, test and val data (in that order)
trainLoader = por.load_train_data(DATA_PATH_TRAIN, **loader_kwargs)
testLoader = por.load_test_data(DATA_PATH_TEST, **loader_kwargs)
valLoader = por.load_val_data(DATA_PATH_VAL, **loader_kwargs)

In [27]:
#Empty accumulators for the predictions; the test, train and validation sections each append to them
names = []  #One light-curve name appended per loader item
all_z, all_R, all_full_rep = [], [], []  #One entry appended per split
all_predicted_tfs, all_predicted_params = [], []  #One entry appended per split

## Reconstruction 

#### Test

In [31]:
#Collect the light-curve names of the test set and, when available, the true
#transfer functions and parameters
transfer_functions_test = []
params_test = []
for batch in testLoader:
    #Keep the part of the name before the first underscore, using the first item of the batch
    lc_name = batch['lcName'][0]
    names.append(lc_name.split('_')[0])
    if not tf_and_param:
        continue
    transfer_functions_test.append(batch['transfer_function'][0].numpy())
    params_test.append(batch['parameters'][0].numpy())

#Convert the collected lists to arrays (they stay empty when tf_and_param is False)
params_test = np.array(params_test)
transfer_functions_test = np.array(transfer_functions_test)

In [33]:
#Prediction and plotting of the test data
(
    testMetrics,
    z_test,
    R_test,
    agg_R_z_test,
    predicted_params_test,
    predicted_tf_test,
    predicted_classes,
) = por.plot_test_data(
    model,
    testLoader,
    criterion,
    mseMetric,
    plot_function2,
    device,
    tr,
    OUTPUT_PATH,
    beta_param=beta_param,
    beta_classifier=0,
    beta_tf=beta_tf,
)

100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:04<00:00,  1.02s/it]


<Figure size 640x480 with 0 Axes>

In [35]:
#Append the test-set results to the shared accumulators
for store, result in (
    (all_z, z_test),
    (all_R, R_test),
    (all_full_rep, agg_R_z_test),
    (all_predicted_tfs, predicted_tf_test),
    (all_predicted_params, predicted_params_test),
):
    store.append(result)

In [37]:
#Save the test metrics
savetest = por.save_test_metrics(OUTPUT_PATH, testMetrics)

#### Train

In [40]:
#Collect the light-curve names of the train set and, when available, the true
#transfer functions and parameters
transfer_functions_train = []
params_train = []
for batch in trainLoader:
    #Keep the part of the name before the first underscore, using the first item of the batch
    lc_name = batch['lcName'][0]
    names.append(lc_name.split('_')[0])
    if not tf_and_param:
        continue
    transfer_functions_train.append(batch['transfer_function'][0].numpy())
    params_train.append(batch['parameters'][0].numpy())

#Convert the collected lists to arrays (they stay empty when tf_and_param is False)
params_train = np.array(params_train)
transfer_functions_train = np.array(transfer_functions_train)

In [41]:
#Prediction and plotting of the train data
(
    trainMetrics,
    z_train,
    R_train,
    agg_R_z_train,
    predicted_params_train,
    predicted_tf_train,
    predicted_classes,
) = por.plot_train_light_curves(
    model,
    trainLoader,
    criterion,
    mseMetric,
    plot_function2,
    device,
    tr,
    OUTPUT_PATH,
    beta_param=beta_param,
    beta_classifier=0,
    beta_tf=beta_tf,
)

100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:23<00:00,  3.34it/s]


<Figure size 640x480 with 0 Axes>

In [44]:
#Append the train-set results to the shared accumulators
for store, result in (
    (all_z, z_train),
    (all_R, R_train),
    (all_full_rep, agg_R_z_train),
    (all_predicted_tfs, predicted_tf_train),
    (all_predicted_params, predicted_params_train),
):
    store.append(result)

In [46]:
#Saving the train metrics
savetrain = por.save_train_metrics(OUTPUT_PATH, trainMetrics)

#### Validation

In [49]:
#Collect the light-curve names of the validation set and, when available, the true
#transfer functions and parameters
transfer_functions_val = []
params_val = []
for batch in valLoader:
    #Keep the part of the name before the first underscore, using the first item of the batch
    lc_name = batch['lcName'][0]
    names.append(lc_name.split('_')[0])
    if not tf_and_param:
        continue
    transfer_functions_val.append(batch['transfer_function'][0].numpy())
    params_val.append(batch['parameters'][0].numpy())

#Convert the collected lists to arrays (they stay empty when tf_and_param is False)
params_val = np.array(params_val)
transfer_functions_val = np.array(transfer_functions_val)

In [51]:
#Prediction and plotting of the validation data
(
    valMetrics,
    z_val,
    R_val,
    agg_R_z_val,
    predicted_params_val,
    predicted_tf_val,
    predicted_classes,
) = por.plot_val_curves(
    model,
    valLoader,
    criterion,
    mseMetric,
    plot_function2,
    device,
    tr,
    OUTPUT_PATH,
    beta_param=beta_param,
    beta_classifier=0,
    beta_tf=beta_tf,
)

100%|██████████████████████████████████████████████████████████████████████████████████| 17/17 [00:05<00:00,  3.32it/s]


<Figure size 640x480 with 0 Axes>

In [53]:
#Append the validation-set results to the shared accumulators
for store, result in (
    (all_z, z_val),
    (all_R, R_val),
    (all_full_rep, agg_R_z_val),
    (all_predicted_tfs, predicted_tf_val),
    (all_predicted_params, predicted_params_val),
):
    store.append(result)

In [55]:
#Save the hidden representations and the names, one pickle file per collection,
#written to the current working directory as '<suffix>_<tag>.pickle'
for tag, payload in (
    ('Rs', all_R),
    ('zs', all_z),
    ('full_reps', all_full_rep),
    ('names', names),
):
    with open(f'{suffix}_{tag}.pickle', 'wb') as handle:
        pickle.dump(payload, handle)