In [0]:
def get_mt_sets(mt, train_end_date, dev_end_date, test_end_date, leads_date, feats, target_col='target_binary'):
  """Split the master table into train, dev, test and leads sets.

  Rows are assigned to a set by comparing ``mt.id_date`` with the given dates
  (all upper bounds are inclusive):

    train: id_date <= train_end_date
    dev:   train_end_date < id_date <= dev_end_date
    test:  dev_end_date < id_date <= test_end_date
    leads: id_date == leads_date

  No target is returned for the leads set.

  Side effect: ``mt`` is modified in place. A ``'set'`` column is written with
  the label ('train', 'dev', 'test' or 'leads') of every row that matches one
  of the masks above. Labels are written in that order, so a row matching
  several masks keeps the last one written. Rows matching no mask are not
  assigned a label by this function.

  Parameters:
    mt: full master table, stored in a Pandas Data Frame. Its id_date column
      holds the scoring date of each observation.
    train_end_date: Pandas Datetime object with the latest (inclusive) train date.
    dev_end_date: Pandas Datetime object with the latest (inclusive) dev date.
    test_end_date: Pandas Datetime object with the latest (inclusive) test date.
    leads_date: Pandas Datetime object with the exact id_date used for the leads.
    feats: list with the names of the columns to be used as model features.
    target_col: name of the column containing the target variable.

  Returns:
    Tuple (X_train, X_dev, X_test, X_leads, y_train, y_dev, y_test). The X_*
    items are the feature columns of each set cast to float; the y_* items
    are the ``target_col`` values of mt for the train, dev and test rows.

  Raises:
    AssertionError: if two of the checked sets share rows (in practice the
      test set overlapping the train set because the dates are out of order,
      or leads_date falling inside the test window), or if any feature column
      contains NA values. The NA check covers every row of ``mt``, not only
      the rows that end up in one of the sets.
  """
  id_dates = mt.id_date

  train_mask = id_dates <= train_end_date
  dev_mask = (id_dates > train_end_date) & (id_dates <= dev_end_date)
  test_mask = (id_dates > dev_end_date) & (id_dates <= test_end_date)
  leads_mask = id_dates == leads_date

  # Explicit raises instead of `assert` so the validation is not stripped when
  # Python runs with -O; AssertionError is kept so existing callers see the
  # same exception type.
  # NOTE: train/dev and test/dev are disjoint by construction of the masks;
  # those two checks are kept only as a guard against future edits.
  overlap_checks = (
    (train_mask, dev_mask, 'The train and dev sets overlap'),
    (test_mask, dev_mask, 'The test and dev sets overlap'),
    (test_mask, train_mask, 'The test and train sets overlap'),
    (test_mask, leads_mask, 'The test and leads sets overlap'),
  )
  for left, right, message in overlap_checks:
    if (left & right).any():
      raise AssertionError(message)

  if mt[feats].isna().any().any():
    raise AssertionError('There are NA values in the selected features')

  # Select rows and columns in a single .loc call; the chained form
  # mt.loc[mask][feats] copies every column of the subset first.
  X_train, y_train = mt.loc[train_mask, feats].astype(float), mt.loc[train_mask, target_col]
  X_dev, y_dev = mt.loc[dev_mask, feats].astype(float), mt.loc[dev_mask, target_col]
  X_test, y_test = mt.loc[test_mask, feats].astype(float), mt.loc[test_mask, target_col]
  X_leads = mt.loc[leads_mask, feats].astype(float)

  # Tag each row of the caller's table with the set it belongs to. This is done
  # after the selections above so the returned frames are unaffected by it.
  mt.loc[train_mask, 'set'] = 'train'
  mt.loc[dev_mask, 'set'] = 'dev'
  mt.loc[test_mask, 'set'] = 'test'
  mt.loc[leads_mask, 'set'] = 'leads'

  return X_train, X_dev, X_test, X_leads, y_train, y_dev, y_test