In [49]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, cohen_kappa_score
from sklearn.model_selection import GroupKFold

In [50]:
df = pd.read_csv("../../new/datasets/balanced_grouped.csv")

In [51]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,335,336,337,338,339,340,341,342,group,id
0,0.015594,0.010444,0.021063,0.025866,0.023642,0.026157,0.032252,0.027204,0.012955,0.010038,...,0.822413,0.854666,0.898153,0.943331,0.963960,1.000000,0.989876,0.990877,9,1
1,0.331350,0.262977,0.212018,0.300518,0.377132,0.383542,0.231755,0.086373,0.090431,0.221658,...,0.622610,0.634173,0.721204,0.828365,0.825327,0.854831,0.801820,0.771081,6,1
2,0.134930,0.075620,0.062616,0.044667,0.121870,0.126496,0.103467,0.070279,0.098824,0.085445,...,0.934640,0.942650,0.890793,0.946226,0.917741,1.000000,0.951573,0.985645,5,1
3,0.109130,0.129643,0.087304,0.051216,0.052661,0.060745,0.052967,0.048967,0.062728,0.064739,...,0.846347,0.877817,0.918180,0.958194,0.972685,1.000000,0.981704,0.972349,7,1
4,0.224262,0.227079,0.146116,0.080234,0.091311,0.125116,0.114054,0.055488,0.044267,0.103987,...,0.032131,0.029224,0.037412,0.051124,0.052330,0.076105,0.075062,0.095761,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,0.522966,0.397591,0.313419,0.284905,0.271822,0.246843,0.243718,0.294817,0.378934,0.408562,...,0.707678,0.711110,0.724560,0.741992,0.744172,0.769042,0.766855,0.788741,3,3
2996,0.201920,0.160191,0.144714,0.130479,0.123315,0.146114,0.197060,0.225130,0.199775,0.155762,...,0.681672,0.674718,0.676407,0.681894,0.675200,0.689825,0.683288,0.699186,3,3
2997,0.000000,0.162607,0.413518,0.572730,0.515844,0.394520,0.409770,0.492245,0.425406,0.261534,...,0.840853,0.764674,0.685685,0.607052,0.532962,0.483535,0.458896,0.475790,5,3
2998,0.257628,0.097062,0.009845,0.092240,0.235695,0.254916,0.118284,0.000000,0.053772,0.193354,...,0.975907,0.949945,0.933002,0.920661,0.897001,0.901540,0.889620,0.910721,9,3


In [52]:
# Separamos las características y la variable objetivo
X = df.iloc[:, :-2]
y = df.iloc[:, -1] - 1
# y.iloc[:, 0] = y.iloc[:, 0] - 1
group = df.iloc[:, -2]

In [55]:
gkf = GroupKFold(n_splits=10)

# Guardar las métricas para calcular promedios
accuracies = []

# 4. Crear un DataFrame para almacenar las métricas
df_results = pd.DataFrame(columns=['Fold', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'Kappa'])

In [56]:
fold_number = 1
for train_index, test_index in gkf.split(X, y, group):
    # print(train_index , len(train_index))
    # print(test_index, len(test_index))
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # crear el modelo
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(3, activation='softmax')
    ])

    # compilar el modelo
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # entrenar el modelo
    history = model.fit(X_train, y_train, epochs=100, batch_size=50, validation_data=(X_test, y_test))

    # predecir las probabilidades para los datos de prueba
    y_probs = model.predict(X_test)
    y_pred = np.argmax(y_probs, axis=-1)
    cm = confusion_matrix(y_test, y_pred)
    print(cm)

        # Calcular las métricas
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    kappa = cohen_kappa_score(y_test, y_pred)

    # Guardar los resultados en el DataFrame
    df_results = df_results.append({
        'Fold': fold_number,
        'Accuracy': acc,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'Kappa': kappa
    }, ignore_index=True)
    fold_number += 1


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  df_results = df_results.append({


In [57]:
df_results

Unnamed: 0,Fold,Accuracy,Precision,Recall,F1-Score,Kappa
0,1.0,0.944805,0.947488,0.944805,0.945072,0.917229
1,2.0,0.883333,0.89102,0.883333,0.882066,0.825
2,3.0,0.923333,0.932514,0.923333,0.924476,0.885
3,4.0,0.896667,0.89927,0.896667,0.89443,0.845
4,5.0,0.926667,0.92929,0.926667,0.926388,0.89
5,6.0,0.926421,0.931456,0.926421,0.926638,0.889623
6,7.0,0.919732,0.922921,0.919732,0.918512,0.879615
7,8.0,0.886288,0.897014,0.886288,0.882648,0.829484
8,9.0,0.922819,0.93291,0.922819,0.920468,0.884217
9,10.0,0.949495,0.95129,0.949495,0.949701,0.924242


In [58]:
# Calcular el promedio y la desviación estándar de cada métrica
metrics_mean = df_results[['Accuracy', 'Precision', 'Recall', 'F1-Score', 'Kappa']].mean()
metrics_std = df_results[['Accuracy', 'Precision', 'Recall', 'F1-Score', 'Kappa']].std()


In [60]:
metrics_mean

Accuracy     0.917956
Precision    0.923517
Recall       0.917956
F1-Score     0.917040
Kappa        0.876941
dtype: float64

In [61]:
metrics_std

Accuracy     0.022543
Precision    0.020972
Recall       0.022543
F1-Score     0.023614
Kappa        0.033809
dtype: float64

# Save model

In [67]:
# Save the model to a single file
model.save( './rna_balance.keras', save_format='keras_v3')

# Suspected Symbiotic Stars v1

In [29]:
import numpy as np
from sklearn.metrics import confusion_matrix

df_sus_sy = pd.read_csv("../../new/candidate_symbiotic_stars_v1/normalized/Suspected_SY.csv", header=None)

# predecir las probabilidades para los datos de prueba
y_probs = model.predict(df_sus_sy)
y_pred = np.argmax(y_probs, axis=-1)
cm = confusion_matrix([0 for _ in range(len(df_sus_sy))], y_pred, labels=[0, 1, 2])
print(cm)

[[14  0  1]
 [ 0  0  0]
 [ 0  0  0]]


In [30]:
df_sus_sy_normalized = pd.read_csv("../../new/candidate_symbiotic_stars_v1/calibrated_data/Suspected_SY.csv")

df5 = pd.DataFrame(y_probs)
df5 = df5.round(4)
df5['label'] = y_pred
df5.insert(0, 'source_id', df_sus_sy_normalized['source_id'])
df5.head(5)

Unnamed: 0,source_id,0,1,2,label
0,4687286621186701568,0.999,0.0008,0.0001,0
1,4651824725526390016,0.9098,0.0877,0.0025,0
2,3321366590173335424,0.9483,0.041,0.0106,0
3,5410876219867043072,0.9999,0.0001,0.0,0
4,3575939163051304192,0.9638,0.0333,0.0029,0


In [31]:
df6 = pd.read_csv('../../new/candidate_symbiotic_stars_v1/built_dataset/suspected_SY_dataset.csv')
df6.head(5)

Unnamed: 0,FIND_NAME,MAIN_ID,OTYPE,SP_TYPE,ID_Gaia,IDS,OTYPES,Gaia DR3
0,RAW 1691,LIN 521,C*,C,Gaia DR2 4687286621186701568,RAW 1691|LIN 521|2MASS J01183570-7242213|OGLE ...,C*|Em*|LP*|LP*|Em*|MIR|NIR|*|C*?|LP?,4687286621186701568
1,[BE74] 583,[BE74] 583,LongPeriodV*,G/Ke:,Gaia DR2 4651824725526390016,2MASS J05265014-7106350|EROS2-star lm058-2n-25...,LP*|Em*|NIR|V*|*,4651824725526390016
2,StHA 55,EM* StHA 55,Mira,,Gaia DR3 3321366590173335424,IRAS 05440+0642|ASAS J054642+0643.7|ASAS J0546...,Mi*|LP*|V*|LP*|SB*|LP*|MIR|V*|Em*|NIR|*|C*?|IR...,3321366590173335424
3,ZZ CMi,V* ZZ CMi,LongPeriodV*,M6I-IIep,Gaia DR3 3155368612444708096,BD+09 1633|AN 306.1934|DO 2156|GCRV 4915|G...,LP*|NIR|V*|*|IR|LP?,3155368612444708096
4,WRAY 16−51,WRAY 16-51,LongPeriodV*,M4,Gaia DR2 5410876219860836224,IRAS 09316-4621|AKARI-IRC-V1 J0933295-463450|D...,LP*|NIR|MIR|Em*|PN|*|IR,5410876219867043072


In [32]:
# Filtro de data frames
df_filtered = df6.merge(df5, left_on=['Gaia DR3'], right_on=['source_id'], how='inner', indicator=True)
df_filtered = df_filtered[df_filtered['_merge'] == 'both']
df_filtered = df_filtered.iloc[:, [0, 1, 7, 9, 10, 11, 12]]
df_filtered.head(5)

Unnamed: 0,FIND_NAME,MAIN_ID,Gaia DR3,0,1,2,label
0,RAW 1691,LIN 521,4687286621186701568,0.999,0.0008,0.0001,0
1,[BE74] 583,[BE74] 583,4651824725526390016,0.9098,0.0877,0.0025,0
2,StHA 55,EM* StHA 55,3321366590173335424,0.9483,0.041,0.0106,0
3,WRAY 16−51,WRAY 16-51,5410876219867043072,0.9999,0.0001,0.0,0
4,NSV 05572,V* VX Crv,3575939163051304192,0.9638,0.0333,0.0029,0


In [33]:
out_name = 'rna_balanced.csv'
out_dir = '../../new/candidate_symbiotic_stars_v1/output'
if not os.path.exists(out_dir):
    os.mkdir(out_dir)

fullname = os.path.join(out_dir, out_name)
df_filtered.to_csv(fullname, header=True, index=False)

# Suspected Symbiotic Stars v2

In [34]:
import numpy as np
from sklearn.metrics import confusion_matrix

df_sus_sy = pd.read_csv("../../new/candidate_symbiotic_stars_v2/normalized/Suspected_SY.csv", header=None)

# predecir las probabilidades para los datos de prueba
y_probs = model.predict(df_sus_sy)
y_pred = np.argmax(y_probs, axis=-1)
cm = confusion_matrix([0 for _ in range(len(df_sus_sy))], y_pred, labels=[0, 1, 2])
print(cm)

[[13  1  3]
 [ 0  0  0]
 [ 0  0  0]]


In [35]:
df_sus_sy_normalized = pd.read_csv("../../new/candidate_symbiotic_stars_v2/calibrated_data/Suspected_SY.csv")

df5 = pd.DataFrame(y_probs)
df5 = df5.round(4)
df5['label'] = y_pred
df5.insert(0, 'source_id', df_sus_sy_normalized['source_id'])
df5.head(5)

Unnamed: 0,source_id,0,1,2,label
0,6204217186929931520,0.9987,0.0013,0.0,0
1,4061952680197028224,0.9085,0.0881,0.0034,0
2,670455944074475008,0.3494,0.0075,0.6431,2
3,4068755633500598272,0.011,0.0548,0.9343,2
4,2060829659152816768,0.0179,0.8098,0.1724,1


In [36]:
df6 = pd.read_csv('../../new/candidate_symbiotic_stars_v2/built_dataset/suspected_SY_dataset.csv')
df6.head(5)

Unnamed: 0,FIND_NAME,MAIN_ID,OTYPE,SP_TYPE,ID_Gaia,IDS,OTYPES,Gaia DR3
0,V748 Cen,V* V748 Cen,EclBin,Ae,Gaia DR3 6204217186929931520,CD-32 10517|ALS 18924|CRTS J145936.6-332503|CS...,EB*|Ro*|NIR|V*|Em*|*,6204217186929931520
1,WRAY 16-294,WRAY 16-294,LongPeriodV*,K5,Gaia DR2 4061952680197028224,2MASS J17391381-2538050|DENIS J173913.8-253805...,LP*|PN|NIR|Em*|*|C*?|ISM|LP?,4061952680197028224
2,DASCH J075731.1+201735,ASAS J075731+2017.6,LongPeriodV*,M0III,Gaia DR2 670455944074475008,2MASS J07573112+2017347|ASAS J075731+2017.6|DA...,SB*|LP*|NIR|V*|*|Opt,670455944074475008
3,ASAS J174600-2321.3,ASAS J174600-2321.3,LongPeriodV*_Candidate,F0I,Gaia DR2 4068755633500598272,2MASS J17460018-2321163|ASAS J174600-2321.3|ER...,NIR|V*|*|LP?,4068755633500598272
4,IPHASJ201550.96+373004.2,IRAS 20140+3720,PlanetaryNeb_Candidate,,Gaia DR2 2060829659152816768,2MASS J20155096+3730042|AKARI-IRC-V1 J2015509+...,NIR|*|C*?|IR|LP?|PN?,2060829659152816768


In [37]:
# Filtro de data frames
df_filtered = df6.merge(df5, left_on=['Gaia DR3'], right_on=['source_id'], how='inner', indicator=True)
df_filtered = df_filtered[df_filtered['_merge'] == 'both']
df_filtered = df_filtered.iloc[:, [0, 1, 7, 9, 10, 11, 12]]
df_filtered.head(5)

Unnamed: 0,FIND_NAME,MAIN_ID,Gaia DR3,0,1,2,label
0,V748 Cen,V* V748 Cen,6204217186929931520,0.9987,0.0013,0.0,0
1,WRAY 16-294,WRAY 16-294,4061952680197028224,0.9085,0.0881,0.0034,0
2,DASCH J075731.1+201735,ASAS J075731+2017.6,670455944074475008,0.3494,0.0075,0.6431,2
3,ASAS J174600-2321.3,ASAS J174600-2321.3,4068755633500598272,0.011,0.0548,0.9343,2
4,IPHASJ201550.96+373004.2,IRAS 20140+3720,2060829659152816768,0.0179,0.8098,0.1724,1


In [38]:
out_name = 'rna_balanced.csv'
out_dir = '../../new/candidate_symbiotic_stars_v2/output'
if not os.path.exists(out_dir):
    os.mkdir(out_dir)

fullname = os.path.join(out_dir, out_name)
df_filtered.to_csv(fullname, header=True, index=False)