In [None]:
import numpy as np
import pandas as pd
import root_pandas
from matplotlib import gridspec
import matplotlib.pyplot as plt
%matplotlib inline

import ROOT as r
# Load the Belle2Style.C macro into ROOT and call SetBelle2Style()
# (presumably defined by that macro -- confirm against the macro file).
r.gROOT.LoadMacro('/belle2work/BelleII/belle2style/Belle2Style.C')
r.SetBelle2Style()

# Matplotlib defaults for every figure in this notebook:
# larger text and a 12 x 8 figure size.
plt.rcParams['font.size'] = 20
plt.rcParams['figure.figsize'] = (12, 8)

# Truth-label column(s); the same list is reused when the test sample is loaded.
mccols = ["Xic_isSignal"]

# Training sample: read only the listed branches from the 'xicp' tree.
train = '/belle2work/sbasil/lambdacpi0/fromBelleII/FastBDTs/train2.root'
df_train = root_pandas.read_root(
    train,
    key='xicp',
    columns=[
        "K_kaonID_noSVD",
        "p_protonID_noSVD",
        "Lambdac_y",
        "Xic_alpha",
        "Xic_DeltaM",
        *mccols,
    ],
)

In [None]:
# Test sample: same tree and same branches as the training sample.
# Relies on `mccols` having been defined by the training-sample cell.
test = '/belle2work/sbasil/lambdacpi0/fromBelleII/FastBDTs/test2.root'
df_test = root_pandas.read_root(
    test,
    key='xicp',
    columns=[
        "K_kaonID_noSVD",
        "p_protonID_noSVD",
        "Lambdac_y",
        "Xic_alpha",
        "Xic_DeltaM",
        *mccols,
    ],
)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier


In [None]:
# train_data / test_data are aliases of df_train / df_test (no copy is made),
# so the cleaning below is visible through both names.
train_data = df_train
test_data = df_test

# Replace NaN by 0 in every column, the truth label included. Note that
# np.nan_to_num also maps +/-inf to the largest finite values of the dtype.
#
# The cleaned values are produced in a fresh array and assigned back to the
# column. Writing into the Series' own buffer (copy=False) is avoided: that
# buffer can be read-only (e.g. with pandas Copy-on-Write), in which case the
# in-place write raises, and it is redundant since the result is reassigned.
for frame in (train_data, test_data):
    for c in frame.columns:
        frame[c] = np.nan_to_num(frame[c].to_numpy())

# Classifier inputs: every column except the truth label(s) and Xic_DeltaM.
X_train = train_data.drop(columns=[*mccols, "Xic_DeltaM"])
X_test = test_data.drop(columns=[*mccols, "Xic_DeltaM"])

# Binary targets.
y_train = train_data["Xic_isSignal"]
y_test = test_data["Xic_isSignal"]

# Standardise with statistics taken from the training sample only, then
# apply the identical transformation to the test sample.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy

# Fully connected binary classifier: 64 -> 32 -> 1 units with a sigmoid output,
# taking one input per column of the scaled feature matrix.
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

# Use the 'accuracy' string rather than an Accuracy() instance: Accuracy()
# counts exact equality between the labels and the raw sigmoid outputs, which
# is essentially never true for continuous probabilities. With a
# binary cross-entropy loss Keras resolves 'accuracy' to binary accuracy
# (outputs thresholded at 0.5).
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.fit(X_train_scaled, y_train, epochs=20, batch_size=32)

# Sigmoid outputs for the test sample; one row per test entry, read out
# downstream via element [0] of each row.
y_probabilities = model.predict(X_test_scaled)

In [None]:
# Put back the two columns that were dropped from the feature matrix so they
# can be read from X_test when the predictions table is assembled.
# NOTE: after this cell X_test also contains the truth label; re-create X_test
# from test_data before feeding it to the scaler or the model again.
for col in ("Xic_DeltaM", "Xic_isSignal"):
    X_test[col] = test_data[col]
# Additional truth columns could be attached the same way (currently disabled):
#for v in ["Lambdac_isSignal","Lambdac_mcPDG","Lambdac_genMotherPDG","pi0_mcPDG","pi0_genMotherPDG"]:
    #X_test[v] = test_data[v]

In [None]:
# One row per test-sample entry: network output ("pred") followed by the
# truth flag and Xic_DeltaM taken from X_test.
predictions_df = X_test[["Xic_isSignal", "Xic_DeltaM"]].copy()
predictions_df.insert(0, "pred", [row[0] for row in y_probabilities])

In [None]:
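# Disabled export: predictions_df.to_csv(index=False) returns the CSV text as a string; pass a file path as the first argument to write it to disk.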

In [None]:
# Keep only rows whose network output exceeds 0.1.
predictions_df = predictions_df.loc[predictions_df["pred"] > 0.1]

In [None]:
# Stacked histogram of `var` for background (Xic_isSignal == 0) and
# signal (Xic_isSignal == 1) rows of predictions_df that pass `cuts`.
fig, ax = plt.subplots()
cuts = "Xic_DeltaM > 0"
var = "Xic_DeltaM"

# Truth-category selections. They are not used in this cell, and the columns
# they reference are not part of predictions_df as built in this notebook;
# kept because other cells may rely on these names.
charmbgs = '((abs(Lambdac_mcPDG)==411 and abs(Lambdac_genMotherPDG)==413) or (abs(Lambdac_mcPDG)==421 and abs(Lambdac_genMotherPDG)==423) or (abs(Lambdac_mcPDG)==431 and abs(Lambdac_genMotherPDG)==433))'
sigmast = '(pi0_mcPDG==111 and Lambdac_isSignal==1 and ((abs(Lambdac_genMotherPDG)==4212 and abs(pi0_genMotherPDG)==4212)))'
sigmastst = '(pi0_mcPDG==111 and Lambdac_isSignal==1 and ((abs(Lambdac_genMotherPDG)==4214 and abs(pi0_genMotherPDG)==4214)))'

npbkg = predictions_df.query(cuts + ' and Xic_isSignal == 0')[var].to_numpy()
nptrue = predictions_df.query(cuts + ' and Xic_isSignal == 1')[var].to_numpy()

# To draw the signal component alone, pass [nptrue] with label=["signal"].
ax.hist([npbkg, nptrue], bins=200, range=(0.125, 0.4),
        label=["bkg", "signal"], stacked=True)

# Label the axes through the Axes object; the x label follows `var` so it
# cannot drift from the plotted quantity.
ax.set_xlabel(var)
ax.set_ylabel("Entries / bin")
ax.legend(loc=0)
# The displayed window is narrower than the binned range (upper edge 0.4).
ax.set_xlim((0.125, 0.35))
plt.show()



In [None]:
# Rebuild the full predictions table from scratch (this discards any earlier
# filtering of predictions_df): network output, truth flag and Xic_DeltaM.
predictions_df = pd.DataFrame(
    dict(
        pred=[score[0] for score in y_probabilities],
        Xic_isSignal=X_test["Xic_isSignal"],
        Xic_DeltaM=X_test["Xic_DeltaM"],
    )
)
# Disabled: Xic_DeltaM arrays split by truth flag.
#base_bkg = predictions_df.query('Xic_isSignal == 0')["Xic_DeltaM"].to_numpy()
#base_sig = predictions_df.query('Xic_isSignal == 1')["Xic_DeltaM"].to_numpy()
