In [None]:
!pip uninstall -y tensorflow tf-keras tensorflow-probability
!pip install tensorflow==2.15.0 tf-keras==2.15.0 tensorflow-probability==0.22


In [None]:
# Fetch the NODE (neural oblivious decision ensembles) implementation into a
# local directory named `node`, relative to the current working directory.
!git clone https://github.com/xl402/neural-oblivious-decision-ensembles.git node

import sys
# Add the clone to the module search path.
# NOTE(review): assumes the working directory is /kaggle/working, so that the
# clone above lands at exactly this path — confirm when running elsewhere.
sys.path.append("/kaggle/working/node")

# NODE is the model class instantiated later in this notebook.
from node.networks.model import NODE

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score


In [None]:
# 1. Load the data: the submission log followed by the three public score
#    tables (ck, qt, th), each read from its own CSV file.
annonimized, ck, qt, th = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/it001-diem/annonimized.csv',
        '/kaggle/input/it001-diem/ck-public.csv',
        '/kaggle/input/it001-diem/qt-public.csv',
        '/kaggle/input/it001-diem/th-public.csv',
    )
)


In [None]:

# 2. Compute the TBTL score as a weighted sum of the three component scores.
# Every component is coerced to numeric first; entries that cannot be parsed
# become NaN. CK is coerced as well as diemqt/TH: a single non-numeric CK
# entry would otherwise leave the column as object dtype and break the
# arithmetic below.
qt['diemqt'] = pd.to_numeric(qt['diemqt'], errors='coerce')
th['TH'] = pd.to_numeric(th['TH'], errors='coerce')
ck['CK'] = pd.to_numeric(ck['CK'], errors='coerce')

# Outer joins keep every hash that appears in any of the three tables;
# components missing for a hash are NaN after the merge.
df_score = ck.merge(qt, on='hash', how='outer').merge(th, on='hash', how='outer')

# Weights: 30% diemqt, 20% TH, 50% CK.
df_score['TBTL'] = 0.3 * df_score['diemqt'] + 0.2 * df_score['TH'] + 0.5 * df_score['CK']

# A NaN in any component propagates into TBTL; those rows cannot serve as
# regression targets and are dropped.
df_score = df_score.dropna(subset=['TBTL'])


In [None]:
# 3. Extract per-user features from the submission log.
# Give the log columns readable names (positional rename of all 11 columns).
annonimized.columns = [
    "assignment_id", "problem_id", "username", "is_final", "status",
    "pre_score", "coefficient", "language_id", "created_at",
    "updated_at", "judgement"
]

# One row per username, one aggregate per output column.
df_feat = (
    annonimized
    .groupby("username")
    .agg(
        total_submissions=("assignment_id", "size"),    # rows per user
        final_submissions=("is_final", "sum"),
        unique_assignments=("assignment_id", "nunique"),
        unique_problems=("problem_id", "nunique"),
        mean_pre_score=("pre_score", "mean"),
        std_pre_score=("pre_score", "std"),
        median_pre_score=("pre_score", "median"),
        # fraction of the user's rows whose status equals "accepted"
        accepted_ratio=("status", lambda s: s.eq("accepted").mean()),
        mean_coefficient=("coefficient", "mean"),
    )
    .reset_index()
    # aggregates that are undefined (e.g. std of a single row) become 0
    .fillna(0)
)


In [None]:


# 4. Build the supervised training set: users that have both log features and
#    a TBTL score (inner join on username == hash).
df_train = df_feat.merge(df_score, left_on='username', right_on='hash').dropna(subset=["TBTL"])

# Features are everything except the identifiers and the score columns.
X = df_train.drop(columns=["username", "hash", "CK", "diemqt", "TH", "TBTL"])
y = df_train["TBTL"].values

# Split BEFORE scaling. Fitting the scaler on all rows would let the
# validation rows influence the mean/variance used to transform the training
# rows (data leakage), making the validation loss optimistic.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)   # statistics come from the training rows only
X_val = scaler.transform(X_val_raw)           # validation rows reuse the training statistics

# Scaled copy of the full labelled set, kept so the name `X_scaled` stays defined.
X_scaled = scaler.transform(X)


In [None]:

# 5. Configure the NODE regressor and wrap it in a compiled Keras model.
model = NODE(
    link=tf.keras.activations.linear,  # linear link, i.e. regression
    units=2,                           # output of each tree
    tree_depth=6,                      # depth of each tree
    n_trees=256,                       # 256 trees per layer
    n_layers=4,                        # number of NODE layers
)
# NOTE(review): the target `y` holds one value per row while `units` is 2 —
# confirm that more than one output unit is intended for this regression.

# The input width is the number of scaled feature columns.
inputs = tf.keras.Input(shape=X_train.shape[1:])
outputs = model(inputs)
node_model = tf.keras.Model(inputs, outputs)

# Mean squared error, optimized with Adam at a learning rate of 1e-3.
node_model.compile(
    loss="mse",
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)



In [None]:

# 6. Train. `epochs` is only an upper bound: early stopping ends the run after
#    10 epochs without a better validation loss and restores the best weights,
#    while the learning rate is halved (down to 1e-5) after 5 stagnant epochs.
node_model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=2000,
    verbose=1,
    validation_data=(X_val, y_val),
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=10,
            restore_best_weights=True,
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.5,
            patience=5,
            min_lr=1e-5,
        ),
    ],
)


In [None]:

# 7. Predict TBTL for every user that appears in the submission log.
df_all = df_feat.copy()
X_all = df_all.drop(columns=["username"])
X_all_scaled = scaler.transform(X_all)   # reuse the scaler fitted for training

raw_predictions = np.asarray(node_model.predict(X_all_scaled))

# Exactly one prediction per user is required. A plain `.flatten()` only has
# the right length when the model emits a single value per row; the NODE model
# is configured with `units=2`, so the output may carry more than one column
# (NOTE(review): confirm the output shape of NODE). Reshaping to one row per
# user and averaging across columns is identical to `flatten()` for a
# single-output model and avoids a length mismatch otherwise.
df_all["TBTL"] = raw_predictions.reshape(len(df_all), -1).mean(axis=1)


In [None]:


# 8. Write the submission file: one row per user, with the user identifier
#    exposed under the column name `hash` next to the predicted TBTL.
submission = pd.DataFrame({
    "hash": df_all["username"],
    "TBTL": df_all["TBTL"],
})
submission.to_csv("submission_1.csv", index=False)
