In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report

In [None]:
# Directory on the mounted Google Drive that holds the CSV files.
# Change this single constant when running outside of Colab.
DATA_DIR = '/content/drive/MyDrive/Coling2025'

# `train` is the training split; `test` is loaded from the *dev* file and,
# like `train`, carries `essay` and `label` columns used by later cells.
train = pd.read_csv(f'{DATA_DIR}/english_train.csv')
test = pd.read_csv(f'{DATA_DIR}/english_dev.csv')

In [None]:
def _encode_label(value):
    """Map a raw label to 0 for 'human' and 1 for anything else.

    Values that are already the integers 0 or 1 are passed through unchanged,
    which makes this cell safe to re-run: without that guard a second run
    would compare 0 against 'human' and turn every label into 1.
    """
    already_encoded = (
        isinstance(value, (int, np.integer))
        and not isinstance(value, bool)
        and value in (0, 1)
    )
    if already_encoded:
        return int(value)
    return 0 if value == 'human' else 1


train['label'] = train['label'].apply(_encode_label)
test['label'] = test['label'].apply(_encode_label)

In [None]:
# Preview the first rows of the training frame as the cell output.
train.head()

Unnamed: 0,id,essay,label
0,cc7727e8bfbc6e0051b71ceeeb928755def0f5e94b5168...,I disagree with the statement that the develop...,1
1,fa7e1bc99ea20c78df905eb3780cb835bed078b64fce6a...,I disagree with the statement that the primary...,1
2,d16ec17f6ffd059cf2eefe79ca2ccd9bdb78ea5ec2d6a0...,International sports events require the most w...,0
3,838f538cf1b0e03e51e423266844492af1a72874a224d4...,While some individuals may argue that working ...,1
4,cd9fe478b001e04bcdc276f9675b90bc8c6332a7ea8abf...,I disagree with the statement that working rem...,1


In [None]:
# Preview the first rows of the dev frame (held in `test`) as the cell output.
test.head()

Unnamed: 0,id,essay,label
0,f2c01bb1c6a76d96032879f999c94b0a6f85c7dc811789...,When faced to the questions that successful pe...,0
1,daece00c9d1a2fac77d9691df68b3c4537ac1e08569ddf...,"To start with , nowadays theres alot for peopl...",0
2,e3518500503fecb0e4993323f5b265d0ab386bd95ea2d5...,Successful people try new things because...,0
3,5796a76d2e4dcf027f3dbab6ca0fbd99c92b1a8be00645...,The path to success is rarely paved with famil...,1
4,5d7d0983750bd528407aba6e2fa8731d66b6a58bf9d920...,The pursuit of success often involves venturin...,1


In [None]:
from scipy.sparse import vstack

# Combined train + dev essay texts; kept because the name `df` was defined here.
df = pd.concat([train['essay'], test['essay']], axis=0)

# Learn the vocabulary and IDF weights from the training essays only, so that
# no statistics from the dev essays leak into the features used for
# cross-validation. The dev essays are only transformed.
vectorizer = TfidfVectorizer(stop_words='english', max_features=50000)
train_tfidf = vectorizer.fit_transform(train['essay'])
dev_tfidf = vectorizer.transform(test['essay'])

# Train rows first, then dev rows: later cells slice X[:train.shape[0]] and
# X[train.shape[0]:] and depend on exactly this row order. CSR keeps row
# slicing and fancy row indexing available.
X = vstack([train_tfidf, dev_tfidf], format='csr')

In [None]:
# Display the sparse feature matrix summary (shape, dtype, number of stored values).
X

<3722x20291 sparse matrix of type '<class 'numpy.float64'>'
	with 362427 stored elements in Compressed Sparse Row format>

In [None]:
# 5-fold stratified cross-validation of a default logistic regression on the
# training rows of X, scored by ROC AUC on each held-out fold.
lr_model = LogisticRegression()
# Seed the shuffle so the folds, and therefore the scores, are reproducible.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
auc_scores = []

# The training essays occupy the first train.shape[0] rows of X; slice them
# once here instead of re-slicing on every fold.
X_train_all = X[:train.shape[0]]
y_train_all = train['label']

for train_idx, val_idx in cv.split(X_train_all, y_train_all):
    X_train, X_val = X_train_all[train_idx], X_train_all[val_idx]
    y_train, y_val = y_train_all.iloc[train_idx], y_train_all.iloc[val_idx]

    # Refit on this fold's training part (the same estimator object is reused,
    # so after the loop `lr_model` holds the fit from the last fold).
    lr_model.fit(X_train, y_train)

    # Probability of the positive class (label 1) for the held-out rows.
    preds_val_lr = lr_model.predict_proba(X_val)[:, 1]

    auc_score = roc_auc_score(y_val, preds_val_lr)
    auc_scores.append(auc_score)

# Per-fold scores followed by their mean and population standard deviation
# (np.std divides by the number of folds, like the original formula).
for i, score in enumerate(auc_scores, 1):
    print(f'ROC AUC for fold {i}: {score:.4f}')

print('Average ROC AUC:', round(float(np.mean(auc_scores)), 4))
print('Standard deviation:', round(float(np.std(auc_scores)), 4))

ROC AUC for fold 1: 1.0000
ROC AUC for fold 2: 1.0000
ROC AUC for fold 3: 1.0000
ROC AUC for fold 4: 1.0000
ROC AUC for fold 5: 1.0000
Average ROC AUC: 1.0
Standard deviation: 0.0


In [None]:
# Score every training row and every dev row with `lr_model` as it currently
# stands (its most recent fit happened inside the CV loop), then report the
# ROC AUC on the training rows.
n_train_rows = train.shape[0]
train_rows = X[:n_train_rows]
dev_rows = X[n_train_rows:]

preds_train = lr_model.predict_proba(train_rows)[:, 1]
preds_test = lr_model.predict_proba(dev_rows)[:, 1]

train_auc = roc_auc_score(train['label'], preds_train)
print('ROC AUC train:', train_auc)

ROC AUC train: 1.0


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
import numpy as np

# 5-fold stratified cross-validation of a default logistic regression,
# collecting ROC AUC (from probabilities) and accuracy / precision / recall /
# F1 / MCC (from hard predictions) for every fold.
lr_model = LogisticRegression()
# Seed the shuffle so the folds, and therefore every score, are reproducible.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
auc_scores = []
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
mcc_scores = []

# The training essays occupy the first train.shape[0] rows of X; slice them
# once here instead of re-slicing on every fold.
X_train_all = X[:train.shape[0]]
y_train_all = train['label']

for train_idx, val_idx in cv.split(X_train_all, y_train_all):
    X_train, X_val = X_train_all[train_idx], X_train_all[val_idx]
    y_train, y_val = y_train_all.iloc[train_idx], y_train_all.iloc[val_idx]

    # Refit on this fold's training part; after the loop `lr_model` holds the
    # fit from the last fold.
    lr_model.fit(X_train, y_train)

    # Positive-class probabilities feed ROC AUC; hard labels feed the rest.
    preds_val_lr = lr_model.predict_proba(X_val)[:, 1]
    preds_val = lr_model.predict(X_val)

    auc_score = roc_auc_score(y_val, preds_val_lr)
    auc_scores.append(auc_score)

    accuracy = accuracy_score(y_val, preds_val)
    precision = precision_score(y_val, preds_val)
    recall = recall_score(y_val, preds_val)
    f1 = f1_score(y_val, preds_val)
    mcc = matthews_corrcoef(y_val, preds_val)

    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    mcc_scores.append(mcc)

# Per-fold report.
for i, score in enumerate(auc_scores, 1):
    print(f'Fold {i}:')
    print(f'  ROC AUC: {score:.4f}')
    print(f'  Accuracy: {accuracy_scores[i-1]:.4f}')
    print(f'  Precision: {precision_scores[i-1]:.4f}')
    print(f'  Recall: {recall_scores[i-1]:.4f}')
    print(f'  F1 Score: {f1_scores[i-1]:.4f}')
    print(f'  MCC: {mcc_scores[i-1]:.4f}\n')

# Averages over the folds.
print('Average ROC AUC:', round(float(np.mean(auc_scores)), 4))
print('Average Accuracy:', round(float(np.mean(accuracy_scores)), 4))
print('Average Precision:', round(float(np.mean(precision_scores)), 4))
print('Average Recall:', round(float(np.mean(recall_scores)), 4))
print('Average F1 Score:', round(float(np.mean(f1_scores)), 4))
print('Average MCC:', round(float(np.mean(mcc_scores)), 4))

# Positive-class probabilities for all training rows and all dev rows, using
# the model left over from the last fold.
preds_train = lr_model.predict_proba(X_train_all)[:, 1]
preds_test = lr_model.predict_proba(X[train.shape[0]:])[:, 1]

# ROC AUC over the full training split (includes rows the model was fit on).
print('ROC AUC train:', roc_auc_score(train['label'], preds_train))


Fold 1:
  ROC AUC: 1.0000
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000
  MCC: 1.0000

Fold 2:
  ROC AUC: 1.0000
  Accuracy: 0.9976
  Precision: 0.9966
  Recall: 1.0000
  F1 Score: 0.9983
  MCC: 0.9943

Fold 3:
  ROC AUC: 1.0000
  Accuracy: 0.9928
  Precision: 0.9899
  Recall: 1.0000
  F1 Score: 0.9949
  MCC: 0.9830

Fold 4:
  ROC AUC: 1.0000
  Accuracy: 0.9976
  Precision: 0.9966
  Recall: 1.0000
  F1 Score: 0.9983
  MCC: 0.9943

Fold 5:
  ROC AUC: 1.0000
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000
  MCC: 1.0000

Average ROC AUC: 1.0
Average Accuracy: 0.9976
Average Precision: 0.9966
Average Recall: 1.0
Average F1 Score: 0.9983
Average MCC: 0.9943
ROC AUC train: 1.0


In [None]:
# Load the devtest essays from Google Drive; the displayed frame below has
# `id` and `essay` columns only.
devtest = pd.read_csv('/content/drive/MyDrive/Coling2025/english_devtest.csv')
# Show the frame as the cell output.
devtest

Unnamed: 0,id,essay
0,abab7ec20320a84b1c1f097a39c895352e4341d6b00c3e...,"Enjoyment means entertainment, satisfaction of..."
1,15a035178efbb0f45d3d6e3421528e23721a802e6c4730...,Before to start with my opinion of the topic i...
2,ede4767f9bc94020ee2ed56ee0d03404c76c2877ce0218...,"Thats a really really good subject, but acctul..."
3,2c996b6d0beb561b9d0ea2b4426b3cf78cd88b544e0934...,I disagree with the statement saying that it i...
4,3cce0e704c3d2023155e3634f130c616bd49ff3b75eb8c...,It is undeniable that advertisements play a si...
...,...,...
864,a4faf8f47e79c51f71cafea21e8137fdeed7418a71229c...,Advertisements occupy a big part in our lives ...
865,318b7d74d32c4613b1ffc9b41b4a4b5fb321b815c27b05...,Young people are the fundamental building bloc...
866,6ca284a081ce0b739b68b3edca543780d88ee056425f3a...,"In my opinion, and experience, old people can ..."
867,980647f5bf2b94a4f67b6c36665b165440bbea259520d1...,I agree with this statement that most advertis...


In [None]:
# Vectorize the devtest essays with the already-fitted `vectorizer`
# (transform only, the vocabulary is not refitted).
X_devtest = vectorizer.transform(devtest['essay'])

In [None]:
# Store hard predictions from `lr_model` in a new `label` column.
# NOTE(review): `lr_model` was last fitted inside the CV loop, i.e. on the
# final fold's training subset rather than on all training rows — confirm
# that this is intended.
devtest['label'] = lr_model.predict(X_devtest)

In [None]:
# Count how many devtest essays fall into each predicted class.
devtest['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
0,778
1,91


In [None]:
# Rebuild the features from the training essays only: no stop-word list and
# at most 5000 vocabulary terms.
# NOTE: this rebinds `vectorizer` and `X`, replacing the objects created by
# the earlier TF-IDF cell; `X` now contains training rows only.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(train['essay'])
# Target vector aligned row-for-row with X.
y = train['label']

In [None]:
# Stratified 5-fold cross-validation of a default logistic regression on the
# train-only feature matrix `X` / target `y`, scored by ROC AUC per fold.
lr_model = LogisticRegression()
cv = StratifiedKFold(n_splits=5, shuffle=True)
auc_scores = []

for train_idx, val_idx in cv.split(X, y):
    # Features and labels for the fitting part and the held-out part.
    X_train = X[train_idx]
    y_train = y.iloc[train_idx]
    X_val = X[val_idx]
    y_val = y.iloc[val_idx]

    lr_model.fit(X_train, y_train)

    # Column 1 of predict_proba is the probability of label 1.
    preds_val_lr = lr_model.predict_proba(X_val)[:, 1]

    auc_score = roc_auc_score(y_val, preds_val_lr)
    auc_scores.append(auc_score)

# Summary statistics: arithmetic mean and population standard deviation.
n_folds = len(auc_scores)
auc_mean = sum(auc_scores) / n_folds
auc_spread = (sum([(x - auc_mean) ** 2 for x in auc_scores]) / n_folds) ** 0.5

for fold_no, fold_auc in enumerate(auc_scores, 1):
    print(f'ROC AUC for fold {fold_no}: {fold_auc:.4f}')
print('Average ROC AUC:', round(auc_mean, 4))
print('Standard deviation:', round(auc_spread, 4))


ROC AUC for fold 1: 1.0000
ROC AUC for fold 2: 1.0000
ROC AUC for fold 3: 1.0000
ROC AUC for fold 4: 1.0000
ROC AUC for fold 5: 1.0000
Average ROC AUC: 1.0
Standard deviation: 0.0


In [None]:
# Re-vectorize the devtest essays with the current `vectorizer` and store the
# logistic-regression hard predictions in a `prediction` column.
# NOTE(review): `lr_model` holds the fit from the last CV fold — confirm that
# predicting with it (rather than a model refit on all rows) is intended.
X_devtest = vectorizer.transform(devtest['essay'])
devtest['prediction'] = lr_model.predict(X_devtest)

In [None]:
# Class distribution of the `prediction` column.
devtest['prediction'].value_counts()

Unnamed: 0_level_0,count
prediction,Unnamed: 1_level_1
0,735
1,134


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report

In [None]:
from scipy.sparse import vstack

# Combined train + dev essay texts; kept because the name `df` was defined here.
df = pd.concat([train['essay'], test['essay']], axis=0)

# Learn the vocabulary and IDF weights from the training essays only, so that
# no statistics from the dev essays leak into the features used for
# cross-validation. The dev essays are only transformed.
vectorizer = TfidfVectorizer(stop_words='english', max_features=50000)
train_tfidf = vectorizer.fit_transform(train['essay'])
dev_tfidf = vectorizer.transform(test['essay'])

# Train rows first, then dev rows: later cells slice X[:train.shape[0]] and
# X[train.shape[0]:] and depend on exactly this row order. CSR keeps row
# slicing and fancy row indexing available.
X = vstack([train_tfidf, dev_tfidf], format='csr')

In [None]:
# Display the sparse feature matrix summary (shape, dtype, number of stored values).
X

<3722x20291 sparse matrix of type '<class 'numpy.float64'>'
	with 362427 stored elements in Compressed Sparse Row format>

In [None]:
# Stratified 5-fold cross-validation of a default XGBoost classifier on the
# training rows of X, scored by ROC AUC on each held-out fold.
xgb_model = XGBClassifier()
cv = StratifiedKFold(n_splits=5, shuffle=True)
auc_scores = []

# Rows belonging to the training essays, and their labels.
train_rows = X[:train.shape[0]]
train_labels = train['label']

for train_idx, val_idx in cv.split(train_rows, train_labels):
    X_train = train_rows[train_idx]
    y_train = train_labels.iloc[train_idx]
    X_val = train_rows[val_idx]
    y_val = train_labels.iloc[val_idx]

    xgb_model.fit(X_train, y_train)

    # Column 1 of predict_proba is the probability of label 1.
    preds_val_xgb = xgb_model.predict_proba(X_val)[:, 1]

    auc_score = roc_auc_score(y_val, preds_val_xgb)
    auc_scores.append(auc_score)

# Summary statistics: arithmetic mean and population standard deviation.
n_folds = len(auc_scores)
auc_mean = sum(auc_scores) / n_folds
auc_spread = (sum([(x - auc_mean) ** 2 for x in auc_scores]) / n_folds) ** 0.5

for fold_no, fold_auc in enumerate(auc_scores, 1):
    print(f'ROC AUC for fold {fold_no}: {fold_auc:.4f}')

print('Average ROC AUC:', round(auc_mean, 4))
print('Standard deviation:', round(auc_spread, 4))

ROC AUC for fold 1: 0.9997
ROC AUC for fold 2: 0.9989
ROC AUC for fold 3: 0.9995
ROC AUC for fold 4: 0.9997
ROC AUC for fold 5: 0.9993
Average ROC AUC: 0.9994
Standard deviation: 0.0003


In [None]:
# Score every training row and every dev row with `xgb_model` as it currently
# stands (its most recent fit happened inside the CV loop), then report the
# ROC AUC on the training rows.
n_train_rows = train.shape[0]
train_rows = X[:n_train_rows]
dev_rows = X[n_train_rows:]

preds_train = xgb_model.predict_proba(train_rows)[:, 1]
preds_test = xgb_model.predict_proba(dev_rows)[:, 1]

train_auc = roc_auc_score(train['label'], preds_train)
print('ROC AUC train:', train_auc)

ROC AUC train: 0.9999718231403544


In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
import numpy as np

# Stratified 5-fold cross-validation of a default XGBoost classifier,
# collecting ROC AUC (from probabilities) and accuracy / precision / recall /
# F1 / MCC (from hard predictions) for every fold.
xgb_model = XGBClassifier()
cv = StratifiedKFold(n_splits=5, shuffle=True)
auc_scores, accuracy_scores, precision_scores = [], [], []
recall_scores, f1_scores, mcc_scores = [], [], []

# Each hard-label metric paired with the list that collects its per-fold value.
label_metrics = (
    (accuracy_score, accuracy_scores),
    (precision_score, precision_scores),
    (recall_score, recall_scores),
    (f1_score, f1_scores),
    (matthews_corrcoef, mcc_scores),
)

# Rows belonging to the training essays, and their labels.
train_rows = X[:train.shape[0]]
train_labels = train['label']

for train_idx, val_idx in cv.split(train_rows, train_labels):
    X_train, y_train = train_rows[train_idx], train_labels.iloc[train_idx]
    X_val, y_val = train_rows[val_idx], train_labels.iloc[val_idx]

    xgb_model.fit(X_train, y_train)

    # Positive-class probabilities feed ROC AUC; hard labels feed the rest.
    preds_val_xgb = xgb_model.predict_proba(X_val)[:, 1]
    preds_val = xgb_model.predict(X_val)

    auc_score = roc_auc_score(y_val, preds_val_xgb)
    auc_scores.append(auc_score)

    for metric_fn, collected in label_metrics:
        collected.append(metric_fn(y_val, preds_val))

# Per-fold report, walking all six score lists in lockstep.
per_fold = zip(auc_scores, accuracy_scores, precision_scores,
               recall_scores, f1_scores, mcc_scores)
for fold_no, (fold_auc, fold_acc, fold_prec, fold_rec, fold_f1, fold_mcc) in enumerate(per_fold, 1):
    print(f'Fold {fold_no}:')
    print(f'  ROC AUC: {fold_auc:.4f}')
    print(f'  Accuracy: {fold_acc:.4f}')
    print(f'  Precision: {fold_prec:.4f}')
    print(f'  Recall: {fold_rec:.4f}')
    print(f'  F1 Score: {fold_f1:.4f}')
    print(f'  MCC: {fold_mcc:.4f}\n')


def _fold_mean(values):
    """Arithmetic mean of a list of per-fold scores."""
    return sum(values) / len(values)


avg_auc = _fold_mean(auc_scores)
avg_accuracy = _fold_mean(accuracy_scores)
avg_precision = _fold_mean(precision_scores)
avg_recall = _fold_mean(recall_scores)
avg_f1 = _fold_mean(f1_scores)
avg_mcc = _fold_mean(mcc_scores)

# Population standard deviation of the per-fold ROC AUC values.
std_auc = np.std(auc_scores)

summary_rows = (
    ('Average ROC AUC:', avg_auc),
    ('Standard Deviation ROC AUC:', std_auc),
    ('Average Accuracy:', avg_accuracy),
    ('Average Precision:', avg_precision),
    ('Average Recall:', avg_recall),
    ('Average F1 Score:', avg_f1),
    ('Average MCC:', avg_mcc),
)
for caption, value in summary_rows:
    print(caption, round(value, 4))

# Positive-class probabilities for all training rows and all dev rows, using
# the model left over from the last fold.
preds_train = xgb_model.predict_proba(train_rows)[:, 1]
preds_test = xgb_model.predict_proba(X[train.shape[0]:])[:, 1]

# ROC AUC over the full training split.
print('ROC AUC train:', roc_auc_score(train_labels, preds_train))


Fold 1:
  ROC AUC: 0.9998
  Accuracy: 0.9905
  Precision: 0.9899
  Recall: 0.9966
  F1 Score: 0.9932
  MCC: 0.9773

Fold 2:
  ROC AUC: 0.9998
  Accuracy: 0.9905
  Precision: 0.9899
  Recall: 0.9966
  F1 Score: 0.9932
  MCC: 0.9772

Fold 3:
  ROC AUC: 0.9988
  Accuracy: 0.9785
  Precision: 0.9733
  Recall: 0.9966
  F1 Score: 0.9848
  MCC: 0.9489

Fold 4:
  ROC AUC: 0.9996
  Accuracy: 0.9881
  Precision: 0.9898
  Recall: 0.9932
  F1 Score: 0.9915
  MCC: 0.9716

Fold 5:
  ROC AUC: 0.9999
  Accuracy: 0.9928
  Precision: 0.9932
  Recall: 0.9966
  F1 Score: 0.9949
  MCC: 0.9830

Average ROC AUC: 0.9996
Standard Deviation ROC AUC: 0.0004
Average Accuracy: 0.9881
Average Precision: 0.9872
Average Recall: 0.9959
Average F1 Score: 0.9915
Average MCC: 0.9716
ROC AUC train: 0.999994581373145


In [None]:
# Transform the devtest essays with the current `vectorizer` and store the
# XGBoost hard predictions in an `XGBoost` column.
# NOTE(review): `xgb_model` holds the fit from the last CV fold — confirm that
# predicting with it (rather than a model refit on all rows) is intended.
X_devtest = vectorizer.transform(devtest['essay'])
devtest['XGBoost'] = xgb_model.predict(X_devtest)

In [None]:
# Class distribution of the `XGBoost` prediction column.
devtest['XGBoost'].value_counts()

Unnamed: 0_level_0,count
XGBoost,Unnamed: 1_level_1
0,669
1,200


In [None]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report

In [None]:
# Single-iteration CatBoost model; hyperparameters are kept exactly as given.
cat = CatBoostClassifier(
    iterations=1,
    verbose=0,
    random_seed=6543,
    learning_rate=0.005599066836106983,
    subsample=0.35,
    allow_const_label=True,
    loss_function='CrossEntropy',
)

# Start from a fresh splitter and an EMPTY score list. Previously this cell
# reused `cv` and kept appending to `auc_scores` from the preceding model's
# cell, so it reported ten "folds" and an average that mixed two models.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
auc_scores = []

# The training essays occupy the first train.shape[0] rows of X; slice them
# once here instead of re-slicing on every fold.
X_train_all = X[:train.shape[0]]
y_train_all = train['label']

for train_idx, val_idx in cv.split(X_train_all, y_train_all):
    X_train, X_val = X_train_all[train_idx], X_train_all[val_idx]
    y_train, y_val = y_train_all.iloc[train_idx], y_train_all.iloc[val_idx]

    # Refit on this fold's training part; after the loop `cat` holds the fit
    # from the last fold.
    cat.fit(X_train, y_train)

    # Probability of the positive class (label 1) for the held-out rows.
    preds_val_cat = cat.predict_proba(X_val)[:, 1]

    auc_score = roc_auc_score(y_val, preds_val_cat)
    auc_scores.append(auc_score)

# Per-fold scores followed by their mean and population standard deviation
# (np.std divides by the number of folds, like the original formula).
for i, score in enumerate(auc_scores, 1):
    print(f'ROC AUC for fold {i}: {score:.4f}')

print('Average ROC AUC:', round(float(np.mean(auc_scores)), 4))
print('Standard deviation:', round(float(np.std(auc_scores)), 4))

ROC AUC for fold 1: 1.0000
ROC AUC for fold 2: 1.0000
ROC AUC for fold 3: 1.0000
ROC AUC for fold 4: 1.0000
ROC AUC for fold 5: 1.0000
ROC AUC for fold 6: 0.9318
ROC AUC for fold 7: 0.9667
ROC AUC for fold 8: 0.9470
ROC AUC for fold 9: 0.9047
ROC AUC for fold 10: 0.9279
Average ROC AUC: 0.9678
Standard deviation: 0.0353


In [None]:
# Score every training row and every dev row with `cat` as it currently
# stands (its most recent fit happened inside the CV loop), then report the
# ROC AUC on the training rows.
n_train_rows = train.shape[0]
train_rows = X[:n_train_rows]
dev_rows = X[n_train_rows:]

preds_train = cat.predict_proba(train_rows)[:, 1]
preds_test = cat.predict_proba(dev_rows)[:, 1]

train_auc = roc_auc_score(train['label'], preds_train)
print('ROC AUC train:', train_auc)

ROC AUC train: 0.9273600558335311


In [None]:
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
import numpy as np

# Single-iteration CatBoost model; settings gathered in one mapping.
cat_settings = {
    'iterations': 1,
    'verbose': 0,
    'random_seed': 6543,
    'learning_rate': 0.005599066836106983,
    'subsample': 0.35,
    'allow_const_label': True,
    'loss_function': 'CrossEntropy',
}
cat = CatBoostClassifier(**cat_settings)

# Stratified 5-fold cross-validation collecting ROC AUC (from probabilities)
# and accuracy / precision / recall / F1 / MCC (from hard predictions).
cv = StratifiedKFold(n_splits=5, shuffle=True)
auc_scores, accuracy_scores, precision_scores = [], [], []
recall_scores, f1_scores, mcc_scores = [], [], []

# Each hard-label metric paired with the list that collects its per-fold value.
label_metrics = (
    (accuracy_score, accuracy_scores),
    (precision_score, precision_scores),
    (recall_score, recall_scores),
    (f1_score, f1_scores),
    (matthews_corrcoef, mcc_scores),
)

# Rows belonging to the training essays, and their labels.
train_rows = X[:train.shape[0]]
train_labels = train['label']

# X_train / y_train / X_val / y_val stay bound after the loop (last fold).
for train_idx, val_idx in cv.split(train_rows, train_labels):
    X_train, y_train = train_rows[train_idx], train_labels.iloc[train_idx]
    X_val, y_val = train_rows[val_idx], train_labels.iloc[val_idx]

    cat.fit(X_train, y_train)

    # Positive-class probabilities feed ROC AUC; hard labels feed the rest.
    preds_val_cat = cat.predict_proba(X_val)[:, 1]
    preds_val = cat.predict(X_val)

    auc_score = roc_auc_score(y_val, preds_val_cat)
    auc_scores.append(auc_score)

    for metric_fn, collected in label_metrics:
        collected.append(metric_fn(y_val, preds_val))

# Per-fold report, walking all six score lists in lockstep.
per_fold = zip(auc_scores, accuracy_scores, precision_scores,
               recall_scores, f1_scores, mcc_scores)
for fold_no, (fold_auc, fold_acc, fold_prec, fold_rec, fold_f1, fold_mcc) in enumerate(per_fold, 1):
    print(f'Fold {fold_no}:')
    print(f'  ROC AUC: {fold_auc:.4f}')
    print(f'  Accuracy: {fold_acc:.4f}')
    print(f'  Precision: {fold_prec:.4f}')
    print(f'  Recall: {fold_rec:.4f}')
    print(f'  F1 Score: {fold_f1:.4f}')
    print(f'  MCC: {fold_mcc:.4f}\n')


def _fold_mean(values):
    """Arithmetic mean of a list of per-fold scores."""
    return sum(values) / len(values)


avg_auc = _fold_mean(auc_scores)
avg_accuracy = _fold_mean(accuracy_scores)
avg_precision = _fold_mean(precision_scores)
avg_recall = _fold_mean(recall_scores)
avg_f1 = _fold_mean(f1_scores)
avg_mcc = _fold_mean(mcc_scores)

# Population standard deviation of the per-fold ROC AUC values.
std_auc = np.std(auc_scores)

summary_rows = (
    ('Average ROC AUC:', avg_auc),
    ('Standard Deviation ROC AUC:', std_auc),
    ('Average Accuracy:', avg_accuracy),
    ('Average Precision:', avg_precision),
    ('Average Recall:', avg_recall),
    ('Average F1 Score:', avg_f1),
    ('Average MCC:', avg_mcc),
)
for caption, value in summary_rows:
    print(caption, round(value, 4))


Fold 1:
  ROC AUC: 0.8862
  Accuracy: 0.8881
  Precision: 0.8997
  Recall: 0.9456
  F1 Score: 0.9221
  MCC: 0.7270

Fold 2:
  ROC AUC: 0.9502
  Accuracy: 0.8998
  Precision: 0.9172
  Recall: 0.9422
  F1 Score: 0.9295
  MCC: 0.7569

Fold 3:
  ROC AUC: 0.9434
  Accuracy: 0.8831
  Precision: 0.9388
  Recall: 0.8908
  F1 Score: 0.9142
  MCC: 0.7336

Fold 4:
  ROC AUC: 0.9250
  Accuracy: 0.8807
  Precision: 0.9386
  Recall: 0.8874
  F1 Score: 0.9123
  MCC: 0.7290

Fold 5:
  ROC AUC: 0.9290
  Accuracy: 0.8998
  Precision: 0.9142
  Recall: 0.9454
  F1 Score: 0.9295
  MCC: 0.7574

Average ROC AUC: 0.9268
Standard Deviation ROC AUC: 0.0223
Average Accuracy: 0.8903
Average Precision: 0.9217
Average Recall: 0.9223
Average F1 Score: 0.9215
Average MCC: 0.7408


In [None]:
# Create the ensemble model
ensemble = VotingClassifier(estimators=[('lr', lr_model), ('xgb', xgb_model), ('cat', cat)], voting='soft')

ensemble.fit(X_train, y_train)

# Predict on the validation set
y_pred = ensemble.predict(X_val)

# Print the classification report
print(classification_report(y_val, y_pred))

# Print the accuracy score
print(f'Accuracy: {roc_auc_score(y_val, y_pred)}\n')

              precision    recall  f1-score   support

           0       1.00      0.98      0.99       126
           1       0.99      1.00      0.99       293

    accuracy                           0.99       419
   macro avg       0.99      0.99      0.99       419
weighted avg       0.99      0.99      0.99       419

Accuracy: 0.988095238095238



In [None]:
# Score every training row and every dev row with the fitted `ensemble`, then
# report the ROC AUC on the training rows.
n_train_rows = train.shape[0]
train_rows = X[:n_train_rows]
dev_rows = X[n_train_rows:]

preds_train = ensemble.predict_proba(train_rows)[:, 1]
preds_test = ensemble.predict_proba(dev_rows)[:, 1]

train_auc = roc_auc_score(train['label'], preds_train)
print('ROC AUC train:', train_auc)

ROC AUC train: 0.999997832549258


In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
import numpy as np

# Soft-voting ensemble: averages the class probabilities of the logistic
# regression, XGBoost and CatBoost estimators defined in earlier cells.
ensemble = VotingClassifier(estimators=[('lr', lr_model), ('xgb', xgb_model), ('cat', cat)], voting='soft')

# 5-fold stratified cross-validation; the shuffle is seeded so the folds, and
# therefore every score below, are reproducible across runs.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
auc_scores = []
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
mcc_scores = []

# The training essays occupy the first train.shape[0] rows of X; slice them
# once here instead of re-slicing on every fold.
X_train_all = X[:train.shape[0]]
y_train_all = train['label']

for train_idx, val_idx in cv.split(X_train_all, y_train_all):
    X_train, X_val = X_train_all[train_idx], X_train_all[val_idx]
    y_train, y_val = y_train_all.iloc[train_idx], y_train_all.iloc[val_idx]

    # Refit the whole ensemble on this fold's training part; after the loop
    # `ensemble` holds the fit from the last fold.
    ensemble.fit(X_train, y_train)

    # Hard labels feed the threshold metrics; positive-class probabilities
    # feed ROC AUC.
    y_pred = ensemble.predict(X_val)
    preds_val = ensemble.predict_proba(X_val)[:, 1]

    auc_score = roc_auc_score(y_val, preds_val)
    auc_scores.append(auc_score)

    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    mcc = matthews_corrcoef(y_val, y_pred)

    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    mcc_scores.append(mcc)

# y_val / y_pred still refer to the LAST fold here, so this report covers
# that fold only.
print(classification_report(y_val, y_pred))

# Per-fold report.
for i in range(len(auc_scores)):
    print(f'Fold {i + 1}:')
    print(f'  ROC AUC: {auc_scores[i]:.4f}')
    print(f'  Accuracy: {accuracy_scores[i]:.4f}')
    print(f'  Precision: {precision_scores[i]:.4f}')
    print(f'  Recall: {recall_scores[i]:.4f}')
    print(f'  F1 Score: {f1_scores[i]:.4f}')
    print(f'  MCC: {mcc_scores[i]:.4f}\n')

# Averages over the folds, plus the population standard deviation of ROC AUC.
avg_auc = float(np.mean(auc_scores))
avg_accuracy = float(np.mean(accuracy_scores))
avg_precision = float(np.mean(precision_scores))
avg_recall = float(np.mean(recall_scores))
avg_f1 = float(np.mean(f1_scores))
avg_mcc = float(np.mean(mcc_scores))
std_auc = np.std(auc_scores)

print('Average ROC AUC:', round(avg_auc, 4))
print('Standard Deviation ROC AUC:', round(std_auc, 4))
print('Average Accuracy:', round(avg_accuracy, 4))
print('Average Precision:', round(avg_precision, 4))
print('Average Recall:', round(avg_recall, 4))
print('Average F1 Score:', round(avg_f1, 4))
print('Average MCC:', round(avg_mcc, 4))

# Positive-class probabilities for all training rows and all dev rows, using
# the ensemble left over from the last fold.
preds_train = ensemble.predict_proba(X_train_all)[:, 1]
preds_test = ensemble.predict_proba(X[train.shape[0]:])[:, 1]

# ROC AUC over the full training split (includes rows the model was fit on).
print('ROC AUC train:', roc_auc_score(train['label'], preds_train))


              precision    recall  f1-score   support

           0       1.00      0.98      0.99       126
           1       0.99      1.00      1.00       293

    accuracy                           1.00       419
   macro avg       1.00      0.99      0.99       419
weighted avg       1.00      1.00      1.00       419

Fold 1:
  ROC AUC: 1.0000
  Accuracy: 0.9952
  Precision: 0.9932
  Recall: 1.0000
  F1 Score: 0.9966
  MCC: 0.9887

Fold 2:
  ROC AUC: 1.0000
  Accuracy: 0.9952
  Precision: 0.9966
  Recall: 0.9966
  F1 Score: 0.9966
  MCC: 0.9886

Fold 3:
  ROC AUC: 1.0000
  Accuracy: 0.9976
  Precision: 0.9966
  Recall: 1.0000
  F1 Score: 0.9983
  MCC: 0.9943

Fold 4:
  ROC AUC: 1.0000
  Accuracy: 0.9976
  Precision: 0.9966
  Recall: 1.0000
  F1 Score: 0.9983
  MCC: 0.9943

Fold 5:
  ROC AUC: 1.0000
  Accuracy: 0.9952
  Precision: 0.9932
  Recall: 1.0000
  F1 Score: 0.9966
  MCC: 0.9887

Average ROC AUC: 1.0
Standard Deviation ROC AUC: 0.0
Average Accuracy: 0.9962
Average Precisi

In [None]:
# Store the ensemble's hard predictions in an `ensemble` column, next to the
# per-model prediction columns added by earlier cells.
# Reuses the `X_devtest` matrix computed in an earlier cell.
devtest['ensemble'] = ensemble.predict(X_devtest)
# Show the frame as the cell output.
devtest

Unnamed: 0,id,essay,label,prediction,XGBoost,ensemble
0,abab7ec20320a84b1c1f097a39c895352e4341d6b00c3e...,"Enjoyment means entertainment, satisfaction of...",0,0,0,0
1,15a035178efbb0f45d3d6e3421528e23721a802e6c4730...,Before to start with my opinion of the topic i...,0,0,0,0
2,ede4767f9bc94020ee2ed56ee0d03404c76c2877ce0218...,"Thats a really really good subject, but acctul...",0,0,0,0
3,2c996b6d0beb561b9d0ea2b4426b3cf78cd88b544e0934...,I disagree with the statement saying that it i...,0,0,1,0
4,3cce0e704c3d2023155e3634f130c616bd49ff3b75eb8c...,It is undeniable that advertisements play a si...,0,0,1,1
...,...,...,...,...,...,...
864,a4faf8f47e79c51f71cafea21e8137fdeed7418a71229c...,Advertisements occupy a big part in our lives ...,0,0,0,0
865,318b7d74d32c4613b1ffc9b41b4a4b5fb321b815c27b05...,Young people are the fundamental building bloc...,0,0,0,0
866,6ca284a081ce0b739b68b3edca543780d88ee056425f3a...,"In my opinion, and experience, old people can ...",0,0,0,0
867,980647f5bf2b94a4f67b6c36665b165440bbea259520d1...,I agree with this statement that most advertis...,0,0,0,0


In [None]:
# Class distribution of the `ensemble` prediction column.
devtest['ensemble'].value_counts()

Unnamed: 0_level_0,count
ensemble,Unnamed: 1_level_1
0,675
1,194
