In [1]:
import joblib
import pandas as pd
import scipy
from sklearn.metrics import classification_report


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
train_df = pd.read_csv("../nlp-data/liwc_pos_dep_tr.csv")
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")


In [3]:
labels2id = {alg:i for i, alg in enumerate(train_df["alg"].unique())}
labels2id

{'human': 0,
 'fair': 1,
 'grover': 2,
 'gpt2': 3,
 'gpt3': 4,
 'instructgpt': 5,
 'gpt': 6,
 'ctrl': 7,
 'pplm': 8,
 'xlnet': 9,
 'xlm': 10}

In [4]:
id2labels = {v:k for k, v in labels2id.items()}
id2labels

{0: 'human',
 1: 'fair',
 2: 'grover',
 3: 'gpt2',
 4: 'gpt3',
 5: 'instructgpt',
 6: 'gpt',
 7: 'ctrl',
 8: 'pplm',
 9: 'xlnet',
 10: 'xlm'}

In [5]:
def get_features_test(data, vectorizer=None, numerical_fields=None):
  if not vectorizer:
    raise Exception("Vectorizer need")
  
  tfidf = vectorizer.get("tfidf", {})
  count_vec = vectorizer.get("count", {})

  features = []
  if not numerical_fields:
    numerical_fields = []

  for ele in tfidf:
    features.append(ele[1].transform(data[ele[0]]))

  for ele in count_vec:
    features.append(ele[1].transform(data[ele[0]]))

  for field in numerical_fields:
    features.append(scipy.sparse.csr_matrix(data[field]).T)

  return scipy.sparse.hstack(features)

In [6]:
def test_model(model, X, y, id2labels):
  preds = list(map(lambda x: id2labels[x], model.predict(X)))
  preds = ['machine' if item != 'human' else item for item in preds]
  true_label = y.replace(['fair', 'grover', 'gpt2', 'gpt3', 'instructgpt', 'gpt', 'ctrl', 'pplm', 'xlnet', 'xlm'],'machine')
  return(classification_report(preds, true_label))
  # return


In [7]:
import joblib

# trained only on generation text
vectorizer_generation = joblib.load("../nlp-models/vectorizer_generation.pkl")
lsvc_generation = joblib.load("../nlp-models/lsvc_generation.pkl")
rf_generation = joblib.load("../nlp-models/rf_generation.pkl")
xgb_generation = joblib.load("../nlp-models/xgb_generation.pkl")


# trained only on pos tags of the text
vectorizer_pos = joblib.load("../nlp-models/vectorizer_pos.pkl")
lsvc_pos = joblib.load("../nlp-models/lsvc_pos.pkl")
rf_pos = joblib.load("../nlp-models/rf_pos.pkl")
xgb_pos = joblib.load("../nlp-models/xgb_pos.pkl")


# trained on pos,dep tags and liwc like "semantic_coherence", "Analytic", "WPS", "article", "Period" of the text
vectorizer_pos_dep_liwc = joblib.load("../nlp-models/vectorizer_pos_dep_liwc.pkl")
lsvc_pos_dep_liwc = joblib.load("../nlp-models/lsvc_pos_dep_liwc.pkl")
rf_pos_dep_liwc = joblib.load("../nlp-models/rf_pos_dep_liwc.pkl")
xgb_pos_dep_liwc = joblib.load("../nlp-models/xgb_pos_dep_liwc.pkl")



## **On Generation Text**

In [8]:
test_features_generation = get_features_test(test_df, vectorizer=vectorizer_generation)

# true_label = test_df["alg"].replace(['fair', 'grover', 'gpt2', 'gpt3', 'instructgpt', 'gpt', 'ctrl', 'pplm', 'xlnet', 'xlm'],'machine')

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_generation, test_features_generation, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_generation, test_features_generation, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_generation, test_features_generation, test_df["alg"], id2labels))

----------------------Linear SVC---------------------------
              precision    recall  f1-score   support

       human       0.81      0.77      0.79       222
     machine       0.95      0.96      0.96      1057

    accuracy                           0.93      1279
   macro avg       0.88      0.87      0.87      1279
weighted avg       0.93      0.93      0.93      1279

----------------------Random Forest---------------------------
              precision    recall  f1-score   support

       human       1.00      0.92      0.96       232
     machine       0.98      1.00      0.99      1047

    accuracy                           0.99      1279
   macro avg       0.99      0.96      0.97      1279
weighted avg       0.99      0.99      0.98      1279

----------------------XGBOOST---------------------------
              precision    recall  f1-score   support

       human       0.96      0.91      0.93       226
     machine       0.98      0.99      0.99      1053

  

In [9]:
# same way for reddit dataset

## **POS SEQUENCE**

In [10]:
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

----------------------Linear SVC---------------------------
              precision    recall  f1-score   support

       human       0.79      0.70      0.74       239
     machine       0.93      0.96      0.94      1040

    accuracy                           0.91      1279
   macro avg       0.86      0.83      0.84      1279
weighted avg       0.91      0.91      0.91      1279

----------------------Random Forest---------------------------
              precision    recall  f1-score   support

       human       0.85      0.91      0.88       199
     machine       0.98      0.97      0.98      1080

    accuracy                           0.96      1279
   macro avg       0.92      0.94      0.93      1279
weighted avg       0.96      0.96      0.96      1279

----------------------XGBOOST---------------------------
              precision    recall  f1-score   support

       human       0.88      0.91      0.90       207
     machine       0.98      0.98      0.98      1072

  

In [11]:
# same for reddit data set

### 1. CTRL

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
filtered_test_df = test_df[test_df['alg'].isin(['ctrl', 'human'])]
test_features_pos = get_features_test(filtered_test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, filtered_test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, filtered_test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, filtered_test_df["alg"], id2labels))

### 2. FAIR

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['fair', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

### 3. GPT

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['gpt', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

### 4. GPT2

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['gpt2', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

### 5. GPT3

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['gpt3', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

### 6. GROVER

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['grover', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

### 7. InstructGPT

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['instructgpt', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

### 8. PPLM

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['pplm', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

### 9. XLNET

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['xlnet', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

### 10. XLM

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['xlm', 'human'])]
test_features_pos = get_features_test(test_df, vectorizer=vectorizer_pos)

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos, test_features_pos, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos, test_features_pos, test_df["alg"], id2labels))

# POS DEP AND LIWC

In [22]:
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

----------------------Linear SVC---------------------------
              precision    recall  f1-score   support

       human       0.82      1.00      0.90       175
     machine       1.00      0.74      0.85       145

    accuracy                           0.88       320
   macro avg       0.91      0.87      0.88       320
weighted avg       0.90      0.88      0.88       320

----------------------Random Forest---------------------------
              precision    recall  f1-score   support

       human       0.85      1.00      0.92       182
     machine       1.00      0.78      0.87       138

    accuracy                           0.90       320
   macro avg       0.93      0.89      0.90       320
weighted avg       0.92      0.90      0.90       320

----------------------XGBOOST---------------------------
              precision    recall  f1-score   support

       human       0.91      1.00      0.95       194
     machine       1.00      0.85      0.92       126

  

### 1. CTRL

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['ctrl', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 2. FAIR

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['fair', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 3. GPT

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['gpt', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 4. GPT2

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['gpt2', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 5. GPT3

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['gpt3', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 6. GROVER

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['grover', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 7. InstructGPT

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['instructgpt', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 8. PPLM

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['pplm', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 9. XLNET

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['xlnet', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

### 10. XLM

In [None]:
test_df = pd.read_csv("../nlp-data/liwc_pos_dep_eval.csv")
reddit_df = pd.read_csv("../nlp-data/liwc_pos_dep_reddit.csv")
test_df = test_df[test_df['alg'].isin(['xlm', 'human'])]
test_features_pos_dep_liwc = get_features_test(test_df, vectorizer=vectorizer_pos_dep_liwc, numerical_fields=["semantic_coherence", "Analytic", "WPS", "article", "Period"])

print("----------------------Linear SVC---------------------------")
print(test_model(lsvc_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------Random Forest---------------------------")
print(test_model(rf_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))

print("----------------------XGBOOST---------------------------")
print(test_model(xgb_pos_dep_liwc, test_features_pos_dep_liwc, test_df["alg"], id2labels))