In [1]:
import pandas as pd
def _load_gpt_human_pair(gpt_path, human_path):
    """Read two HDF5 frames, stack them row-wise (GPT rows first) and fill NaN with 0."""
    gpt_frame = pd.read_hdf(gpt_path)
    human_frame = pd.read_hdf(human_path)
    stacked = pd.concat([gpt_frame, human_frame], axis=0)
    return stacked.fillna(0)


# Each corpus is stored as a GPT file plus a Human file in its own Drive folder.
HC3_filtered = _load_gpt_human_pair(
    '/content/drive/MyDrive/HC3 - filtered/GPT_df_3.h5',
    '/content/drive/MyDrive/HC3 - filtered/Human_df_3.h5',
)
GPT_2 = _load_gpt_human_pair(
    '/content/drive/MyDrive/GPT2/GPT_df_gpt2.h5',
    '/content/drive/MyDrive/GPT2/Human_df_gpt2.h5',
)
HC3_unfiltered = _load_gpt_human_pair(
    '/content/drive/MyDrive/HC3 - Unfiltered/GPT_df_3.h5',
    '/content/drive/MyDrive/HC3 - Unfiltered/Human_df_3.h5',
)


In [2]:
# Lookup table that collapses the origin-prefixed split tags
# ('Human_*' / 'GPT_*') onto the bare split names.
replacement_dict = dict([
    ('Human_train', 'train'),
    ('Human_test', 'test'),
    ('Human_val', 'val'),
    ('GPT_train', 'train'),
    ('GPT_test', 'test'),
    ('GPT_val', 'val'),
])

# Rewrite every matching cell value of the GPT-2 frame using the table above.
GPT_2 = GPT_2.replace(to_replace=replacement_dict)

In [3]:
# Registry of evaluation corpora, keyed by the name used in the reports.
# Insertion order (filtered, unfiltered, GPT-2) is the order they are evaluated in.
datasets = {}
datasets['HC3_filtered'] = HC3_filtered
datasets['HC3_unfiltered'] = HC3_unfiltered
datasets['GPT_2'] = GPT_2

In [4]:
from torch.utils.data import Dataset
import numpy as np
import re
import nltk
import string

class TextDataset(Dataset):
    """Torch dataset that tokenizes a dataframe's ``text`` column once, up front.

    Args:
        dataframe: Frame with a ``text`` column and, optionally, a ``label`` column.
        tokenizer: Callable invoked as ``tokenizer(texts, padding='max_length',
            max_length=..., truncation=True, return_tensors="pt")``. Its result
            must be indexable by ``'input_ids'`` and ``'attention_mask'``.
        max_length: Length passed to the tokenizer for padding / truncation.

    Each item is ``(text, label)`` where ``text`` is a dict holding the
    ``input_ids`` and ``attention_mask`` rows for that index and ``label`` is
    the stored label, or ``-1`` when the dataframe had no ``label`` column.
    """

    def __init__(self, dataframe, tokenizer, max_length=500):
        texts = dataframe.text.values.tolist()
        # Sanity check: show a few raw inputs before they are tokenized.
        self._print_random_samples(texts)

        self.texts = tokenizer(texts, padding='max_length',
                               max_length=max_length,
                               truncation=True,
                               return_tensors="pt")

        # ``labels`` is only created for labelled frames; ``__getitem__``
        # relies on the attribute's presence to detect that case.
        if 'label' in dataframe:
            self.labels = dataframe.label.values.tolist()

    def _print_random_samples(self, texts, n_samples=5):
        """Print up to ``n_samples`` distinct, randomly chosen entries of ``texts``.

        The sample size is capped at ``len(texts)``: sampling without
        replacement raises ``ValueError`` when more items are requested than
        exist, which previously crashed the constructor on frames with fewer
        than five rows.
        """
        sample_size = min(n_samples, len(texts))
        if sample_size > 0:
            random_entries = np.random.choice(len(texts), sample_size, replace=False)
            for i in random_entries:
                print(f"Entry {i}: {texts[i]}")
        print()

    def __len__(self):
        """Return the number of tokenized texts."""
        return len(self.texts['input_ids'])

    def __getitem__(self, idx):
        """Return ``(text, label)`` for ``idx``; ``label`` is ``-1`` if unlabelled."""
        text = {'input_ids': self.texts['input_ids'][idx],
                'attention_mask': self.texts['attention_mask'][idx]}
        label = -1
        if hasattr(self, 'labels'):
            label = self.labels[idx]
        return text, label

In [5]:
import torch
from tqdm import tqdm
def get_text_predictions(model, loader, threshold=0.5):
    """Run ``model`` over ``loader`` and return thresholded integer predictions.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``. It is
            moved to CUDA when available (CPU otherwise) and put in eval mode.
        loader: Iterable yielding ``(data_input, label)`` pairs, where
            ``data_input`` is a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors. The label is ignored.
        threshold: Outputs strictly greater than this value become 1, all
            others 0. Defaults to 0.5 (presumably the model emits
            probabilities — confirm against the model definition).

    Returns:
        A NumPy int32 array with the per-batch predictions concatenated along
        the first axis. An empty loader yields an empty array instead of
        failing inside ``torch.cat``.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    model = model.to(device)
    model.eval()

    results_predictions = []
    with torch.no_grad():
        for data_input, _ in tqdm(loader):
            attention_mask = data_input['attention_mask'].to(device)
            # squeeze(1) drops a singleton dimension 1 when one is present.
            input_ids = data_input['input_ids'].squeeze(1).to(device)

            output = model(input_ids, attention_mask)
            results_predictions.append((output > threshold).int())

    # torch.cat rejects an empty list, so handle "no batches" explicitly.
    if not results_predictions:
        return torch.empty(0, dtype=torch.int32).numpy()

    return torch.cat(results_predictions).cpu().detach().numpy()

In [22]:
# Load models
# Pick the device the same way get_text_predictions does, so the checkpoints
# also load on a CPU-only runtime instead of failing on a hard-coded 'cuda'.
model_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): these checkpoints appear to be whole pickled models;
# torch.load unpickles arbitrary code, so only load files you trust. Recent
# PyTorch releases default to weights_only=True — confirm against the installed
# version whether weights_only=False must be passed here.
# NOTE(review): the recorded outputs for HC3_filtered_model and
# HC3_unfiltered_model are identical to six decimals on all three test sets —
# verify that the two .pt files really hold different models.
models = {
    "HC3_filtered_model": torch.load("/content/drive/MyDrive/Project/roberta_gpt3_filtered.pt", map_location=model_device),
    "HC3_unfiltered_model": torch.load("/content/drive/MyDrive/Project/roberta_gpt3_unfiltered.pt", map_location=model_device),
    "GPT_2_model": torch.load("/content/drive/MyDrive/Project/roberta_gpt2.pt", map_location=model_device)
}


In [24]:
from sklearn.metrics import classification_report, f1_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


# DataLoader is used below but was not imported by any earlier cell.
from torch.utils.data import DataLoader

# NOTE(review): `tokenizer` is not defined in any cell shown in this notebook;
# it must be created (matching the tokenizer the models were trained with)
# before this cell runs, otherwise this fails on a fresh kernel.

# Collected as (model name, test-set name, F1) triples, one per model/dataset pair.
f1_results = []

for model_name, model in models.items():
    for test_name, df in datasets.items():
        # Evaluate only on the rows tagged as the held-out test split.
        df_test = df[df['source'] == 'test']
        test_dataloader = DataLoader(TextDataset(df_test, tokenizer), batch_size=8, shuffle=False, num_workers=0)

        # shuffle=False keeps predictions aligned with df_test's row order.
        predictions = get_text_predictions(model, test_dataloader)

        # Compute F1 score (binary classification)
        f1 = f1_score(df_test.label, predictions)

        # Append results to the f1_results list
        f1_results.append((model_name, test_name, f1))

        print(f"Model {model_name}, Testing on {test_name}")
        print(classification_report(df_test.label, predictions))

Entry 13135: I ' l just try to elaborate a bit. There are only 2 common ways to increase the pressure of any fluid (including water). Pressure is equal to the density of the liquid times the height, it doesn't matter how wide or deep the pipe is, the water pressure at the bottom of a 2 inch diameter tube that ' 100 feet tall will be the same as a 2000 inch diameter tube that is 100 feet tall. So water is either collected at high elevation (rain collecting) or pumped to a higher elevation (tank at the top of a tower). Pumping is the second method to increase pressure, which is pretty obvious, using electrical power, to turn mechanical parts that apply force to the fluid to increase pressure.
Entry 18017: Well, the laws vary considerably based on where you ' e planning on doing it.
Entry 16102: There is no requirement to tip, as long as you do not visit the same restaurant twice.
Entry 11454: Maybe a little reddit gold can help that awful hypothetical scenario. Edit: thanks for the gold,

100%|██████████| 3020/3020 [03:39<00:00, 13.74it/s]


Model HC3_filtered_model, Testing on HC3_filtered
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     16148
           1       0.96      1.00      0.98      8012

    accuracy                           0.99     24160
   macro avg       0.98      0.99      0.98     24160
weighted avg       0.99      0.99      0.99     24160

Entry 13147: It not for removing stain and adhered debris, it for quickly getting large chunks of food or fuel scraped off of the grill. This might be the most layman thing I ' e ever had to answer under the Physics tag. Bravo.
Entry 24348: Hello and welcome,I competely understand the issue. If you attempt pregnancy now, most likely you will require insulin right from the beginning as pregnancy itself causes a diabetes like state which becomes more manifested in people who are already diabetic. Also you may need higher doses than in previous pregnancy. If the sugars are well controlled throughout the pregnancy then 

100%|██████████| 3075/3075 [03:44<00:00, 13.68it/s]


Model HC3_filtered_model, Testing on HC3_unfiltered
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     16580
           1       0.96      1.00      0.98      8014

    accuracy                           0.98     24594
   macro avg       0.98      0.99      0.98     24594
weighted avg       0.99      0.98      0.98     24594

Entry 5806: Just the type of daddy bear this daddy lusts for. I will pread my legs for a man like this-vocal, hot and who knows what he wants and takes it. Giving myself for service-to a man like this-is my greatest pleasure. Post Reply
Entry 4642: After the death of her step-father to breast cancer, Karielle is diagnosed with type 1 diabetes. When she doesn't receive proper nutrition or exercise, her disease takes its toll on her bones and muscle. Karielle struggles to meet financial goals such as buying a car or buying a home. She finds it very difficult to put her own needs first and has to rely on others to fi

100%|██████████| 1250/1250 [01:33<00:00, 13.38it/s]


Model HC3_filtered_model, Testing on GPT_2
              precision    recall  f1-score   support

           0       0.64      0.76      0.69      5000
           1       0.70      0.58      0.63      5000

    accuracy                           0.67     10000
   macro avg       0.67      0.67      0.66     10000
weighted avg       0.67      0.67      0.66     10000

Entry 8457: Most likely bowel obstruction which would be painful. They see stuff like that all the time
Entry 23222: Yes, there are a lot of places you can research stocks online, Google Finance, Yahoo Finance, Reuters etc. It's important to understand that the price of the stock doesn't actually mean anything. Share price is just a function of the market capitalization divided by the number of shares outstanding. As an example take two companies that are both worth $1 million, but Company A has issued 10,000 shares and Company B has issued 100,000 shares. Company A has a share price of $100 while Company B has a share pri

100%|██████████| 3020/3020 [03:39<00:00, 13.73it/s]


Model HC3_unfiltered_model, Testing on HC3_filtered
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     16148
           1       0.96      1.00      0.98      8012

    accuracy                           0.99     24160
   macro avg       0.98      0.99      0.98     24160
weighted avg       0.99      0.99      0.99     24160

Entry 15783: Many Bots are humans who have made a novelty account to impersonate bots.
Entry 91: Fat Tuesday, also known as Mardi Gras, is a holiday that is celebrated in some parts of the world, including the United States. It falls on the Tuesday before Ash Wednesday, which marks the start of the Christian season of Lent. During Lent, many people give up certain things, such as certain foods or activities, as a way of preparing for Easter. Fat Tuesday is a way for people to celebrate and have fun before the more somber season of Lent begins. On this day, people often dress up in costumes, attend parades and part

100%|██████████| 3075/3075 [03:44<00:00, 13.69it/s]


Model HC3_unfiltered_model, Testing on HC3_unfiltered
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     16580
           1       0.96      1.00      0.98      8014

    accuracy                           0.98     24594
   macro avg       0.98      0.99      0.98     24594
weighted avg       0.99      0.98      0.98     24594

Entry 6385: Cuthbert Dube Zimbabwe FA (Zifa) president Cuthbert Dube resigned before a meeting that passed a vote of no confidence. He had been set to continue in the role until elections are held on 5 December but has now been replaced by CEO Jonathan Mashingaidze until then. Football's world governing body Fifa says they will not be sending any more development money to Zifa until fully audited accounts have been produced. Dube had been in charge for five years, when Zifa's debt rose to around US$ 6m. The financial crisis lead to national teams repeatedly struggling to fulfil away assignments due to a lack of 

100%|██████████| 1250/1250 [01:33<00:00, 13.35it/s]


Model HC3_unfiltered_model, Testing on GPT_2
              precision    recall  f1-score   support

           0       0.64      0.76      0.69      5000
           1       0.70      0.58      0.63      5000

    accuracy                           0.67     10000
   macro avg       0.67      0.67      0.66     10000
weighted avg       0.67      0.67      0.66     10000

Entry 843: When a bomb is falling through the air, the air rushing past it can cause a whistling sound. Think of it like when you blow across the top of a bottle and it makes a whistling sound. The bomb is moving very fast through the air, so the air rushing past it is also moving very fast, which can create a whistling sound. In movies, this sound is often added to make the scene more dramatic or intense. In real life, you might be able to hear this whistling sound if you were close enough to the bomb as it was falling. However, it's important to remember that bombs are very dangerous and should be avoided.
Entry 2993: 

100%|██████████| 3020/3020 [03:40<00:00, 13.69it/s]


Model GPT_2_model, Testing on HC3_filtered
              precision    recall  f1-score   support

           0       0.83      0.83      0.83     16148
           1       0.66      0.65      0.65      8012

    accuracy                           0.77     24160
   macro avg       0.74      0.74      0.74     24160
weighted avg       0.77      0.77      0.77     24160

Entry 14176: They don't have a property of color. They only have a wavelength. "Color" is what we call the sensation we have in our brains when or eyes are exposed to different wavelengths of light. We perceive different wavelengths as looking different, and we call that perceptual difference "color".
Entry 19402: Mine does. It won't start without it.
Entry 23470: Firstly "Most option traders don't want to actually buy or sell the underlying stock."THIS IS COMPLETELY UTTERLY FALSE Perhaps the problem is that you are only familiar with the BUY side of options trading. On the sell side of options trading, an options desk eng

100%|██████████| 3075/3075 [03:44<00:00, 13.67it/s]


Model GPT_2_model, Testing on HC3_unfiltered
              precision    recall  f1-score   support

           0       0.83      0.84      0.84     16580
           1       0.66      0.65      0.66      8014

    accuracy                           0.78     24594
   macro avg       0.75      0.74      0.75     24594
weighted avg       0.78      0.78      0.78     24594

Entry 7987: The U.S. Department of Homeland Security's monitoring of social media services could be a threat to civil liberties and online free speech, several lawmakers said during a hearing Thursday. According to a report by a civil liberties group called the Electronic Privacy Information Center, or EPIC, DHS paid more than $11 million to General Dynamics for a system to keep an eye on Facebook and Twitter public posts, as well as comment threads on major news websites. EPIC's report found that the system watches public social media posts for comments that "adversely reflect"on the government, and for responses to pro

100%|██████████| 1250/1250 [01:33<00:00, 13.37it/s]

Model GPT_2_model, Testing on GPT_2
              precision    recall  f1-score   support

           0       1.00      0.95      0.98      5000
           1       0.96      1.00      0.98      5000

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000






In [25]:
# Tabulate the collected (model name, test-set name, F1) triples, one row each.
df_f1 = pd.DataFrame(
    data=f1_results,
    columns=["Training Set", "Testing Set", "F1 Score"],
)
print(df_f1)

           Training Set     Testing Set  F1 Score
0    HC3_filtered_model    HC3_filtered  0.980042
1    HC3_filtered_model  HC3_unfiltered  0.976878
2    HC3_filtered_model           GPT_2  0.633348
3  HC3_unfiltered_model    HC3_filtered  0.980042
4  HC3_unfiltered_model  HC3_unfiltered  0.976878
5  HC3_unfiltered_model           GPT_2  0.633348
6           GPT_2_model    HC3_filtered  0.654914
7           GPT_2_model  HC3_unfiltered  0.655949
8           GPT_2_model           GPT_2  0.976899
