# Preparation and evaluation of manual annotation

This notebook is part of a project that creates a dataset for manual annotation. The dataset is identical to the test set used for our model. The goal is to compare the quality of the model with that of a human annotator and to understand the errors made by the model.

## Step 1: Create the full dataset consisting of human-written and AI-generated text

In [None]:
import pandas as pd

# Locations of the two source CSV files.
path1 = "cleaned_final_title_based.csv"
path2 = "cleaned_final.csv"

# Load both files. The second file's 'Rewritten Text' column is renamed to
# 'Generated Text' so it does not collide with the identically named column
# taken from the first file.
df1 = pd.read_csv(path1)
df2 = pd.read_csv(path2).rename(columns={'Rewritten Text': 'Generated Text'})

# Keep only the text columns needed downstream.
selected_columns_df1 = df1.loc[:, ['Combined Text', 'Rewritten Text']]
selected_columns_df2 = df2.loc[:, ['Generated Text']]

# Place the three text columns side by side (rows are aligned on the index).
new_df = pd.concat(
    [selected_columns_df1, selected_columns_df2],
    axis=1,
)

In [123]:
# Persist the combined frame; pandas writes it with its default comma
# delimiter, and index=False leaves the row index out of the file.
new_df.to_csv('combined_file_final.csv', index=False)

In [None]:
# Preview the first rows of the combined frame.
new_df.head()

## Step 2: Create test set for manual annotation

In [126]:
from sklearn.model_selection import train_test_split

In [None]:
def load_data(path="combined_file_final.csv", sep=","):
    """Load the combined CSV and reshape it into one long, labelled frame.

    The wide input has one column per text source. Each column is turned into
    a two-column frame (``Text``, ``label``) and the pieces are stacked in the
    order: human text, rewritten text, generated text.

    Parameters
    ----------
    path : str
        CSV file to read. Defaults to the file written earlier in this
        notebook.
    sep : str
        Field delimiter. Defaults to a comma, which is what
        ``DataFrame.to_csv`` produces by default; pass ``";"`` for a file that
        was re-exported with semicolons.

    Returns
    -------
    pandas.DataFrame
        Columns ``Text`` and ``label`` (0 = human, 1 = AI), with a fresh
        0..n-1 index.

    Raises
    ------
    KeyError
        If one of the expected text columns is missing, e.g. because the file
        was read with the wrong delimiter.
    """
    data = pd.read_csv(path, sep=sep)

    # Source column -> class label. Insertion order fixes the stacking order.
    column_labels = {
        "Combined Text": 0,   # human-written
        "Rewritten Text": 1,  # AI
        "Generated Text": 1,  # AI
    }

    missing = [column for column in column_labels if column not in data.columns]
    if missing:
        raise KeyError(
            f"Columns {missing} not found in {path!r}; "
            f"found {list(data.columns)}. Check the 'sep' argument."
        )

    # rename/assign return new frames, so no column is ever set on a slice of
    # `data` (which is what triggers SettingWithCopyWarning).
    parts = [
        data[[column]].rename(columns={column: "Text"}).assign(label=label)
        for column, label in column_labels.items()
    ]
    return pd.concat(parts, ignore_index=True)

# Build the long frame (a "Text" column plus a 0/1 "label" column) and
# preview its first rows.
data = load_data()
data.head()

In [None]:
# Shuffle every row with a fixed seed and renumber the index from zero.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# First split: hold out 20% of all rows as the test set, keeping the class
# proportions of the full data.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    data["Text"],
    data["label"],
    stratify=data["label"],
    test_size=0.2,
    random_state=42,
)

# Second split: carve a validation set (20% of the remaining training rows)
# out of the training portion, again stratified by label.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    stratify=train_labels,
    test_size=0.2,
    random_state=42,
)

train_texts

In [98]:
# Export the held-out split: texts go into one CSV and their labels into
# another, each with a header row and without the index column.
test_texts.to_csv('test_texts_manuel_annotation.csv', header=True, index=False)
test_labels.to_csv('test_labels_manuel_annotation.csv', header=True, index=False)


In [None]:
# Report the sizes of the training and test splits, one per line.
print(train_texts.shape, test_texts.shape, sep="\n")

## Step 3: Evaluate manual annotation

In [None]:
from sklearn.metrics import f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [120]:
# Input files for the evaluation.
path1 = "test_texts_manuel_annotation_niclas.csv"
path2 = "test_labels_manuel_annotation.csv"

# The first file is semicolon-separated; only its first 250 rows are kept.
df1 = pd.read_csv(path1, sep=";").iloc[:250]

# The second file uses the default delimiter; it is cut to the same 250 rows.
df2 = pd.read_csv(path2).iloc[:250]

In [None]:
# The annotated file stores its labels in a capitalised 'Label' column, while
# the labels file saved with the test split uses lowercase 'label'.
labels1 = df1['Label']  # labels read from the annotated file (path1)
labels2 = df2['label']  # reference labels saved together with the test split

# scikit-learn rejects NaN labels, so evaluate only rows where both labels are
# present. labels1/labels2 themselves are left untouched.
has_both = (labels1.notna() & labels2.notna()).to_numpy()
y_pred = labels1[has_both]
y_true = labels2[has_both]

n_skipped = int((~has_both).sum())
if n_skipped:
    print(f'Skipped {n_skipped} row(s) with a missing label')

# scikit-learn expects (y_true, y_pred): the reference labels go first so that
# matrix rows are the true classes and columns are the annotated classes,
# matching the axis labels of the plot below.
f1 = f1_score(y_true, y_pred)
conf_matrix = confusion_matrix(y_true, y_pred)

print(f'F1 Score: {f1}')
print('Confusion Matrix:')
print(conf_matrix)

# Plot the confusion matrix
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:

# Find the row positions at which each label column holds no value.
nan_indices_labels1 = labels1.index[labels1.isna().to_numpy()]
nan_indices_labels2 = labels2.index[labels2.isna().to_numpy()]

# Show both index lists so unlabelled rows can be looked up in the files.
print(f'Indices of NaN values in labels1: {nan_indices_labels1.tolist()}')
print(f'Indices of NaN values in labels2: {nan_indices_labels2.tolist()}')