In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset from Kaggle's environment
# Kaggle datasets are usually accessed from /kaggle/input/[dataset-name] directory
df = pd.read_csv('/kaggle/input/ai-human/AI_Human.csv')

# Display the first few rows of the dataset
print(df.head())

# Extract features and labels
comments = df['text'].astype(str).tolist()
labels = df['generated'].tolist()

# Vectorize the text data
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(comments)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Train a simple model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_report(y_test, y_pred, zero_division=0))


                                                text  generated
0  Cars. Cars have been around since they became ...        0.0
1  Transportation is a large necessity in most co...        0.0
2  "America's love affair with it's vehicles seem...        0.0
3  How often do you ride in a car? Do you drive a...        0.0
4  Cars are a wonderful thing. They are perhaps o...        0.0
Accuracy: 0.9947253378759736
Classification Report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     61112
         1.0       1.00      0.99      0.99     36335

    accuracy                           0.99     97447
   macro avg       1.00      0.99      0.99     97447
weighted avg       0.99      0.99      0.99     97447

