In [3]:
import sqlite3
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Set device: use the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model and tokenizer
model_name = "ariannap22/collectiveaction_roberta_simplified_synthetic_weights"
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Number of speaker turns (with non-NULL turnText) to classify in this run.
ROW_LIMIT = 100

# Connect to SQLite3 database. The try/finally guarantees the connection is
# closed even if tokenization or inference raises, so a failed run does not
# leave the database file locked for later cells.
conn = sqlite3.connect('data.db')
try:
    cursor = conn.cursor()

    # Create a column for storing predictions (if it doesn't exist).
    # The schema is inspected explicitly instead of catching
    # sqlite3.OperationalError, which would also swallow unrelated failures
    # (missing table, locked database) and misreport them as "already exists".
    existing_columns = {
        column_info[1].lower()  # index 1 of a table_info row is the column name
        for column_info in cursor.execute("PRAGMA table_info(speaker_turns)")
    }
    if "predictedclassindex" in existing_columns:
        print("Column 'predictedClassIndex' already exists.")
    else:
        cursor.execute("ALTER TABLE speaker_turns ADD COLUMN predictedClassIndex INTEGER")

    # Fetch the first ROW_LIMIT rows with turnText data. ORDER BY rowid makes
    # "first" well defined; without it SQLite may return rows in any order.
    cursor.execute(
        "SELECT rowid, turnText FROM speaker_turns "
        "WHERE turnText IS NOT NULL ORDER BY rowid LIMIT ?",
        (ROW_LIMIT,),
    )
    rows = cursor.fetchall()

    # Process texts and make predictions; collect (prediction, rowid) pairs so
    # the database is updated in a single batch afterwards.
    predictions = []
    for row_id, text in rows:
        if not text.strip():  # Skip empty or blank texts
            continue

        # Tokenize the text (inputs longer than 512 tokens are truncated)
        inputs = tokenizer(
            [text],
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        ).to(device)

        # Perform prediction. Softmax is monotonic, so the argmax of the raw
        # logits is the same class the softmax probabilities would select.
        with torch.no_grad():
            logits = model(**inputs).logits
        predicted_class_index = torch.argmax(logits, dim=-1).item()
        predictions.append((predicted_class_index, row_id))

    # Update the database with the predictions
    cursor.executemany(
        "UPDATE speaker_turns SET predictedClassIndex = ? WHERE rowid = ?",
        predictions
    )

    # Commit only after every row was processed; on error nothing is committed.
    conn.commit()
finally:
    conn.close()

print(f"Predictions added successfully for the first {ROW_LIMIT} rows.")

Column 'predictedClassIndex' already exists.
Predictions added successfully for the first 100 rows.


In [1]:
# import sqlite3
# import pandas as pd

# # Connect to SQLite database
# conn = sqlite3.connect('data.db', timeout=10)

# # Query up to 100 rows with predictedClassIndex = 0
# query = "SELECT * FROM speaker_turns WHERE predictedClassIndex = 0 LIMIT 100"
# df = pd.read_sql_query(query, conn)

# # Close the connection
# conn.close()

# # Display the rows
# df

In [7]:
import sqlite3
import pandas as pd

# Connect to SQLite database (wait up to 10 seconds if the file is locked).
# The try/finally ensures the connection is released even when the query
# fails, so an error here cannot leave the database locked for other cells.
conn = sqlite3.connect('data.db', timeout=10)
try:
    # Query up to 100 rows with predictedClassIndex = 0. ORDER BY rowid makes
    # the exported sample deterministic across runs.
    query = (
        "SELECT turnText FROM speaker_turns "
        "WHERE predictedClassIndex = 0 ORDER BY rowid LIMIT 100"
    )
    df = pd.read_sql_query(query, conn)
finally:
    # Close the connection
    conn.close()

# Save the turnText column to a CSV file (no index column is written)
df.to_csv('../data/test_speaker_turn_action.csv', index=False)

print("CSV file saved successfully.")

CSV file saved successfully.


In [8]:
# Display the current `df` DataFrame as the cell's rich output for a quick
# visual check (presumably the turnText rows exported to CSV — depends on
# which cell last assigned `df`).
df

Unnamed: 0,turnText
0,I'm Simon Shapiro and this is Sing Out Speak ...
1,I'm Simon Shapiro and this is Sing Out Speak ...
2,reflection If you're never gonna change direc...
3,"If you like the song, it's available at all t..."
4,I'm Simon Shapiro and this is Sing Out Speak ...
5,song it's available at all the places you usu...
6,I'm Simon Shapiro and this is Sing Out Speak ...
7,requires us to be thinking about long term sy...
8,"and completion, a deep focus on student succe..."
9,So let me talk about some of that partnership...


In [5]:
import sqlite3

# Connect to SQLite3 database; try/finally guarantees the connection is
# closed even if the query fails.
conn = sqlite3.connect('data.db')
try:
    cursor = conn.cursor()

    # Count rows with predictedClassIndex = 0 among the first 100 rows that
    # have turnText. The inner query selects those first 100 rows (ordered by
    # rowid); the outer WHERE then filters them. Filtering inside the inner
    # query instead would count class-0 rows from the whole table, merely
    # capped at 100, which is not what the message below reports.
    cursor.execute("""
        SELECT COUNT(*)
        FROM (SELECT predictedClassIndex
              FROM speaker_turns
              WHERE turnText IS NOT NULL
              ORDER BY rowid
              LIMIT 100)
        WHERE predictedClassIndex = 0
    """)
    count = cursor.fetchone()[0]
finally:
    conn.close()

print(f"Number of rows with predictedClassIndex = 0 in the first 100 rows: {count}")

Number of rows with predictedClassIndex = 0 in the first 100 rows: 23
