In [1]:
from google.colab import userdata
# Fetch the secret stored under the name 'gemini_key' through Colab's
# `userdata` helper, so the API key itself never appears in the notebook.
gemini_key = userdata.get('gemini_key')

In [47]:
import os
import torch
import random
# Seed Python's built-in `random` module with a fixed value.
# NOTE(review): no cell visible here draws from `random` — confirm this seed
# is still needed.
random.seed(28)
import json
import csv

In [3]:
def gemini_setup(api_key):
    """Install the Gemini SDK, configure it, and return a ready-to-use model.

    Args:
        api_key: API key passed to ``genai.configure``.

    Returns:
        A ``genai.GenerativeModel`` for ``'gemini-2.0-flash'`` created with a
        sampling temperature of 0.99 and every listed harm category set to
        ``BLOCK_NONE``.
    """
    # The SDK is installed at call time, so the import has to stay inside the
    # function, after the install has run.
    os.system("pip install -q -U google-generativeai")
    import google.generativeai as genai

    # Use the argument rather than the notebook-level `gemini_key` global, so
    # the function honours whatever key the caller passes in.
    genai.configure(api_key=api_key)

    glm_config = genai.GenerationConfig(temperature=0.99)

    harm_categories = (
        "HARM_CATEGORY_DANGEROUS",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    # Same threshold for every category: nothing is blocked.
    safety_settings = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in harm_categories
    ]

    gemini_model = genai.GenerativeModel(
        'gemini-2.0-flash',
        generation_config=glm_config,
        safety_settings=safety_settings,
    )
    return gemini_model

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Build the model object that the generation cells below pass to `generate`;
# `gemini_key` is the secret read from Colab in the first cell.
gemini_model = gemini_setup(gemini_key)

Question Generation Pipeline

In [50]:
def prompt(num_questions=100, crime="murder"):
  """Build the instruction text that asks the model for investigative questions.

  Args:
    num_questions: How many questions the prompt requests. Defaults to 100.
    crime: Name of the crime inserted into the prompt. Defaults to "murder".

  Returns:
    The prompt as a single multi-line string. With the default arguments the
    text is identical to the original hard-coded prompt.

  Raises:
    ValueError: If ``num_questions`` is less than 1.
  """
  if num_questions < 1:
    raise ValueError("num_questions must be at least 1")

  # NOTE(review): the first four example questions below are about a person's
  # age, which looks unrelated to the stated crime — confirm they are intended.
  prompt_text = f"""
  You are a legal expert designing investigative questions for analyzing crime-related conversations. Your task is to design investigative questions for extracting specific evidence from crime-related conversations. The crime in question here is {crime}.

  Generate {num_questions} questions that:
  Are designed to retrieve direct text messages from a conversation.
  Do not assume any names, times, locations, or methods.
  Are framed to help investigators uncover critical details through careful questioning.
  Are clear, specific, and structured to return direct messages, not summaries or opinions.

  The questions should be focused on uncovering elements of a crime, including:
  Actus Reus (Guilty Act) – What actions were taken? How did they lead to harm?
  Mens Rea (Guilty Mind) – What was the intent behind the act? Was it planned or spontaneous?
  Causation – How did the event lead to harm? Were there external factors involved?
  Concurrence – Did intent and action happen together?
  Attempt & Conspiracy – Was the crime planned but not carried out?
  Possible Defenses – Could there be self-defense or mitigating circumstances?

  Example questions:
  "Show me messages in which one person asks about another person's age."
  "Extract all messages that mention someone's age."
  "Return all text messages where age is discussed."
  "Find any message where one person asks another about their age."
  "Find messages where someone expresses a desire to harm another person."
  "Extract messages where anyone mentions planning something bad."
  "Show all conversations where someone discusses how an act was carried out."

  Now generate {num_questions} investigative questions following these guidelines.
  """
  return prompt_text

In [43]:
def generate(prompt, gemini_model):
  """Send ``prompt`` to the model and return its reply split into lines.

  Args:
    prompt: Text passed to ``gemini_model.generate_content`` as ``contents``.
    gemini_model: Object exposing ``generate_content``; the returned response
      must have a ``text`` attribute.

  Returns:
    A list with one entry per non-blank line of the reply, each stripped of
    surrounding whitespace.
  """
  reply = gemini_model.generate_content(contents=prompt)

  questions = []
  for raw_line in reply.text.strip().split("\n"):
    cleaned = raw_line.strip()
    # Blank and whitespace-only lines are dropped.
    if cleaned:
      questions.append(cleaned)
  return questions

In [51]:
def ground_truth(generation_file):
  """Placeholder marked as not required; ignores its argument and returns ``None``."""
  return None

In [44]:
def save_as_csv(questions, filename="investigative_questions.csv"):
  """Write each question to ``filename`` as its own single-column CSV row.

  No header row is written. The file is overwritten if it already exists.

  Args:
    questions: Iterable of question strings.
    filename: Destination path. Defaults to "investigative_questions.csv".
  """
  # Explicit UTF-8 so non-ASCII characters in the questions are written the
  # same way on every platform instead of depending on the locale default.
  with open(filename, 'w', newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows([question] for question in questions)

  print(f"Generated questions saved to {filename}")

In [49]:
prompt_text = prompt()
questions = generate(prompt_text, gemini_model)
save_as_csv(questions)
# Only the final expression of a cell is rendered, so the preview goes last;
# it is sliced to keep the cell output short.
questions[:5]

Generated questions saved to investigative_questions.csv
