In [129]:
from google.colab import userdata
gemini_key = userdata.get('gemini_key')

In [130]:
import os
import torch
import random
random.seed(28)
import json
import csv

In [131]:
def gemini_setup(api_key):
    os.system("pip install -q -U google-generativeai")
    import google.generativeai as genai
    genai.configure(api_key=gemini_key)
    glm_config = genai.GenerationConfig(temperature=0.99)
    safety_settings = [
        {"category": "HARM_CATEGORY_DANGEROUS", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
    ]
    gemini_model = genai.GenerativeModel('gemini-2.0-flash', generation_config=glm_config, safety_settings=safety_settings)
    return gemini_model

device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

In [132]:
gemini_model = gemini_setup(gemini_key)

In [133]:
def populate_data_for_permutations(prompt, data_content):
  data_list = []
  response = gemini_model.generate_content(contents= prompt)
  for i in response.text.strip().split('\n'):
    data_list.append(i)
  print("The data generated for {}:{}\n The length of the list is: {}".format(data_content,data_list,len(data_list)))
  return data_list

In [None]:
def convert_list_to_txt(list_,filename):
  with open(filename,'w+') as f:
    for item in list_:
      f.write("%s\n" %item)
  print("List converted to {}".format(filename))


In [136]:
prompt_cities = ("List 100 different cities from the US with maximum crime rates. Make each city as a text in a seperate line. Dont include another other text.")
cities = populate_data_for_permutations(prompt_cities,"cities")

KeyboardInterrupt: 

In [None]:
cities = cities[2:102]

In [None]:
convert_list_to_txt(cities,"cities.txt")

List converted to cities.txt


In [None]:
prompt_locations= ("List 100 different locations like bar, park, etc where there is public movement and a crime can be reported. Make each location as a text in a separate line. Do not include any other text.")
locations = populate_data_for_permutations( prompt_locations,"locations")

The data generated for locations:['Bar', 'Park', 'Restaurant', 'Shopping Mall', 'Grocery Store', 'Gas Station', 'Bus Stop', 'Train Station', 'Airport', 'Hospital', 'School', 'University', 'Library', 'Movie Theater', 'Concert Venue', 'Sports Stadium', 'Museum', 'Art Gallery', 'Hotel Lobby', 'Public Beach', 'Swimming Pool', 'Gym', 'Community Center', 'Post Office', 'Bank', 'ATM', 'Parking Garage', 'Parking Lot', 'Street Corner', 'Sidewalk', 'Crosswalk', 'Public Square', 'Town Hall', 'Courthouse', 'Police Station', 'Fire Station', 'Amusement Park', 'Zoo', 'Botanical Garden', 'Campground', 'Hiking Trail', 'Bike Path', 'Skate Park', 'Dog Park', 'Playground', 'Construction Site', 'Factory', 'Warehouse', 'Office Building', 'Retail Store', 'Hair Salon', 'Spa', "Doctor's Office", "Dentist's Office", 'Pharmacy', 'Veterinary Clinic', 'Car Dealership', 'Bus Terminal', 'Taxi Stand', 'Ferry Terminal', 'Marina', 'Port', 'Truck Stop', 'Rest Area', 'Homeless Shelter', 'Soup Kitchen', 'Food Bank', 'Thri

In [None]:
locations = locations[:100]


In [None]:
convert_list_to_txt(locations,"locations.txt")

List converted to locations.txt


In [None]:
time_prompt = ("List 20 time settings around a day, like morning, evening, afternoon, etc where an event like a crime can happen. You can repeat the same in different wordings. Make each time setting as a text in a separate line. Do not include any other text")
times = populate_data_for_permutations(time_prompt,"time settings")

The data generated for time settings:['Dawn', 'Sunrise', 'Early Morning', 'Mid-Morning', 'Late Morning', 'Noon', 'Midday', 'Early Afternoon', 'Afternoon', 'Late Afternoon', 'Dusk', 'Sunset', 'Early Evening', 'Evening', 'Late Evening', 'Night', 'Midnight', 'Early Hours', 'Dead of Night', 'Predawn']
 The length of the list is: 20


In [None]:
convert_list_to_txt(times,"time_settings.txt")

List converted to time_settings.txt


In [None]:
victim_age_prompt = "List 10 different ages that are common for victims of murder. Make each age in a seperate line. Do no repeat ages. Do not include any other text"
victim_characteristics_age=populate_data_for_permutations(victim_age_prompt,"victim ages")


The data generated for victim ages:['16', '18', '22', '28', '31', '35', '40', '47', '52', '68']
 The length of the list is: 10


In [None]:
convert_list_to_txt(victim_characteristics_age,"victim_ages.txt")

List converted to victim_ages.txt


In [None]:
victim_gender_prompt = "List 5 different genders that are common among victims of murder. Make each gender in a seperate line. Do not repeat genders, Do not include any other text"
victim_genders = populate_data_for_permutations(victim_gender_prompt, "victim gender")

The data generated for victim gender:['Man', 'Woman', 'Transgender man', 'Transgender woman', 'Non-binary person']
 The length of the list is: 5


In [None]:
convert_list_to_txt(victim_genders,"victim_genders.txt")

List converted to victim_genders.txt


In [None]:
victim_occupation_prompt = " List 10 occupations that are most probably prone to being a victim of murder. Make each occupation in a separate line. Do not repeat. Do not include any other text"
victim_occcupation = populate_data_for_permutations(victim_occupation_prompt, "victim occupation")

The data generated for victim occupation:['1.  Law Enforcement Officer', '2.  Security Guard', '3.  Taxi Driver', '4.  Journalist', '5.  Corrections Officer', '6.  Drug Dealer', '7.  Activist', '8.  Judge', '9.  Bartender', '10. Social Worker']
 The length of the list is: 10


In [None]:
convert_list_to_txt(victim_occcupation,"victim_occupation.txt")

List converted to victim_occupation.txt


In [None]:
victim_personality_prompt = "List 10 persoanlities of a victim of a murder. Make each personality in a seperate line. Do not repeat. Do not include any other text."
victim_personality = populate_data_for_permutations(victim_personality_prompt,"victim personality")

The data generated for victim personality:['Kind', 'Ambitious', 'Withdrawn', 'Anxious', 'Optimistic', 'Resilient', 'Carefree', 'Cynical', 'Creative', 'Stubborn']
 The length of the list is: 10


In [None]:
convert_list_to_txt(victim_personality,"victim_personality.txt")

List converted to victim_personality.txt


In [None]:
suspect_age_prompt= "List 10 dfferent ages that are common among murder suspects. Make each age in a seperate line. Do not repeat. Do not include any other text."
suspect_age = populate_data_for_permutations(suspect_age_prompt,"suspect_age")

The data generated for suspect_age:['17', '19', '22', '25', '28', '31', '34', '37', '40', '43']
 The length of the list is: 10


In [None]:
convert_list_to_txt(suspect_age,"suspect_age.txt")

List converted to suspect_age.txt


In [None]:
suspect_gender_prompt="List 5 different genders that are common among suspects of murder. Make each gender in a seperate line. Do not repeat genders, Do not include any other text"
suspect_gender = populate_data_for_permutations(suspect_gender_prompt,"suspect_gender")

The data generated for suspect_gender:['Male', 'Female', 'Transgender Male', 'Transgender Female', 'Non-binary']
 The length of the list is: 5


In [None]:
convert_list_to_txt(suspect_gender,"suspect_gender.txt")

List converted to suspect_gender.txt


In [None]:
suspect_physique_prompt = " List 100 different instances of physical characteristics of a murder suspect. Make each output in a seperate line. Do not repeat the outputs. Do not include any other text"
suspect_physique = populate_data_for_permutations(suspect_physique_prompt,"suspect_physique")

The data generated for suspect_physique:['Visible tattoos', 'Scar above left eyebrow', 'Missing left pinky finger', "Protruding Adam's apple", 'Crossed eyes', 'Burn scars on hands', 'Thick, bushy eyebrows', 'Receding hairline', 'Large, prominent nose', 'Small, beady eyes', "Widow's peak", 'Unusually long fingers', 'Short, stubby fingers', 'Birthmark on neck', 'Cleft chin', 'Dimples', 'Attached earlobes', 'Unattached earlobes', 'High cheekbones', 'Low cheekbones', 'Thin lips', 'Full lips', 'Overbite', 'Underbite', 'Gap between front teeth', 'Crooked teeth', 'Double chin', 'Sunken eyes', 'Pale complexion', 'Ruddy complexion', 'Freckles on face', 'Mole on cheek', 'Acne scars', 'Rosacea', 'Wears glasses', 'Lazy eye', 'Drooping eyelid', 'Bushy sideburns', 'Pointed ears', 'Square jaw', 'Round face', 'Oval face', 'Long face', 'Wrinkled forehead', 'Deep laugh lines', 'Thin eyebrows', 'Bushy eyebrows', 'Long eyelashes', 'Short eyelashes', 'Thick neck', 'Thin neck', 'Sloping shoulders', 'Broad s

In [None]:
suspect_physique = suspect_physique[:100]

In [None]:
convert_list_to_txt(suspect_physique,"suspect_physique.txt")

List converted to suspect_physique.txt


In [None]:
suspect_occupation_prompt = "List 10 different occupations that is common among murder suspects. Output the occupation in a singular form. Make each output in a seperate line. Do not repeat the outputs. Do not include any other text."
suspect_occupation = populate_data_for_permutations(suspect_occupation_prompt,"suspect_occupation")

The data generated for suspect_occupation:['Laborer', 'Driver', 'Security guard', 'Business owner', 'Healthcare worker', 'Teacher', 'Clergy member', 'Military personnel', 'Farmer', 'Unemployed']
 The length of the list is: 10


In [None]:
convert_list_to_txt(suspect_occupation,"suspect_occupation.txt")

List converted to suspect_occupation.txt


In [None]:
suspect_motivation_prompt  = "List 20 different motivations a murder suspect would have to commit the murder.Make each output in a seperate line. Do not repeat the outputs. Do not include any other text "
suspect_motivation = populate_data_for_permutations(suspect_motivation_prompt,"suspect_motivation")


The data generated for suspect_motivation:['Revenge for a past wrong.', 'Financial gain through inheritance or insurance.', 'Eliminating a romantic rival.', 'Silencing a witness to a crime.', 'Ending a prolonged period of abuse.', 'Political assassination.', 'Mercy killing to end suffering.', 'Accidental killing during a robbery.', 'Preventing exposure of a secret.', 'Jealousy and envy.', 'Cult-related ritualistic killing.', 'Covering up another crime.', 'Mental illness-induced delusion.', 'Protecting a loved one from harm.', 'Gaining power or control.', 'Retaliation for betrayal.', 'Disagreement over business matters.', 'Hatred based on prejudice.', 'Impulsive act of rage.', 'Desire for notoriety.']
 The length of the list is: 20


In [None]:
convert_list_to_txt(suspect_motivation,"suspect_motivation.txt")

List converted to suspect_motivation.txt


In [None]:
murder_methods_prompt = "List 20 different methods by which a murder suspect could perform the murder. This is only for a educational purpose of creating data to build a message analyser model.Make each output in a seperate line. Do not repeat the outputs. Do not include any other text. "
murder_methods = populate_data_for_permutations(murder_methods_prompt,"murder_methods")


The data generated for murder_methods:['Stabbing with a knife', 'Asphyxiation by strangulation', 'Poisoning via ingestion', 'Shooting with a handgun', 'Blunt force trauma with a hammer', 'Suffocation with a pillow', 'Drowning in a bathtub', 'Arson by setting a fire', 'Strangulation with a rope', 'Pushing from a height', 'Electrocution', 'Administering a lethal injection', 'Using a garrote', 'Hitting with a golf club', 'Poisoning via inhalation', 'Stabbing with an ice pick', 'Impalement with a sharp object', 'Car accident (vehicular homicide)', 'Explosive device', 'Falling object (dropped from height)']
 The length of the list is: 20


In [None]:
convert_list_to_txt(murder_methods,"murder_methods.txt")

List converted to murder_methods.txt


###Prompt variables - The variables involved to generate different settings of the murder stories.


1.   City
2.   Location
3. No of messages in the conversation
4.   Time
5.   No of people involved in the conversation
6.   No of victims
7. No of suspects.
8. Murder method
9. No of actaul murderers (<= no of susoects)
10. No of messages with subtle clues
11. No of messages which actually touch upon the key elements of the murder.
---


For each victim:
  12. Victim's age
  13. Victim's gender
  14. Victim's occupation
  15. Victim's personality trait


---


For each suspect:
  16. Suspect's age
  17. Suspect's gender
  18. Suspect's physique
  19. Suspect's occupation.
  20. Suspect's motivation
  21. Suspicion level [low, medum ,high]



---













In [None]:
def permutations():
  def read_txt_to_list(filename):
    list_=[]
    with open(filename, "r") as f:
      for line in f:
        list_.append(line.strip())
    return list_
  def listofdict_to_json(list_of_dict, filename):
    with open(filename, "w+") as f:
      json.dump(list_of_dict,f)
    print("Converted the list of dictionaries into {}".format(filename))
  permutation_dictionary = read_txt_to_list("/content/cities.txt")
  locations = read_txt_to_list("/content/locations.txt")
  times = read_txt_to_list("/content/time_settings.txt")
  no_of_people_in_conversation = [2,3,4,5]
  no_of_victims = [1,2]
  no_of_suspects = [1,2,3,4]
  no_of_messages = [ i for i in range(20,200,5)]
  victim_age = read_txt_to_list("/content/victim_ages.txt")
  victim_gender = read_txt_to_list("/content/victim_genders.txt")
  victim_occupation = read_txt_to_list("/content/victim_occupation.txt")
  victim_personality = read_txt_to_list("/content/victim_personality.txt")
  suspect_age = read_txt_to_list("/content/suspect_age.txt")
  suspect_gender = read_txt_to_list("/content/suspect_gender.txt")
  suspect_physique = read_txt_to_list("/content/suspect_physique.txt")
  suspect_occupation = read_txt_to_list("/content/suspect_occupation.txt")
  suspect_motivation= read_txt_to_list("/content/suspect_motivation.txt")
  murder_nethods = read_txt_to_list("/content/murder_methods.txt")
  permutations = []
  for i in range(200):
    permutation_dictionary = {}
    permutation_dictionary["city"] = random.choice(cities)
    permutation_dictionary["location"] = random.choice(locations)
    permutation_dictionary["time"] = random.choice(times)
    permutation_dictionary["no_of_people_in_conversation"] = random.choice(no_of_people_in_conversation)
    permutation_dictionary["murder_method"] = random.choice(murder_methods)
    permutation_dictionary["no_of_victims"] = random.choice(no_of_victims)
    permutation_dictionary["no_of_suspects"] = random.choice(no_of_suspects)
    permutation_dictionary["no_of_messages"] = random.choice(no_of_messages)
    for j in range(1,permutation_dictionary["no_of_victims"]+1):
      victim_dictionary={}
      victim_dictionary["age"] = random.choice(victim_age)
      victim_dictionary["gender"] = random.choice(victim_gender)
      victim_dictionary["occupation"] = random.choice(victim_occupation)
      victim_dictionary["personality"] = random.choice(victim_personality)
      permutation_dictionary["victim_{}".format(j)] = victim_dictionary
    for k in range(1,permutation_dictionary["no_of_suspects"]+1):
      suspect_dictionary = {}
      suspect_dictionary["age"] = random.choice(suspect_age)
      suspect_dictionary["gender"] = random.choice(suspect_gender)
      suspect_dictionary["physqiue"] = random.choice(suspect_physique)
      suspect_dictionary["occupation"] = random.choice(suspect_occupation)
      suspect_dictionary["motivation"] = random.choice(suspect_motivation)
      permutation_dictionary["suspect_{}".format(k)] = suspect_dictionary
    permutations.append(permutation_dictionary)
  listofdict_to_json(permutations,"permutations.json")



In [None]:
permutations()

Converted the list of dictionaries into permutations.json


In [103]:
def json_to_list_of_dicts(jsonfile):
  l=[]
  with open(jsonfile,"r") as f:
    data = json.load(f)
    for di in data:
      l.append(di)
    return l



In [104]:
permutations = json_to_list_of_dicts("/content/permutations.json")

In [105]:
len(permutations)

200

In [106]:
permutations[0]

{'city': 'Indianapolis, IN',
 'location': 'Spa',
 'time': 'Midday',
 'no_of_people_in_conversation': 5,
 'murder_method': 'Falling object (dropped from height)',
 'no_of_victims': 1,
 'no_of_suspects': 4,
 'no_of_messages': 130,
 'victim_1': {'age': '40',
  'gender': 'Transgender woman',
  'occupation': '4.  Journalist',
  'personality': 'Kind'},
 'suspect_1': {'age': '19',
  'gender': 'Transgender Female',
  'physqiue': 'Unusually long fingers',
  'occupation': 'Settling a personal vendetta.',
  'motivation': 'Cult-related ritualistic killing.'},
 'suspect_2': {'age': '37',
  'gender': 'Transgender Female',
  'physqiue': 'Age spots on hands',
  'occupation': 'Financial gain through inheritance.',
  'motivation': 'Eliminating a romantic rival.'},
 'suspect_3': {'age': '43',
  'gender': 'Transgender Female',
  'physqiue': 'Tremor in hands',
  'occupation': 'Mercy killing due to severe suffering.',
  'motivation': 'Cult-related ritualistic killing.'},
 'suspect_4': {'age': '22',
  'gende

In [107]:
def prompt_string_formation(type_,data):
  if type_=="victim":
    filename = "/content/true_positives_victims.txt"
    dictionary_variable_1 = "no_of_victims"
    dictionary_variable_2="victim_{}"
  else:
    filename = "/content/true_positives_suspects.txt"
    dictionary_variable_1 = "no_of_suspects"
    dictionary_variable_2="suspect_{}"
  with open(filename, "r") as f:
    prompt_part = f.read()
  str_=""
  for i in range(1,data[dictionary_variable_1]+1):
    if type_=="victim":
      str_+= prompt_part.format(i,data[dictionary_variable_2.format(i)]["age"],data[dictionary_variable_2.format(i)]["gender"],data[dictionary_variable_2.format(i)]["occupation"],data[dictionary_variable_2.format(i)]["personality"])
    else:
      str_+=prompt_part.format(i,data[dictionary_variable_2.format(i)]["age"],data[dictionary_variable_2.format(i)]["gender"],data[dictionary_variable_2.format(i)]["physqiue"],data[dictionary_variable_2.format(i)]["occupation"],data[dictionary_variable_2.format(i)]["motivation"],random.choice(["low","medium","high"]))
  return str_



In [108]:
str_=prompt_string_formation("suspect",permutations[0])

In [109]:
print(str_)

Suspect 1 Profile:
   • Age: 19
   • Gender: Transgender Female
   • Physique: Unusually long fingers
   • Occupation: Settling a personal vendetta.
   • Stated Motivation: Cult-related ritualistic killing.
   • Suspicion Level: low
Suspect 2 Profile:
   • Age: 37
   • Gender: Transgender Female
   • Physique: Age spots on hands
   • Occupation: Financial gain through inheritance.
   • Stated Motivation: Eliminating a romantic rival.
   • Suspicion Level: high
Suspect 3 Profile:
   • Age: 43
   • Gender: Transgender Female
   • Physique: Tremor in hands
   • Occupation: Mercy killing due to severe suffering.
   • Stated Motivation: Cult-related ritualistic killing.
   • Suspicion Level: low
Suspect 4 Profile:
   • Age: 22
   • Gender: Non-binary
   • Physique: Visible tattoos
   • Occupation: Financial gain through inheritance.
   • Stated Motivation: Political assassination.
   • Suspicion Level: high



In [110]:
def prompt(permutation):
  with open("/content/true_postive_first_part.txt", "r") as f:
    true_positives_first_part = f.read()
  with open("/content/true_psoitives_last_part.txt", "r") as f:
    true_positives_last_part = f.read()
  true_positives_first_part = true_positives_first_part.format(random.randint(1,permutation["no_of_suspects"]),permutation["city"],permutation["location"],permutation["time"],permutation["no_of_messages"],permutation["no_of_people_in_conversation"],permutation["no_of_victims"],permutation["no_of_suspects"],permutation["murder_method"])
  true_positives_victims = prompt_string_formation(type_="victim",data = permutation)
  true_positives_suspects = prompt_string_formation(type_="suspect",data = permutation)
  true_positives_last_part = true_positives_last_part.format(random.randint(1,permutation["no_of_suspects"]),permutation["no_of_suspects"]-1,random.randint(3,7),random.randint(3,7),permutation["time"],permutation["murder_method"], permutation["victim_1"]["occupation"],permutation["murder_method"],permutation["no_of_messages"],random.randint(1,4))
  return true_positives_first_part+true_positives_victims+ true_positives_suspects+true_positives_last_part

In [111]:
prompt(permutations[0])

'Context: The conversation should contain text messages where:\n   - Only 2 suspects are actually involved in the crime\n   - Some suspects show genuine innocence\n   - Some might have hidden motives that aren\'t obvious\n   - Include red herrings and false leads\n\nSetting:\nA brutal murder has shaken the city of Indianapolis, IN, specifically at Spa. \nThe crime occurred at Midday.\nAuthorities retrieved 130 messages between 5 people.\n\nCrime Profile:\n• Victims: 1\n• Suspects: 4\n• Murder method: Falling object (dropped from height)Victim 1 Details:\n   • Age: 40\n   • Gender: Transgender woman\n   • Occupation: 4.  Journalist\n   • Personality Trait: Kind\nSuspect 1 Profile:\n   • Age: 19\n   • Gender: Transgender Female\n   • Physique: Unusually long fingers\n   • Occupation: Settling a personal vendetta.\n   • Stated Motivation: Cult-related ritualistic killing.\n   • Suspicion Level: low\nSuspect 2 Profile:\n   • Age: 37\n   • Gender: Transgender Female\n   • Physique: Age spot

In [112]:
def generate(prompt, gemini_model):
  response = gemini_model.generate_content(contents= prompt)
  return response


In [None]:
def ground_truth(generation_file):


In [117]:
def save_messages_as_csv():
  conversations = []
  for num,i in enumerate(permutations[0:2]):
    prompt_ = prompt(i)
    conversation = generate(prompt_,gemini_model)
    conversations.append(conversation.text)
    print("Conversation {} done".format(num))
  with open("conversations.csv", "w+") as f:
    writer = csv.writer(f)
    for conversation in conversations:
      writer.writerow([conversation])
  print("conversations.csv is saved!")


In [118]:
save_messages_as_csv()

KeyboardInterrupt: 