In [2]:
# gen data

import os
import re
import json
import copy
import time
import logging
import random
import pickle

from utils.chat2DeepSeek import Chat2DeepSeek
from utils.gen_prompt import Prompt
from utils.util import parse_trigger_answer, parse_argument_answer, save_json, load_json

In [3]:
# Read the DeepSeek API key from the environment rather than committing it to the
# notebook. An empty string is used when the variable is unset so that the cells
# which only build prompts still run.
# NOTE(review): the key that used to be hardcoded on this line has been exposed in
# the notebook history and should be revoked.
API_KEY = os.environ.get('DEEPSEEK_API_KEY', '')

# Accumulator filled by chat4data_all: event name -> list of generated records.
event_sen = {}
# event_sub_list = ['TRANSPORT', 'ELECT', 'START-POSITION', 'ATTACK', 'END-POSITION', 'MEET', 'MARRY', 'PHONE-WRITE', 'TRANSFER-MONEY', 'SUE']

# Uncomment to resume from a previously saved run instead of starting empty.
# event_sen = load_json('./data_sen_5_28_11_20.json')

# Metadata handed to Prompt(...) further down.
event_dict = load_json('./meta_data/event_dict_full.json')  # original note: "defin from doc"
trigger_dict = load_json('./meta_data/trigger_pool_ori.json')
arguments_dict = load_json('./meta_data/argument_pool_ori.json')



In [4]:
# --- Generation settings -------------------------------------------------
n_trigger = 100
n_argument = 20
sentence_per_event = 50   # target number of records per event type
complex_score = [5, 10]   # not implemented
max_argument = 2          # forwarded to Prompt(...)
max_event = 1             # upper bound used by get_n_event
max_refine = 3

# Sampling weights keyed by max_event: entry k holds one weight per candidate
# event count 1..k (entry 0 is a single placeholder weight).
weight_dict = {
    0: [1],
    1: [1],
    2: [0.9, 0.1],
    3: [0.8, 0.15, 0.05],
    4: [0.5, 0.3, 0.1, 0.1],
}

In [5]:
def get_n_event(max_event, weight_dict):
    """Sample how many events a generated sentence should contain.

    Args:
        max_event: Largest allowed event count. ``0`` means "no events".
        weight_dict: Mapping from ``max_event`` to a list of sampling weights,
            one weight per candidate count ``1..max_event``.

    Returns:
        A one-element list holding the sampled count, so callers can always
        index the result with ``[0]``. For ``max_event == 0`` this is ``[0]``.

    Raises:
        KeyError: If ``weight_dict`` has no entry for ``max_event``.
    """
    if max_event == 0:
        # Wrapped in a list to match the shape returned by random.choices below;
        # a bare 0 would break the `get_n_event(...)[0]` call sites.
        return [0]
    return random.choices(range(1, max_event + 1), weights=weight_dict[max_event])

def get_local_time():
    """Return the current local time as a ``month_day_hour_minute`` string.

    The fields are not zero-padded (e.g. ``'5_23_12_0'``).
    """
    # Read the clock once so all four fields come from the same instant; separate
    # calls could straddle a minute/hour/day boundary and give a mixed stamp.
    now = time.localtime()
    return '_'.join(str(part) for part in (now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min))
    

In [6]:
# Markdown-formatted instruction text describing how generated sentences are to be
# validated and revised (trigger markup/tense, argument roles, coherence), with the
# expected empty-result shape `{"sentences": []}` for unfixable inputs.
# NOTE(review): `refine` is not referenced by any other cell visible in this
# notebook; presumably it is sent to the chat model during refinement rounds
# (cf. `max_refine`) -- confirm. The text is a runtime string, so typos inside it
# are left untouched here.
refine = '''
### **Event Extraction Sentence Validation Protocol**  
**Objective**: Ensure generated sentences strictly comply with event structure, argument roles, and tense constraints.  

#### **1. Core Validation Checks**  
**1.1 Trigger Compliance**  
- ☑ **Explicit markup**: Triggers wrapped in `<EVENT><trigger>...</trigger></EVENT>`.  
- ☑ **Only past tense verb/nominalized verb/simple present tense verb for triggers**(e.g.,"elect", "meeting", "talks", "explosion" are valid)
- ☑ **No future/conditional/hypothetical triggers** (e.g., "will elect" is invalid).  
- ☑ **Only one trigger per sentence**

**1.2 Argument Role Alignment**  
- ☑ **Role definitions**: Arguments must act *exactly* as their abstract role requires
- ☑ **Role presence**: Arguments not in the Input(Task Execution -> Your Input of Event Extraction Sentence Generation Task) must not appear
- ☑ **No role leakage**: Arguments cannot modify other words.  
- ☑ **No TIME role**: If not provided in the INPUT, time information should not appear in the sentences(e.g., "aghdad witnessed explosions yesterday." ->"aghdad witnessed explosions." if TIME argument is not in the INPUT). 
- ☑ **Explicit markup**: Arguments wrapped in `<EVENT><ROLE>...</ROLE></EVENT>`.  

**1.3 Semantic Coherence**  
- ☑ **Arguments logically fit roles**:  
  - Example: `Entity` must be capable of voting (e.g., "government" ✅, "tree" ❌).  
- ☑ **No Need for Additional Arguments**:
  - Arguments in the INPUT can make sentence Logical(No Need for Additional Arguments to make sentence vaild)
- ☑ **No factual checks**: Unrealistic scenarios are allowed (e.g., "AI was elected in 1200 BC").  

---

#### **2. Revision Rules**  
**2.1 Mandatory Fixes**  
- ❌ **Misaligned arguments**: Rewrite to ensure arguments *act* as their role (e.g., delete "voters" to let `Entity="american"` function as the voter).  
- ❌ **Tense errors**: Replace future/conditional/hypothetical triggers (e.g., "will elect" → "elect"/"elected").  

**2.2 Prohibited Actions**  
- ⛔ **Never modify original arguments/triggers** (e.g., cannot change `Entity="american"` to `Entity="Americans"`).  
- ⛔ **No role swaps** (e.g., cannot repurpose `Place` as `Entity`).  

**2.3 Unfixable Errors**  
- If constraints cannot be met without violating rules, return  `{"sentences": []}`.  

---

#### **3. Examples**  
**Valid**:  
> *"<ELECT><Person>AI</Person></ELECT> was <ELECT><trigger>elected</trigger></ELECT> by <ELECT><Entity>robots</Entity></ELECT> in <ELECT><Place>Mars</Place></ELECT>."*  
- ✅ Past tense trigger.  
- ✅ `Entity="robots"` acts as voter.  

**Invalid → Revision**:  
> *"The <ELECT><Entity>government</Entity></ELECT> <ELECT><trigger>elected</trigger></ELECT> <ELECT><Person>Chiluba</Person></ELECT> in <ELECT><Place>Florida</Place></ELECT>."*  
- ❌ `Entity="government"` should vote *for* someone, not act as elector.  
- **Fix**: *"<ELECT><Person>Chiluba</Person></ELECT> was <ELECT><trigger>elected</trigger></ELECT> by <ELECT><Entity>government</Entity></ELECT> in <ELECT><Place>Florida</Place></ELECT>."*  

**Unfixable → Empty Output**:  
> Input: `{"Entity": "stone"}` (stones cannot vote).  
> Output: `{"sentences": []}`  

---

#### **4. Final Workflow**  

1. Run **all checks** (trigger, argument roles, coherence).  
2. if possible, Generate Revised sentence from INPUT and previous conversation.  
3. else, return empty.  


This protocol ensures strict adherence to linguistic and structural constraints while maintaining reproducibility.
'''

In [7]:
def chat4data_all(event_dict, gen_prompt, max_refine, n_sentences=None):
    """Fill the global ``event_sen`` with generated records for every event type.

    For each event in ``event_dict`` the function keeps calling
    ``gen_prompt.gen_data_prompt`` and appending the returned record until the
    event has ``n_sentences`` records. Events that already have enough records
    are skipped, so a partially filled ``event_sen`` can be resumed.

    Args:
        event_dict: Iterable of event names (iterating a dict yields its keys).
        gen_prompt: Object exposing ``gen_data_prompt(main_event=..., n_event=...)``
            that returns a ``(prompt, record)`` pair.
        max_refine: Currently unused by this function; kept so existing calls
            keep working.
        n_sentences: Target number of records per event. Defaults to the
            notebook-level ``sentence_per_event`` setting (50 in the config cell),
            replacing the previously hardcoded 50.

    Returns:
        None. Results are written into the module-level ``event_sen`` dict.
    """
    if n_sentences is None:
        n_sentences = sentence_per_event

    for event in event_dict:
        records = event_sen.get(event)
        if records and len(records) >= n_sentences:
            print('---EVENT {} chated'.format(event))
            continue

        print('TIME {} -- chating for EVENT {}'.format(get_local_time(), event))
        if not records:
            # Missing or empty entry: start a fresh list for this event.
            records = event_sen[event] = []
        while len(records) < n_sentences:
            # Only the record is stored; the prompt text itself is not used here.
            _, record = gen_prompt.gen_data_prompt(main_event=event, n_event=1)
            records.append(record)

In [14]:
# Build the prompt generator from the loaded metadata and the generation settings.
gen_prompt = Prompt(
    event_dict=event_dict,
    trigger_dict=trigger_dict,
    arguments_dict=arguments_dict,
    # get_n_event's result is indexed to obtain the sampled count itself.
    n_event=get_n_event(max_event, weight_dict)[0],
    max_argument=max_argument,
    complex_score=complex_score,
)

no orignal record


In [16]:
# Build one sentence-generation prompt and its matching event record for a single
# ATTACK event. n_event is pinned to 1 here; the sampled alternative is
# n_event=get_n_event(max_event, weight_dict)[0].
prompt, record = gen_prompt.gen_data_prompt(main_event='ATTACK', n_event=1)

In [18]:
# Inspect the generated prompt text (print keeps its line breaks readable).
print(prompt)


Event Extraction Sentence Generation Task

Core Definitions
1. **Event**: An objective fact describing interactions between participants at specific time/location  
2. **Event Type**: Classification label (e.g., ELECT) determining argument structure  
3. **Trigger**: Explicit event indicator (verb/nominalized verb) that **must use past tense**  
4. **Arguments**: Core elements with semantic roles (e.g., Person/Entity/Time/Place)
Event Specifications:ATTACK, 
	Definition for ATTACK: An ATTACK Event is defined as a violent physical act causing harm or damage.  ATTACK Events include any such Event not covered by the INJURE or DIE subtypes, including Events where there is no stated agent.  The ATTACK Event type includes less specific violence-related nouns such as ‘conflict’, ‘clashes’, and ‘fighting’.  ‘Gunfire’, which has the qualities of both an Event and a weapon, should always be tagged as an ATTACK Event, if only for the sake of consistency.  A ‘coup’ is a kind of ATTACK (and so is 

In [29]:
# Build the record-check prompt variant for a single ATTACK event; this overwrites
# the `prompt` and `record` names used by the earlier cell. n_event is pinned to 1;
# the sampled alternative is n_event=get_n_event(max_event, weight_dict)[0].
prompt, record = gen_prompt.gen_rec_prompt(main_event='ATTACK', n_event=1)

In [31]:
# Inspect the record-check prompt text.
print(prompt)

Event Record Cheak Task


Core Definitions
1. **Event**: An objective fact describing interactions between participants at specific time/location  
2. **Event Type**: Classification label (e.g., ELECT) determining argument structure  
3. **Trigger**: Explicit event indicator (verb/nominalized verb) that **must use past tense**  
4. **Arguments**: Core elements with semantic roles (e.g., Person/Entity/Time/Place)
Event Specifications:ATTACK, 
	Definition for ATTACK: An ATTACK Event is defined as a violent physical act causing harm or damage.  ATTACK Events include any such Event not covered by the INJURE or DIE subtypes, including Events where there is no stated agent.  The ATTACK Event type includes less specific violence-related nouns such as ‘conflict’, ‘clashes’, and ‘fighting’.  ‘Gunfire’, which has the qualities of both an Event and a weapon, should always be tagged as an ATTACK Event, if only for the sake of consistency.  A ‘coup’ is a kind of ATTACK (and so is a ‘war’). 
	Argume

In [11]:
# Show the event record that was returned together with the prompt.
print(record)

{"event_1": {"event": "ATTACK", "trigger": "smashed", "argument": {"Instrument": ["armor"], "Target": ["youths"]}}}


In [33]:
# Load a saved generation run for inspection; the loop below reads it as
# event name -> list of records.
# NOTE(review): the file-name suffix looks like the month_day_hour_minute stamp
# produced by get_local_time() -- confirm.
sens = load_json('./data_sen_5_23_12_0.json')


In [43]:
# Spot check: for each event type, print the sentence list of every tenth record.
for e, rs in sens.items():
    print(e)
    for i, r in enumerate(rs):
        if i % 10:
            # Not a multiple of ten: skip this record.
            continue
        print(r['sentence']['sentences'])
    print()

ATTACK
['The <ATTACK><Attacker>king</Attacker></ATTACK> <ATTACK><trigger>smashed</trigger></ATTACK> a <ATTACK><Target>vehicle</Target></ATTACK> with <ATTACK><Instrument>rocket</Instrument></ATTACK> and <ATTACK><Instrument>cocktails</Instrument></ATTACK> during the uprising.']
['The assailants <ATTACK><trigger>dropped</trigger></ATTACK> explosives on the <ATTACK><Target>Guard</Target></ATTACK> at the checkpoint.']
['The radical <ATTACK><Attacker>wing</Attacker></ATTACK> brutally <ATTACK><trigger>abused</trigger></ATTACK> a <ATTACK><Target>division</Target></ATTACK> in the <ATTACK><Place>town</Place></ATTACK> during the riots.']
['In <ATTACK><Place>Sudan</Place></ATTACK>, <ATTACK><Attacker>banks</Attacker></ATTACK> brutally <ATTACK><trigger>raped</trigger></ATTACK> <ATTACK><Target>airliners</Target></ATTACK> during the conflict.']
['The <ATTACK><Attacker>conspirator</Attacker></ATTACK> <ATTACK><trigger>threw</trigger></ATTACK> explosives from a <ATTACK><Instrument>warplane</Instrument></

In [13]:
# for e, rs in event_sen.items():
#     print(e)
#     for i, r in enumerate(rs):
#         if i % 10 == 0:
#             print(r)
#     print('*'*25)

In [None]:
'''
Complexity Control
    Constraints:
        1.Trigger and arguments must remain unchanged (exact words/phrases provided in input).
        2.Complexity is adjusted only through:
            Lexical: Vocabulary richness around the fixed trigger/arguments.
    
    
    Metric for Lexical Complexity:
        Level: Description (examples use the fixed trigger "bomb" and the arguments: state, team, warplanes)
        Lexical=1:	Basic syntax, minimal modifiers.(e.g., "The state bombed the team with warplanes.")
        Lexical=2:	Moderate adjectives/adverbs, prepositional phrases.(e.g., "The state violently bombed the fleeing team using advanced warplanes.")
        Lexical=3:	Advanced diction, figurative language, or technical terms.(e.g., "The state executed a relentless bombing campaign, decimating the beleaguered team via supersonic warplanes.")
    Default Complexity Request: If unspecified, generate at Lexical=2.'''