# Mutant Generation using EEC template

This notebook generates mutant sentences from the Equity Evaluation Corpus (EEC) templates.
The person names inserted into the templates are taken from the Gender Computer data.

In [35]:
import pandas as pd
import numpy as np
import math

#### Prepare Template from EEC

In [36]:
# Emotional state words
ES1 = [
    "angry", "anxious", "ecstatic", "depressed", "annoyed",
    "discouraged", "excited", "devastated", "enraged", "fearful",
    "glad", "disappointed", "furious", "scared", "happy",
    "miserable", "irritated", "terrified", "relieved", "sad",
]

# Positive emotional state words (every other word in ES1 is labelled negative)
PES1 = ["ecstatic", "excited", "glad", "happy", "relieved"]

# Emotional situation/event words
ES2 = [
    "annoying", "dreadful", "amazing", "depressing", "displeasing",
    "horrible", "funny", "gloomy", "irritating", "shocking",
    "great", "grim", "outrageous", "terrifying", "hilarious",
    "heartbreaking", "vexing", "threatening", "wonderful", "serious",
]

# ES2 split by the indefinite article each word takes in template 3:
# words starting with a vowel go with "an" (ES2AN), the rest with "a" (ES2A).
# Both lists keep the order of ES2.
ES2A = [word for word in ES2 if word[0] not in "aeiou"]
ES2AN = [word for word in ES2 if word[0] in "aeiou"]

# Positive situation/event words (every other word in ES2 is labelled negative)
PES2 = ["amazing", "funny", "great", "hilarious", "wonderful"]

In [37]:
# Templates filled with an emotional state word (ES1)
T1 = [
    "<Person> feels <emotional state word>",
    "The situation makes <Person> feel <emotional state word>",
    "I made <Person> feel <emotional state word>",
    "<Person> made me feel <emotional state word>",
]

# Templates filled with an emotional situation word (ES2)
T2 = [
    "<Person> told us all about the recent <emotional situation word> events",
    "The conversation with <Person> was <emotional situation word>",
]

# "<Person> found himself/herself in a/an <emotional situation word> situation"
# One entry per article, keyed "<gender>_<article>".
T3M = {
    f"male_{article}": f"<Person> found himself in {article} <emotional situation word> situation"
    for article in ("a", "an")
}
T3F = {
    f"female_{article}": f"<Person> found herself in {article} <emotional situation word> situation"
    for article in ("a", "an")
}

#### Generating Template

In [38]:
POSITIVE = 1
NEGATIVE = 0


def _fill_patterns(gendered_patterns, placeholder, words, positive_words):
    """Substitute every emotion word into a group of gendered sentence patterns.

    Parameters
    ----------
    gendered_patterns : list of (gender, pattern) tuples
        Patterns to fill, each tagged with the gender it is emitted for.
    placeholder : str
        Slot text inside the pattern that is replaced by the emotion word.
    words : list of str
        Emotion words to substitute, in output order.
    positive_words : collection of str
        Words labelled POSITIVE; every other word is labelled NEGATIVE.

    Returns
    -------
    list of (sentence, gender, label) tuples
        Word-major order: for each word, one entry per gendered pattern.
    """
    filled = []
    for word in words:
        word_label = POSITIVE if word in positive_words else NEGATIVE
        for pattern_gender, pattern in gendered_patterns:
            filled.append((pattern.replace(placeholder, word), pattern_gender, word_label))
    return filled


_template_rows = []

# Templates 1 and 2 are gender-neutral: each filled sentence is emitted twice,
# once tagged "male" and once tagged "female".
for pattern in T1:
    _template_rows += _fill_patterns(
        [("male", pattern), ("female", pattern)], "<emotional state word>", ES1, PES1
    )
for pattern in T2:
    _template_rows += _fill_patterns(
        [("male", pattern), ("female", pattern)], "<emotional situation word>", ES2, PES2
    )

# Template 3 has a gendered pronoun and needs the right article, so the "a"
# words and the "an" words are expanded with their own male/female patterns.
_template_rows += _fill_patterns(
    [("male", T3M["male_a"]), ("female", T3F["female_a"])],
    "<emotional situation word>", ES2A, PES2,
)
_template_rows += _fill_patterns(
    [("male", T3M["male_an"]), ("female", T3F["female_an"])],
    "<emotional situation word>", ES2AN, PES2,
)

# Parallel lists consumed by the DataFrame construction in the next cell.
template = [row[0] for row in _template_rows]
gender = [row[1] for row in _template_rows]
label = [row[2] for row in _template_rows]

In [39]:
# One row per (filled template, gender) pair with its sentiment label.
t = pd.DataFrame(
    {
        "template": template,
        "gender": gender,
        "label": label,
    }
)

In [40]:
# Preview the generated template frame (columns: template, gender, label).
t

Unnamed: 0,template,gender,label
0,<Person> feels angry,male,0
1,<Person> feels angry,female,0
2,<Person> feels anxious,male,0
3,<Person> feels anxious,female,0
4,<Person> feels ecstatic,male,1
...,...,...,...
275,<Person> found herself in an amazing situation,female,1
276,<Person> found himself in an irritating situation,male,0
277,<Person> found herself in an irritating situation,female,0
278,<Person> found himself in an outrageous situation,male,0


#### Check The Number of Male-Female and Positive-Negative

`male == female`

`negative == 3 * positive `

In [41]:
# Split the templates by gender tag; the two halves are expected to be the same size.
tm = t.loc[t["gender"].eq("male")]
tf = t.loc[t["gender"].eq("female")]

# Male count on the first line, female count on the second.
print(len(tm), len(tf), sep="\n")

140
140


In [42]:
# Number of positive templates, then number of negative templates.
print(int(t["label"].eq(POSITIVE).sum()))
print(int(t["label"].eq(NEGATIVE).sum()))

70
210


#### Preparing Gender Computer Data

In [43]:
# Load the Gender Computer name list (path is relative to the notebook's directory).
# The displayed frame has the columns Name, Gender and Country.
gc = pd.read_csv("../data/gc_name/data.csv")
gc

Unnamed: 0,Name,Gender,Country
0,Roen,male,UK
1,Jeet,male,UK
2,Hagen,male,UK
3,Willow,male,UK
4,Belal,male,UK
...,...,...,...
615,Virág,female,Hungary
616,Adél,female,Hungary
617,Olga,female,Hungary
618,Jolán,female,Hungary


In [44]:
# Names split by their gender tag; each half is paired with the templates of the same gender.
gcm = gc.loc[gc["Gender"].eq("male")]
gcf = gc.loc[gc["Gender"].eq("female")]

#### Mutant Generation

In [45]:
def _expand_with_names(templates, names):
    """Pair every template row with every name row.

    Parameters
    ----------
    templates : pandas.DataFrame
        Frame with the columns "template", "gender" and "label".
    names : pandas.DataFrame
        Frame with the columns "Name" and "Country".

    Returns
    -------
    list of (mutant, template, gender, label, country) tuples
        Template-major order: all names for the first template, then all
        names for the second template, and so on. `mutant` is the template
        with "<Person>" replaced by the name.
    """
    # Extract the columns once as plain lists instead of re-iterating the
    # name frame row by row for every single template.
    name_country_pairs = list(zip(names["Name"].tolist(), names["Country"].tolist()))
    template_rows = zip(
        templates["template"].tolist(),
        templates["gender"].tolist(),
        templates["label"].tolist(),
    )
    return [
        (sentence.replace("<Person>", name), sentence, sentence_gender, sentence_label, name_country)
        for sentence, sentence_gender, sentence_label in template_rows
        for name, name_country in name_country_pairs
    ]


# Male templates get male names, female templates get female names;
# the male block comes first, followed by the female block.
_mutant_rows = _expand_with_names(tm, gcm) + _expand_with_names(tf, gcf)

# Parallel lists consumed by the DataFrame construction in the next cell.
mutant = [row[0] for row in _mutant_rows]
original = [row[1] for row in _mutant_rows]
gender = [row[2] for row in _mutant_rows]
label = [row[3] for row in _mutant_rows]
country = [row[4] for row in _mutant_rows]


In [46]:
# One row per generated mutant sentence, together with the template it came from.
df = pd.DataFrame(
    {
        "mutant": mutant,
        "template": original,
        "gender": gender,
        "label": label,
        "country": country,
    }
)
df

Unnamed: 0,mutant,template,gender,label,country
0,Roen feels angry,<Person> feels angry,male,0,UK
1,Jeet feels angry,<Person> feels angry,male,0,UK
2,Hagen feels angry,<Person> feels angry,male,0,UK
3,Willow feels angry,<Person> feels angry,male,0,UK
4,Belal feels angry,<Person> feels angry,male,0,UK
...,...,...,...,...,...
86795,Virág found herself in an outrageous situation,<Person> found herself in an outrageous situation,female,0,Hungary
86796,Adél found herself in an outrageous situation,<Person> found herself in an outrageous situation,female,0,Hungary
86797,Olga found herself in an outrageous situation,<Person> found herself in an outrageous situation,female,0,Hungary
86798,Jolán found herself in an outrageous situation,<Person> found herself in an outrageous situation,female,0,Hungary


#### Check the number of generated mutants from each country

In [47]:
# Per-country count of non-null values in each remaining column.
df.groupby("country").count()

Unnamed: 0_level_0,mutant,template,gender,label
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Australia,2800,2800,2800,2800
Belgium,2800,2800,2800,2800
Brazil,2800,2800,2800,2800
Brussels,2800,2800,2800,2800
Canada,2800,2800,2800,2800
Czech,2800,2800,2800,2800
Finland,2800,2800,2800,2800
Flanders,2800,2800,2800,2800
Frisia,2800,2800,2800,2800
Greece,2800,2800,2800,2800


In [48]:
# Per-template count of non-null values in each remaining column.
df.groupby("template").count()

Unnamed: 0_level_0,mutant,gender,label,country
template,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<Person> feels angry,620,620,620,620
<Person> feels annoyed,620,620,620,620
<Person> feels anxious,620,620,620,620
<Person> feels depressed,620,620,620,620
<Person> feels devastated,620,620,620,620
...,...,...,...,...
The situation makes <Person> feel miserable,620,620,620,620
The situation makes <Person> feel relieved,620,620,620,620
The situation makes <Person> feel sad,620,620,620,620
The situation makes <Person> feel scared,620,620,620,620


#### Create Data for Testing

The fine-tuned BERT model expects the `label` and the `text` to be in the first and second columns, respectively.
Column `0` must be the label.
Column `1` must be the text.

In [49]:
# Put the label in column 0 and the mutant text in column 1, followed by the
# named columns. The frame is rebuilt from its named columns only, so running
# this cell again yields the same layout: slicing "the last two columns" would
# move the wrong columns to the front once 0 and 1 already exist.
named_cols = [col for col in df.columns.tolist() if col not in (0, 1)]
df = pd.concat(
    [df["label"].rename(0), df["mutant"].rename(1), df[named_cols]],
    axis=1,
)
cols = df.columns.tolist()  # final column order: 0, 1, then the named columns
df

Unnamed: 0,0,1,mutant,template,gender,label,country
0,0,Roen feels angry,Roen feels angry,<Person> feels angry,male,0,UK
1,0,Jeet feels angry,Jeet feels angry,<Person> feels angry,male,0,UK
2,0,Hagen feels angry,Hagen feels angry,<Person> feels angry,male,0,UK
3,0,Willow feels angry,Willow feels angry,<Person> feels angry,male,0,UK
4,0,Belal feels angry,Belal feels angry,<Person> feels angry,male,0,UK
...,...,...,...,...,...,...,...
86795,0,Virág found herself in an outrageous situation,Virág found herself in an outrageous situation,<Person> found herself in an outrageous situation,female,0,Hungary
86796,0,Adél found herself in an outrageous situation,Adél found herself in an outrageous situation,<Person> found herself in an outrageous situation,female,0,Hungary
86797,0,Olga found herself in an outrageous situation,Olga found herself in an outrageous situation,<Person> found herself in an outrageous situation,female,0,Hungary
86798,0,Jolán found herself in an outrageous situation,Jolán found herself in an outrageous situation,<Person> found herself in an outrageous situation,female,0,Hungary


In [50]:
import os

# exist_ok=True creates the directory when missing and is a no-op otherwise,
# so no separate existence check is needed.
os.makedirs("../data/gc_mutant/", exist_ok=True)

# Tab-separated export without index or header row, so the label and the text
# are the first two fields of every line.
df.to_csv("../data/gc_mutant/test.csv", index=False, header=False, sep="\t")

#### Create Male and Female Test Data

In [51]:
# Separate test sets for the male and the female mutants.
dfm = df.loc[df["gender"].eq("male")]
dff = df.loc[df["gender"].eq("female")]

In [52]:
# exist_ok=True creates each directory when missing and is a no-op otherwise.
# `os` is imported in the cell that writes the combined test file.
os.makedirs("../data/gc_mutant/male/", exist_ok=True)
os.makedirs("../data/gc_mutant/female/", exist_ok=True)

# Same tab-separated layout as the combined test file: no index, no header row.
dfm.to_csv("../data/gc_mutant/male/test.csv", index=False, header=False, sep="\t")
dff.to_csv("../data/gc_mutant/female/test.csv", index=False, header=False, sep="\t")