In [1]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM

os.environ["WANDB_DISABLED"] = "true"
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'


2024-12-26 13:19:29.733800: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [3]:
class BottleneckT5Autoencoder:
    def __init__(self, model_path: str, device='cpu'):
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, model_max_length=512)
        self.model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True).to(self.device)
        self.model.eval()

    @torch.no_grad()
    def embed(self, text: str) -> torch.FloatTensor:
        inputs = self.tokenizer(text, return_tensors='pt').to(self.device)
        decoder_inputs = self.tokenizer('', return_tensors='pt').to(self.device)
        return self.model(
            **inputs,
            decoder_input_ids=decoder_inputs['input_ids'],
            encode_only=True,
        )[0]

    @torch.no_grad()
    def generate_from_latent(self, latent: torch.FloatTensor, max_length=512, temperature=0.8) -> str:
        dummy_text = ' '
        dummy = self.embed(dummy_text)
        perturb_vector = latent - dummy
        #perturb_vector = latent
        self.model.perturb_vector = perturb_vector
        input_ids = self.tokenizer(dummy_text, return_tensors='pt').to(self.device).input_ids
        output = self.model.generate(
            input_ids=input_ids,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_p=0.9,
            num_return_sequences=1,
        )
        return self.tokenizer.decode(output[0], skip_special_tokens=True)

In [10]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.cuda.set_per_process_memory_fraction(0.8)

#baseline
#autoencoder = BottleneckT5Autoencoder(model_path='thesephist/contra-bottleneck-t5-base-wikipedia', device=device) 

#fine-tuned model
autoencoder = BottleneckT5Autoencoder(model_path='mmendoza/t5-delitos-full', device=device)

Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


bottleneck_t5.py:   0%|          | 0.00/18.9k [00:00<?, ?B/s]


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/marcelo/.local/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda120.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda-12.0/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 120
CUDA SETUP: Loading binary /home/marcelo/.local/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda120.so...


  warn(msg)


pytorch_model.bin:   0%|          | 0.00/1.10G [00:00<?, ?B/s]

  return torch.load(checkpoint_file, map_location="cpu")


In [11]:
from pprint import pprint as pp

texts = [
    'A brutal shooting took place in Providencia, leaving two minors in critical condition. The victims, aged 11 and 13, are currently receiving medical attention, while a 17-year-old teenager tragically lost his life in the incident.',
]

for t in texts:
    embedding = autoencoder.embed(t)
    pp(autoencoder.generate_from_latent(embedding))

('date> 2022-07-17 event> Minor injuries Hospitals and clinics position> '
 'Santiago, Chile. Twelve children are born in desperate condition, taking '
 'refuge in an abandoned area, while two infants are critically injured. The '
 'youngest child is born, aged two years, indicating suffering acute injuries '
 'in childhood. The overall condition of the public space is clean, with one '
 'child, aged two-years, saved.')


## Q1: Slerp (a to b)

In [12]:
def slerp(a, b, n, eps=1e-8):
    a_norm = a / torch.norm(a)
    b_norm = b / torch.norm(b)
    omega = torch.acos((a_norm * b_norm).sum()) + eps
    so = torch.sin(omega)
    return (torch.sin((1.0 - n) * omega) / so) * a + (torch.sin(n * omega) / so) * b

### Geographic interpolation

In [13]:
start = '<date> 2012-12-01 <event> Motor vehicle theft Not determined <position> Santiago <place> a street with a green wall that has graffiti on it. There are no clear indications of the condition of the buildings, but there is a construction site visible in the background. The public infrastructure includes a sidewalk and a street with a motorcycle parked on the side. The cleanliness of the street appears to be average, with no visible litter or debris.'
end = '<date> 2020-12-01 <event> Motor vehicle theft Not determined <position> Santago <place> a street with a green wall that has graffiti on it. There are no clear indications of the condition of the buildings, but there is a construction site visible in the background. The public infrastructure includes a sidewalk and a street with a motorcycle parked on the side. The cleanliness of the street appears to be average, with no visible litter or debris.'

start_embedding = autoencoder.embed(start)
end_embedding = autoencoder.embed(end)

for t in torch.linspace(0, 1, 10):
    latent = slerp(start_embedding, end_embedding, t)
    pp(autoencoder.generate_from_latent(latent))

('date> 2017-05-13 event> Motor vehicle theft Not determined position> '
 'Providencia place> a street with a car parked on the side. The cleanliness '
 'of the street is not clear, but there is a series of incidents that suggest '
 'a revision of the road. The public infrastructure includes a sidewalk with a '
 'cyclist, and a street lamp is visible on the side.')
('date> 2017-05-13 event> Motor vehicle theft Not determined position> '
 'Providencia place> a street with a car parked on the side. The cleanliness '
 'of the road is not clear, but there is a confirmation of some conditions. '
 'There are no visible signs of litter or pollution on the street. The public '
 'infrastructure includes a sidewalk and a street that appears to be in a '
 'state of disrepair.')
('date> 2017-05-13 event> Motor vehicle theft Not determined position> '
 'Providencia place> a street with a bus that has graffiti on it. The '
 'condition of the bus is not clear, but there is a series of predictions of 

In [14]:
start = '<date> 2020-1-01 <event> Drug trafficking Not determined <position> Estacion Central <place> a street with a green wall that has graffiti on it. There are no clear indications of the condition of the buildings, but there is a construction site visible in the background. The public infrastructure includes a sidewalk and a street with a motorcycle parked on the side. The cleanliness of the street appears to be average, with no visible litter or debris.'
end = '<date> 2012-1-01 <event> Drug trafficking Not determined <position> Estacion Central <place> a street with a green wall that has graffiti on it. There are no clear indications of the condition of the buildings, but there is a construction site visible in the background. The public infrastructure includes a sidewalk and a street with a motorcycle parked on the side. The cleanliness of the street appears to be average, with no visible litter or debris.'

start_embedding = autoencoder.embed(start)
end_embedding = autoencoder.embed(end)

for t in torch.linspace(0, 1, 10):
    latent = slerp(start_embedding, end_embedding, t)
    pp(autoencoder.generate_from_latent(latent))

('date> 2015-05-29 event> Traffic law Not determined position> Providencia '
 'place> a street with a bus parked on the side. The condition of the bus is '
 'not clear, but there is a series of predictions that have occurred. The '
 'public infrastructure includes a sidewalk with a tree and a street. The '
 'cleanliness of the street appears to be average, with no visible litter or '
 'debris.')
('date> 2015-05-29 event> Drug trafficking Not determined position> '
 'Providencia place> a street with a construction that has graffiti on the '
 'wall. The condition of the buildings is not clear, but there is a detailed '
 'indication of the details. The public infrastructure includes a street with '
 'a motorcycle and a sidewalk. The cleanliness of the street appears to be '
 'average, with no visible litter or debris.')
('date> 2015-05-24 event> Drug trafficking Not determined position> '
 'Providencia place> a street with a bus that crashed on the road. The '
 'condition of the bus is no

## Q2: Moving through positive to negative

### Fixed commune (varying years)

In [15]:
positive_sentences = [
    """There was a case of street vending in Santiago in 2014""",
    """There was a case of domestic violence in Santiago in 2015""",
    """There was a minor theft in Santiago in 2015""",
    """There was a graffiti incident in Santiago in 2014""",
    """There was a case of vandalism in Santiago in 2013""",
    """There was a case of theft in Santiago in 2013""",
    """There was an incident of alcohol consumption in public road in Santiago in 2013""",
    """There was an incident of street vending or clandestine trade in Santiago in 2013""",
]    

negative_sentences = [
    """There was an incident of drug trafficking in Santiago in 2021""",
    """There was a burglary in Santiago in 2020""",
    """There was a minor theft in Santiago in 2020""",
    """There was a report of vehicle theft in Santiago in 2022""",
    """There was an incident of robbery with violence in Santiago in 2020""",
    """There was an incident of abandoned species in public road in Santiago in 2022""",
    """There was an incident of theft in Santiago in 2022""",
    """There was an incident of surprise robbery in Santiago in 2022""",
]

In [16]:
positive_embeddings = [autoencoder.embed(s) for s in tqdm(positive_sentences)]
negative_embeddings = [autoencoder.embed(s) for s in tqdm(negative_sentences)]

100%|██████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 99.30it/s]
100%|█████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 122.02it/s]


In [17]:
mean_positive_embedding = torch.mean(torch.stack(positive_embeddings), dim=0)
mean_negative_embedding = torch.mean(torch.stack(negative_embeddings), dim=0)
mean_positive_embedding.shape, mean_negative_embedding.shape

(torch.Size([768]), torch.Size([768]))

## Counterfactual gender

### Man

In [20]:
start = '<date> 2012-10-30 <event> A young man walks alone through a street in Santiago at midnight. <position> Santiago '
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
('date> 2013-07-24 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street life featuring a single woman '
 'covered through a street cafe or Facebook session.')
negative × -2.68
('date> 2013-07-16 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street lined with a woman through a '
 'street café lived along a Santiago side.')
negative × -2.37
('date> 2013-07-18 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street life through a street lined '
 'with a Chile at night.')
negative × -2.05
('date> 2013-07-24 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street woman lived along a street in '
 'a Santiago context.')
negative × -1.74
('date> 2013-07-18 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street life

### Woman

In [22]:
start = '<date> 2012-10-30 <event> A young woman walks alone through a street in Santiago at midnight. <position> Santiago '
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
('date> 2013-04 event> The street life covered a street woman sang or lived '
 'along a street through a cafe //')
negative × -2.68
('date> 2013-03-24 event> Street vending or clandestine trade Public road '
 'position> Santiago The scene depicts a street woman lived along a street '
 'through a Santiago café area.')
negative × -2.37
('date> 2013-04 event> Street vending or clandestine trade Public road '
 'position> Santiago The scene depicts a street woman lived through a street '
 'life along a Santiago café.')
negative × -2.05
('date> 2013-04-12 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street woman lived along a street '
 'through a Santiago cafe left.')
negative × -1.74
('date> 2013-04-18 event> Street vending or clandestine trade Public road '
 'position> Santiago The scene depicts a street woman lived through a street '
 'in front of a Santiago café at night.')
negative × -1.42
('date> 2013-04-12 event>

## Counterfactual place

### Unsafe place

In [23]:
start = '<place> A desolate and grim setting, marked by dirty streets and walls covered in graffiti, with little to no lighting and an absence of green spaces. The landscape is overwhelmingly dominated by concrete, with filth scattered across the area.The lack of vegetation or natural beauty leaves the environment feeling lifeless and sterile.'
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))


negative × -3.00
('date> 2017-03-14 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street lined with a covered area, and '
 'the walls and faces covered, via a graffiti-covered wall. The style reflects '
 'a sense of local street life, with a cobblestone pavement and a $140.000 '
 'monthly wage covered.')
negative × -2.68
('date> 2013-07-24 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street lined with courts and covered '
 'spaces, characterized by a greca or wall covered with graffiti, and a faded '
 'color palette. The ground level also features graffiti, with the walls and '
 'cobblestone street life sustaining the environment.')
negative × -2.37
('date> 2017-03-14 event> Theft of object from or within vehicle Public road '
 'position> Santiago The scene depicts a street lined with, and there are no '
 'visible signs of public spaces or covered life, characterized by atextu

### Safe place

In [24]:
start = '<place> a street with a clear blue surface, which is a unique emergence. The street appears to be well-maintained with a lush green area, featuring a variety of trees and a large tree with blue leaves, indicating that its natural environment is likely to be well-purchased'
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
('date> 2013-07-16 event> Street vending or clandestine trade Public road '
 'position> Santiago The scene depicts a street that features a '
 'well-maintained landscape, characterized by a street covered with a blue '
 'facade, and a large tree providing shade, indicating a local style or '
 'community life.')
negative × -2.68
('date> 2013-07-16 event> Theft Private residences position> place> a street '
 'with a well-maintained landscape, characterized by a street name that has a '
 'blue style, and a leafy tree providing shade, indicating a local style or '
 'community.')
negative × -2.37
('date> 2013-07-16 event> Theft Private residences position> place> a street '
 'with a well-maintained landscape, characterized by a street that has a blue '
 'surface, and a leafy tree, indicating a variety of nature. The area appears '
 'to be a house or community space with a blue style.')
negative × -2.05
('date> 2013-07-16 event> Theft of object from or within vehicle Public 

### Fixed year (varying commune)

In [25]:
positive_sentences = [
    """There was a robbery in Estación Central in 2013""",
    """There was an incident of public alcohol consumption in Estación Central in 2013""",
    """There was a street vending case in Estación Central in 2013""",
    """There was a domestic violence report in Estación Central in 2013""",
    """There was an incident of robbery with violence in Estación Central in 2013""",
    """There was an incident of theft in Estación Central in 2013""",
    """There was an incident of assault in Santiago in 2013""",
    """There was an incident of robbery in Santiago in 2013""",
]    

negative_sentences = [
    """There was a robbery in Providencia in 2013""",
    """There was an incident of public alcohol consumption in Providencia in 2013""",
    """There was a street vending case in Providencia in 2013""",
    """There was a domestic violence report in Providencia in 2013""",
    """There was an incident of robbery with violence in Providencia in 2013""",
    """There was an incident of theft in Providencia in 2013""",
    """There was an incident of assault in Vitacura in 2013""",
    """There was an incident of robbery in Vitacura in 2013""",
]

In [26]:
positive_embeddings = [autoencoder.embed(s) for s in tqdm(positive_sentences)]
negative_embeddings = [autoencoder.embed(s) for s in tqdm(negative_sentences)]

mean_positive_embedding = torch.mean(torch.stack(positive_embeddings), dim=0)
mean_negative_embedding = torch.mean(torch.stack(negative_embeddings), dim=0)
mean_positive_embedding.shape, mean_negative_embedding.shape

100%|██████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 98.11it/s]
100%|█████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 114.65it/s]


(torch.Size([768]), torch.Size([768]))

## Counterfactual gender

### Man

In [27]:
start = '<date> 2012-10-30 <event> A young man walks alone through a street in Santiago at midnight. <position> Santiago '
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
'street A young urban aged, across a street heute at "E." Then, a young Islam,'
negative × -2.68
('A street, A young urban aged, heute at street level, along a sidewalk. The '
 'street is insane, "SEST," in an urban atmosphere.')
negative × -2.37
('A street young Arthur A young urban street at night, 8:30 a Stunden along an '
 'urban street.')
negative × -2.05
('A young man walks street at night in an urban atmosphere. A young man stands '
 'along a sidewalk street.')
negative × -1.74
('A young man speaks street at night in an urban atmosphere. A young man walks '
 'along a street 12:00-17 event>')
negative × -1.42
'A young man speaks at night in an urban street along a Santiago Stunden.'
negative × -1.11
'A young man walks young himself at night in an urban street.'
negative × -0.79
'A young man walks himself into an urban street at night.'
negative × -0.47
('date> 2019-12-11 event> Attempted robbery Sexual abuse of person older than '
 '14 and younger than 18 years i

### Woman

In [29]:
start = '<date> 2012-10-30 <event> A young woman walks alone through a street in Santiago at midnight. <position> Santiago '
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
'young. A street lined Girls at heute, across an urban street.'
negative × -2.68
'A aged, street. "Ann," through A heute, Occidental at street.'
negative × -2.37
'A young aged, street through an sidewalk in a stands. Then, aged,'
negative × -2.05
'A young street A-12 Stunden, young at night, along a street in Santiago, Est.'
negative × -1.74
('A young woman A street at night in an urban Stunden. The street steps '
 'through a sidewalk alongside.')
negative × -1.42
'A young woman speaks at night in an urban street A Stunden young.'
negative × -1.11
('A young woman A street A-12 in an urban atmosphere at night. Stunden walks '
 'along a street in Santiago')
negative × -0.79
'A young woman speaks herself in an urban street at night.'
negative × -0.47
'A young woman speaks herself in an urban street at night.'
negative × -0.16
('date> 2019-12-11 event> Attempted robbery Woman walks alone in an urban '
 'street through Santiago.')
negative × 0.16
('date> 2019-12-11 event> A

## Counterfactual place

### Unsafe place

In [30]:
start = '<place> A desolate and grim setting, marked by dirty streets and walls covered in graffiti, with little to no lighting and an absence of green spaces. The landscape is overwhelmingly dominated by concrete, with filth scattered across the area.The lack of vegetation or natural beauty leaves the environment feeling lifeless and sterile.'
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))



negative × -3.00
('date> 2022-08-17 event> Theft of object from or within vehicle Estacion '
 'Central The scene depicts a street lined with low, historic buildings and '
 'walls, marked by horizontal slats and an urban atmosphere, with a melting of '
 'natural elements and older structures visible. The street is heavily '
 'surrounded by a concrete wall, and the overall atmosphere is urban and '
 'Estacion Central.')
negative × -2.68
('date> 2022-08-17 event> Theft of object from or within vehicle Public road '
 'position> Estacion Central The scene depicts a street lined with low, '
 'Arkansas walls and marked by urban elements, such as walls and graffiti, '
 'with a melting snow atmosphere separating the elements. The street is '
 'heavily populated, and the overall atmosphere of Estacion Central and '
 'Arkansas urban elements is calm and unrhythmic.')
negative × -2.37
('date> 2022-08-17 event> Theft of object from or within vehicle Public road '
 'position> Estacion Central The sc

('date> 2021-06-24 event> Damages Not determined position> Santiago The image '
 'is partially obscured by vanity and vandalism, with graffiti covering the '
 'surfaces and nolivables visible in the image. The cleanliness of the work is '
 'not due to the lack of providencia, but by lush greenery.')


### Safe place

In [31]:
start = '<place> a street with a clear blue surface, which is a unique emergence. The street appears to be well-maintained with a lush green area, featuring a variety of trees and a large tree with blue leaves, indicating that its natural environment is likely to be well-purchased'
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
('date> 2013-08-17 event> Street vending or clandestine trade Estacion Central '
 'The scene features a wide, southeastern street lined with a prominent orange '
 'structure, which has a distinctive orange Estacion Central, indicating a red '
 'and EOS atmosphere. The area appears to be urban, with a mix of modern and '
 'older architectural styles.')
negative × -2.68
('date> 2013-08-17 event> Street vending or clandestine trade Estacion Central '
 'The scene features a wide, paved street that is distinctively orange, with a '
 'few trees and a central area, indicating a vibrant urban environment.')
negative × -2.37
('date> 2013-08-17 event> Theft Estacion Central The scene features a wide, '
 'southeastern street that defines its constitutional area, with a prominent '
 'orange structure, a blue wall, and a growing urban environment, which '
 'appears to be well-maintained, indicating a variety of traditional and '
 'modern elements.')
negative × -2.05
('date> 2022-05

### Fixed year (varying commune)

In [32]:
positive_sentences = [
    """There was a robbery in Estación Central in 2013""",
    """There was an incident of public alcohol consumption in Estación Central in 2013""",
    """There was a street vending case in Estación Central in 2013""",
    """There was a domestic violence report in Estación Central in 2013""",
    """There was an incident of robbery with violence in Estación Central in 2013""",
    """There was an incident of theft in Estación Central in 2013""",
    """There was an incident of assault in Santiago in 2013""",
    """There was an incident of robbery in Santiago in 2013""",
]    

negative_sentences = [
    """There was a robbery in Providencia in 2013""",
    """There was an incident of public alcohol consumption in Providencia in 2013""",
    """There was a street vending case in Providencia in 2013""",
    """There was a domestic violence report in Providencia in 2013""",
    """There was an incident of robbery with violence in Providencia in 2013""",
    """There was an incident of theft in Providencia in 2013""",
    """There was an incident of assault in Vitacura in 2013""",
    """There was an incident of robbery in Vitacura in 2013""",
]

In [33]:
positive_embeddings = [autoencoder.embed(s) for s in tqdm(positive_sentences)]
negative_embeddings = [autoencoder.embed(s) for s in tqdm(negative_sentences)]

mean_positive_embedding = torch.mean(torch.stack(positive_embeddings), dim=0)
mean_negative_embedding = torch.mean(torch.stack(negative_embeddings), dim=0)
mean_positive_embedding.shape, mean_negative_embedding.shape

100%|█████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 105.08it/s]
100%|█████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 118.92it/s]


(torch.Size([768]), torch.Size([768]))

## Counterfactual gender

### Man

In [34]:
start = '<date> 2012-10-30 <event> A young man walks alone through a street in Santiago at midnight. <position> Santiago '
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
'A young street, aged across an urban sidewalk, "SST"'
negative × -2.68
('A young street A-17, urban, along a sidewalk at night, date> 83 hours. The '
 'street appears young in an urban atmosphere.')
negative × -2.37
'A young street A-12-13 at night, Always Dies in an urban street a sidewalk.'
negative × -2.05
'A young man walks street A-12 in an urban atmosphere at night.'
negative × -1.74
'A young man walks young himself along a street in an urban atmosphere.'
negative × -1.42
'A young man speaks himself at night in an urban street along a sidewalk.'
negative × -1.11
'A young man walks A street at night in an urban atmosphere.'
negative × -0.79
'A young man walks himself along a street in Santiago at night.'
negative × -0.47
('date> 2019-12-11 event> Attempted robbery Männer lived alone in an urban '
 'street through a Santiago Stunden.')
negative × -0.16
('date> 2019-12-11 event> Attempted robbery Night at night A man walks alone '
 'in a street through Santiago.')


### Woman

In [35]:
start = '<date> 2012-10-30 <event> A young woman walks alone through a street in Santiago at midnight. <position> Santiago '
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
('A. aged. The street exploded 3-2 underground at "East," A shocked urban '
 'street heute.')
negative × -2.68
'A aged, across street. The sidewalk at night, "E,"'
negative × -2.37
('A young street aged in an urban at night. Then, "She Dies," along a street '
 'heute.')
negative × -2.05
('A young woman street aged A in an urban atmosphere, along a sidewalk street '
 'heute.')
negative × -1.74
'A young woman aged A in an urban street at night. Dies young along a sidewalk.'
negative × -1.42
'A young woman walks A street in an urban atmosphere at night.'
negative × -1.11
'A young woman speaks in an urban street at night, Stunden Ahhh.'
negative × -0.79
'A young woman walks herself along a street in Santiago at night.'
negative × -0.47
'A woman speaks herself in an urban street at night. A Stunden'
negative × -0.16
'A woman speaks herself in an urban street at night.'
negative × 0.16
("date> 2019-12-11 event> Attempted robbery Girls' man walking in an urban "
 'street thro

## Counterfactual place

### Unsafe place

In [37]:
start = '<place> A desolate and grim setting, marked by dirty streets and walls covered in graffiti, with little to no lighting and an absence of green spaces. The landscape is overwhelmingly dominated by concrete, with filth scattered across the area.The lack of vegetation or natural beauty leaves the environment feeling lifeless and sterile.'
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
('date> 2022-08-17 event> Street vending or clandestine trade Estacion Central '
 'The scene depicts a street lined with low, historic buildings and walls, '
 'marked by a horizontal Sachsenhausen, and with minimal greenery and urban '
 'elements above. The atmosphere is increasingly urban, with a textured wall '
 'and older structures lining the street.')
negative × -2.68
('date> 2022-08-17 event> Theft of object from or within vehicle Estacion '
 'Central The scene depicts a street lined with low, walls and marked by '
 'minimal landscaping, including trees and bushes, with a melting snow '
 'atmosphere overhead. The street is increasingly dissolved, and the overall '
 'elements of urban and historic elements are visible.')
negative × -2.37
('date> 2022-08-17 event> Theft of object from or within vehicle Public road '
 'position> Estacion Central The scene features a street lined with, and at '
 'Stalin Days, characterized by low walls and minimal greenery, with tree

### Safe place

In [38]:
start = '<place> a street with a clear blue surface, which is a unique emergence. The street appears to be well-maintained with a lush green area, featuring a variety of trees and a large tree with blue leaves, indicating that its natural environment is likely to be well-purchased'
start_embedding = autoencoder.embed(start)

positive_to_negative = mean_negative_embedding - mean_positive_embedding

for t in torch.linspace(-3, 3, 20):
    embedding = slerp(start_embedding, start_embedding + positive_to_negative, t)
    print(f'negative × {t:.2f}')
    pp(autoencoder.generate_from_latent(embedding))

negative × -3.00
('date> 2022-08-13 event> Street vending or clandestine trade Estacion Central '
 'The scene depicts a spacious, open area with a distinctive blue street, '
 'characterized by a high-rise structure and a few trees, which are likely '
 'older, indicating a historical urban environment.')
negative × -2.68
('date> 2013-08-17 event> Theft Estacion Central The scene depicts a wide, '
 'southeastern street that features a distinctive orange structure, with a '
 'large tree and a blue wall, indicating it is likely a seasonal change, '
 'surrounded by older and newer urban areas.')
negative × -2.37
('date> 2013-08-17 event> Street vending or clandestine trade Public road '
 'position> Estacion Central The scene features a wide, paved street with a '
 'prominent orange structure, a distinctive of a new high-rise building, which '
 'is likely a cultural landmark, showcasing its orange and Eloise elements, '
 'along with a variety of trees and a wide street that includes a paved 