# Synthetic Item Correlations

## Setup and Imports

In [43]:
import torch
import yaml
import pandas as pd
from itertools import combinations
from sentence_transformers import SentenceTransformer, util
from IPython.display import display, Markdown


config_path = 'config.yaml'

with open(config_path, 'r') as file:
    config = yaml.safe_load(file)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
    

## Loading the model

In [44]:
model = SentenceTransformer(
    model_name_or_path=config['model_path'],
    device=device
)

## Load and process item text 

In [45]:
data = pd.read_csv(filepath_or_buffer=config['measures_path'])
data['item'] = data['item'].str.replace('<[^>]*>', '', regex=True)

display(Markdown('#### Measures Dataset (Preview)'))
display(data.head(2))

df = pd.DataFrame(
    [(*a, *b) for a, b in combinations(data[['id', 'item']].values, 2)],
    columns=['id1', 'item1', 'id2', 'item2']
)

display(Markdown('#### Item Pairs (Preview)'))
display(df.head(2))

#### Measures Dataset (Preview)

Unnamed: 0.1,Unnamed: 0,id,pos,instructions,item,scale,reversed,comment,option1,option2,option3,option4,option5,option6,option7,instrument_name,instrument_acronym,doi
0,0,AAID_01,1.0,The following statements focus on the use of A...,The use of AI in Defense could be used to spy ...,attitude toward AI in defense,True,,strongly disagree,disagree,slightly disagree,neither agree nor disagree,slightly agree,agree,strongly agree,Attitudes Toward AI in Defense Scale,AAID,https://doi.org/10.3389/fpsyg.2023.1164810
1,4,AAID_05,5.0,The following statements focus on the use of A...,The use of AI in Defense could be used unethic...,attitude toward AI in defense,True,,strongly disagree,disagree,slightly disagree,neither agree nor disagree,slightly agree,agree,strongly agree,Attitudes Toward AI in Defense Scale,AAID,https://doi.org/10.3389/fpsyg.2023.1164810


#### Item Pairs (Preview)

Unnamed: 0,id1,item1,id2,item2
0,AAID_01,The use of AI in Defense could be used to spy ...,AAID_05,The use of AI in Defense could be used unethic...
1,AAID_01,The use of AI in Defense could be used to spy ...,AAID_07,The use of AI in Defense could be used to main...


## Predict Item Correlations

In [52]:
embeddings1 = model.encode(
    sentences=df['item1'],
    convert_to_numpy=True,
    batch_size=config['batch_size'],
    show_progress_bar=True
)

embeddings2 = model.encode(
    sentences=df['item2'],
    convert_to_numpy=True,
    batch_size=config['batch_size'],
    show_progress_bar=True
)

cosine_similarities = util.cos_sim(embeddings1, embeddings2)
df['prediction'] = cosine_similarities.diagonal()

display(Markdown('#### Predicted Item Correlations (Preview)'))
display(df.head(10))

Batches:   0%|          | 0/29 [00:00<?, ?it/s]

Batches:   0%|          | 0/29 [00:00<?, ?it/s]

#### Predicted Item Correlations (Preview)

Unnamed: 0,id1,item1,id2,item2,prediction
0,AAID_01,The use of AI in Defense could be used to spy ...,AAID_05,The use of AI in Defense could be used unethic...,0.812638
1,AAID_01,The use of AI in Defense could be used to spy ...,AAID_07,The use of AI in Defense could be used to main...,0.659631
2,AAID_01,The use of AI in Defense could be used to spy ...,AAID_09,The use of AI in Defense could lead to unfores...,0.705703
3,AAID_01,The use of AI in Defense could be used to spy ...,AAID_12,The use of AI in Defense could save lives,0.604247
4,AAID_01,The use of AI in Defense could be used to spy ...,AAID_14,The use of AI in Defense could protect critica...,0.66202
5,AAID_01,The use of AI in Defense could be used to spy ...,PANAS_09,"During the last two weeks, I have felt enthusi...",-0.030292
6,AAID_01,The use of AI in Defense could be used to spy ...,PANAS_14,"During the last two weeks, I have felt inspired",0.040297
7,AAID_01,The use of AI in Defense could be used to spy ...,PANAS_10,"During the last two weeks, I have felt proud",-0.01157
8,AAID_01,The use of AI in Defense could be used to spy ...,PANAS_05,"During the last two weeks, I have felt strong",-0.030755
9,AAID_01,The use of AI in Defense could be used to spy ...,PANAS_08,"During the last two weeks, I have felt hostile",0.125096


## Save predictions

In [53]:
df.to_csv('predictions.csv')