# Importing Libraries

In [196]:
# Data Processing
import pandas as pd

# BERT
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM
from scipy.spatial.distance import cosine

# Importing Data

In [197]:
df = pd.read_pickle("../data/preprocessed_dataset")

In [198]:
df.head()

Unnamed: 0,questionTitle,questionText,questionLink,topic,answerText,upvotes,views,root_topic,root_multi_label,reflection
0,How can I keep a long distance relationship go...,We weren't long distance until he joined the m...,https://counselchat.com/questions/how-can-i-ke...,relationships,Hello. You are asking a very good question abo...,9,481,family_conflicts,"[1, 0, 1]",You are asking a very good question about how ...
1,How can I ask my boyfriend about who he's text...,We've been in a long distance relationship for...,https://counselchat.com/questions/how-can-i-as...,relationships,I agree with Sherry that in a close intimate r...,9,472,family_conflicts,"[1, 0, 0]",I agree with Sherry that in a close intimate r...
2,Why do I feel like I'm always wrong in everyth...,My wife is always accusing me of cheating and ...,https://counselchat.com/questions/why-do-i-fee...,workplace-relationships,Hello. That must be very frustrating for you t...,9,268,others,"[1, 0, 1]",That must be very frustrating for you to feel ...
3,Why do I feel sad all the time?,I just feel sad all the time and I don't like ...,https://counselchat.com/questions/why-do-i-fee...,family-conflict,"Hello,While one can be sad from time to time, ...",9,264,family_conflicts,"[1, 1, 0]","If you feel sad on most days, it is worthwhile..."
4,The Underlying Causes of Being Possessive,I am extremely possessive in my relationships ...,https://counselchat.com/questions/the-underlyi...,social-relationships,Hi there. Its great you are able to realize th...,7,224,others,"[0, 0, 1]",Its great you are able to realize there are ot...


# Loading BERT UNCASED BASE Pretrained Model

In [199]:
def initialize_tokenizer(pretrained_model):
    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = BertTokenizer.from_pretrained(pretrained_model)
    return tokenizer

def get_index(tokenized_concept, tokenized_text):
    result=[]
    sll=len(tokenized_concept)
    for ind in (i for i,e in enumerate(tokenized_text) if e==tokenized_concept[0]):
        if tokenized_text[ind:ind+sll]==tokenized_concept:
            result = [ind,ind+sll-1]
    return result

In [200]:
pretrained_model = 'bert-base-uncased'
tokenizer = initialize_tokenizer(pretrained_model)
model = BertModel.from_pretrained(pretrained_model)
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): BertLayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): BertLayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (intermediate): BertIntermediate(
          (dense): Lin

In [201]:
def get_contextual_vector(model, tokenizer, question, context):
    encoded_answer_context = "[CLS] " + question + " " + context + " [SEP]"
        # maximum words that BERT can have is 512
    encoded_answer_context = encoded_answer_context[:512]
    tokenized_encoded_answer_context = tokenizer.tokenize(encoded_answer_context)

    indexed_encoded_answer_context = tokenizer.convert_tokens_to_ids(tokenized_encoded_answer_context)
    segments_ids_encoded_answer_context= [1] * len(tokenized_encoded_answer_context)
    tokens_tensor_encoded_answer_context = torch.tensor([indexed_encoded_answer_context])
    segments_tensors_encoded_answer_context = torch.tensor([segments_ids_encoded_answer_context])

    # Predict hidden states features for each layer
    with torch.no_grad():
        encoded_layers_answer_context, _ = model(tokens_tensor_encoded_answer_context, segments_tensors_encoded_answer_context)
    token_vecs_answer_context= encoded_layers_answer_context[11][0]

    tokenized_answer = tokenizer.tokenize(question)
    indexes = get_index(tokenized_answer, tokenized_encoded_answer_context)

    if len(indexes) == 0:
        print ("check if question title is present in the question context")
        sys.exit()

    first_index, last_index = indexes[0], indexes[1] 
    
    return torch.mean(token_vecs_answer_context[first_index:last_index + 1], dim=0)

In [202]:
contextual_vectors = []
for i in range(df.shape[0]):
    try:
        contextual_vectors.append(get_contextual_vector(model, tokenizer, df.loc[i,"questionTitle"], df.loc[i,"questionText"]))
    except:
        contextual_vectors.append([0 for i in range(768)])

In [203]:
df["contextual_vector"] = pd.Series(contextual_vectors)

In [204]:
df.memory_usage(deep=True, index = True).sum()

2001331

# Save the Dataframe with Contextual Vectors

In [205]:
df.to_pickle("../data/contextual_similarity_df")

# Load the Saved DataFrame

In [206]:
df = pd.read_pickle("../data/contextual_similarity_df")

In [207]:
df.head()

Unnamed: 0,questionTitle,questionText,questionLink,topic,answerText,upvotes,views,root_topic,root_multi_label,reflection,contextual_vector
0,How can I keep a long distance relationship go...,We weren't long distance until he joined the m...,https://counselchat.com/questions/how-can-i-ke...,relationships,Hello. You are asking a very good question abo...,9,481,family_conflicts,"[1, 0, 1]",You are asking a very good question about how ...,"[tensor(0.1770), tensor(-0.2739), tensor(-0.12..."
1,How can I ask my boyfriend about who he's text...,We've been in a long distance relationship for...,https://counselchat.com/questions/how-can-i-as...,relationships,I agree with Sherry that in a close intimate r...,9,472,family_conflicts,"[1, 0, 0]",I agree with Sherry that in a close intimate r...,"[tensor(0.1125), tensor(-0.0316), tensor(-0.17..."
2,Why do I feel like I'm always wrong in everyth...,My wife is always accusing me of cheating and ...,https://counselchat.com/questions/why-do-i-fee...,workplace-relationships,Hello. That must be very frustrating for you t...,9,268,others,"[1, 0, 1]",That must be very frustrating for you to feel ...,"[tensor(-0.3317), tensor(0.4079), tensor(-0.07..."
3,Why do I feel sad all the time?,I just feel sad all the time and I don't like ...,https://counselchat.com/questions/why-do-i-fee...,family-conflict,"Hello,While one can be sad from time to time, ...",9,264,family_conflicts,"[1, 1, 0]","If you feel sad on most days, it is worthwhile...","[tensor(-0.4240), tensor(0.2931), tensor(0.159..."
4,The Underlying Causes of Being Possessive,I am extremely possessive in my relationships ...,https://counselchat.com/questions/the-underlyi...,social-relationships,Hi there. Its great you are able to realize th...,7,224,others,"[0, 0, 1]",Its great you are able to realize there are ot...,"[tensor(-0.4955), tensor(0.3385), tensor(0.119..."


# Generate Reflection

In [223]:
user_question_context = "I recently got married. My wife is not happy with our sex life. "

user_question = "My partner is forcing me to do sex. I cannot that often."


In [224]:
user_vec = get_contextual_vector(model, tokenizer, user_question, user_question_context)

In [225]:
for i in range(df.shape[0]):
    df.loc[i, "similarity_score"] = 1 - cosine(user_vec, df.loc[i, "contextual_vector"]) 

In [226]:
df.sort_values(by=['similarity_score'], ascending=False)

Unnamed: 0,questionTitle,questionText,questionLink,topic,answerText,upvotes,views,root_topic,root_multi_label,reflection,contextual_vector,similarity_score
28,Is it okay for my girlfriend to have sex with ...,I am currently suffering from erectile dysfunc...,https://counselchat.com/questions/is-it-okay-f...,relationships,"Hi, First and foremost, I want to acknowledge ...",4,275,family_conflicts,"[1, 0, 1]","Hi, First and foremost, I want to acknowledge ...","[tensor(0.2053), tensor(-0.1093), tensor(0.203...",0.849132
529,My husband makes me uncomfortable about sex,"Both of my parents committed suicide together,...",https://counselchat.com/questions/my-husband-m...,relationships,"Hello, and thank you for your question. First,...",0,158,family_conflicts,"[1, 1, 0]","First, I want to tell you how sorry I am for t...","[tensor(-0.0643), tensor(0.2894), tensor(0.006...",0.821277
34,"I told my family doctor yesterday, that I am h...","I have major depression, severe, PTSD, anxiety...",https://counselchat.com/questions/i-told-my-fa...,legal-regulatory,Your doctor might be required to tell your psy...,4,214,others,"[0, 0, 1]",Your doctor might be required to tell your psy...,"[tensor(0.0135), tensor(0.0899), tensor(0.0320...",0.805143
322,I tell my husband I love him. But I don't beli...,When having sex I think of other men and want ...,https://counselchat.com/questions/i-tell-my-hu...,intimacy,Sustaining passion in marriage is tough stuff....,1,138,family_conflicts,"[1, 0, 0]",It sounds like your definitions of love and s...,"[tensor(0.4006), tensor(0.2649), tensor(0.5402...",0.799799
408,I'm losing my husband because my lack of sex d...,I have no sex drive due to medical issues. I’v...,https://counselchat.com/questions/i-m-losing-m...,intimacy,I’m sorry to hear about your current situation...,1,64,family_conflicts,"[1, 0, 0]",I’m sorry to hear about your current situation,"[tensor(0.3184), tensor(-0.0136), tensor(0.136...",0.793432
78,I want to have a threesome with my husband and...,My husband and I had our first threesome recen...,https://counselchat.com/questions/i-want-to-ha...,relationships,As someone who specializes in sexuality and po...,2,1475,family_conflicts,"[1, 0, 1]",As someone who specializes in sexuality and po...,"[tensor(0.2515), tensor(-0.2236), tensor(0.254...",0.791576
774,How can I tell my dad I'm a female-to-male tra...,I have known I was always different. This year...,https://counselchat.com/questions/how-can-i-te...,family-conflict,"Hello, and thank you for your question. This i...",0,40,family_conflicts,"[1, 0, 1]",This is certainly a tough spot,"[tensor(0.2391), tensor(-0.0566), tensor(-0.04...",0.790124
121,How can I have a better sex life when I don't ...,My husband and I have been married for seven y...,https://counselchat.com/questions/how-can-i-ha...,human-sexuality,There are many different ways that can be appr...,2,298,others,"[1, 0, 1]",It sounds like there is some confusion about ...,"[tensor(0.2249), tensor(0.0991), tensor(-0.180...",0.784516
499,Why can't I get an erection with my girlfriend?,I love my girlfriend so much. I get an erectio...,https://counselchat.com/questions/why-can-t-i-...,human-sexuality,Im sorry to hear of your problem.First step as...,0,198,others,"[1, 0, 1]",Im sorry to hear of your problem,"[tensor(0.1525), tensor(-0.0068), tensor(0.040...",0.784501
209,My mother is trying to control my life and I d...,I am in my early 20s and I still live with my ...,https://counselchat.com/questions/my-mother-is...,parenting,Hi! I'm sorry you are having such a tough ti...,1,544,family_conflicts,"[1, 0, 0]",Hi! I'm sorry you are having such a tough ti...,"[tensor(0.2406), tensor(0.1553), tensor(-0.059...",0.783997


In [227]:
print ("Youper:", df[df.similarity_score == df.similarity_score.max()].reset_index().reflection[0])

Youper: Hi, First and foremost, I want to acknowledge your efforts to gain (your) ideal erectile function
