### 0. Setting

- 드라이브 mount
- 경로 이동
- 라이브러리 및 모듈 설치

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so the project folder is reachable from this runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Show the current working directory before changing into the project folder.
%pwd

'/content'

In [None]:
# Edit this path if the project folder lives somewhere else on your Drive.
%cd ./drive/MyDrive/DACS_공유ver

/content/drive/MyDrive/DACS_공유ver


In [None]:
!pip install transformers
!pip install pytorch_lightning

Collecting pytorch_lightning
  Downloading pytorch_lightning-2.2.1-py3-none-any.whl (801 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m801.6/801.6 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.3.1-py3-none-any.whl (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.4/840.4 kB[0m [31m59.3 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Installing collected packages: lightning-utilities, torchmetrics, pytorch_lightning
Successfully installed lightning-utilities-0.10.1 pytorch_lightning-2.2.1 torchmetrics-1.3.1


In [None]:
# data
import pandas as pd
import numpy as np
import random
import os
import argparse
import json
import torch
import pickle
import time
import csv
import io
import re
import textwrap
from tqdm.auto import tqdm
from pathlib import Path

# torch & transformer
from torch import nn
import pytorch_lightning as pl
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import TensorDataset
from transformers import BertForSequenceClassification
from transformers import BertConfig, BertTokenizer, BertModel
# from transformers import BertPreTrainedModel, BertModel, BertConfig, BertTokenizer
from transformers import AutoTokenizer
from transformers import BertForSequenceClassification

# summary model
from transformers import (AdamW,T5ForConditionalGeneration,T5TokenizerFast as T5Tokenizer)

# Distil Bert
from transformers import DistilBertModel, DistilBertTokenizer, DistilBertConfig, DistilBertForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup

# retrieval model
from encoder import PolyEncoder, CrossEncoder
from transform_ver2 import SelectionJoinTransform, SelectionSequentialTransform, SelectionConcatTransform
from chat_function import Poly_function, Cross_function, Filter_function

# graph
import matplotlib.pyplot as plt
from matplotlib import rc
import seaborn as sns
from pylab import rcParams

### 1. Depression Intensity Model

In [None]:
# Locations of the intensity-model assets, all resolved against the current working directory.
default_path = os.getcwd()
data_path, base_model, model_path, config_path, log_path = (
    os.path.join(default_path, relative_dir)
    for relative_dir in (
        './Intensity_model/data/DSM_intensity',
        './Intensity_model/base-model',
        './Intensity_model/model',
        './Intensity_model/config',
        './Intensity_model/log',
    )
)

In [None]:
# Read the intensity-model training configuration, then add the runtime-only settings.
with open(os.path.join(config_path, 'training_config.json'), encoding='utf-8') as f:
    training_config = json.load(f)

# Pad every encoded sentence to the maximum length.
training_config['pad'] = 'max_length'
# Always store a torch.device (previously a torch.device on GPU but the plain string "cpu"
# otherwise), so downstream code sees one consistent type.
training_config['device'] = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class DSMDataset(Dataset):
    """Dataset view over a DataFrame that has `text` and `label` columns.

    Items are addressed by position, so the frame's own index does not need to
    be a clean 0..n-1 range and the caller's frame is never modified on access.
    """

    def __init__(self, data_file):
        # data_file: DataFrame providing the `text` and `label` columns.
        self.data = data_file

    def __len__(self):
        return len(self.data.label)

    def reset_index(self):
        """Renumber the wrapped frame's index in place (kept for callers that use it explicitly)."""
        self.data.reset_index(inplace=True, drop=True)

    def __getitem__(self, idx):
        '''
        return text, label
        '''
        # Positional lookup replaces the former per-access in-place reset_index(),
        # which mutated the caller's DataFrame and re-indexed it on every item.
        text = self.data['text'].iloc[idx]
        label = self.data['label'].iloc[idx]
        return text, label

class DSMProcessor():
    """Turns raw text (a dataset or a single sentence) into TensorDatasets and DataLoaders.

    config:          model config; `max_position_embeddings` is used as the encoding length.
    training_config: dict providing 'pad' (padding strategy) and 'train_batch_size'.
    tokenizer:       tokenizer exposing `batch_encode_plus`.
    truncation:      forwarded to the tokenizer.
    """

    def __init__(self, config, training_config, tokenizer, truncation=True):
        self.tokenizer = tokenizer
        self.max_len = config.max_position_embeddings
        self.pad = training_config['pad']
        self.batch_size = training_config['train_batch_size']
        self.truncation = truncation

    def _encode(self, texts):
        """Tokenize texts as single-sentence inputs (the second element of each pair is None)."""
        return self.tokenizer.batch_encode_plus(
            [(text, None) for text in texts],
            max_length=self.max_len,
            padding=self.pad,
            truncation=self.truncation
        )

    @staticmethod
    def _to_dataset(batch_encoding, labels):
        """Pack input ids, attention masks and labels into a TensorDataset (in that order)."""
        rows = range(len(labels))
        all_input_ids = torch.tensor([batch_encoding['input_ids'][i] for i in rows], dtype=torch.long)
        all_attention_mask = torch.tensor([batch_encoding['attention_mask'][i] for i in rows], dtype=torch.long)
        all_labels = torch.tensor(labels, dtype=torch.long)
        # token_type_ids are intentionally not part of the dataset.
        return TensorDataset(all_input_ids, all_attention_mask, all_labels)

    def convert_data(self, data_file):
        """Encode every (text, label) item of `data_file`; items without a readable label get label 0."""
        n_items = len(data_file)
        batch_encoding = self._encode([data_file[idx][0] for idx in range(n_items)])

        labels = []
        for i in range(n_items):
            try:
                labels.append(data_file[i][1])
            except Exception:
                # Best-effort default for unlabeled items. `Exception` (not a bare except)
                # so KeyboardInterrupt / SystemExit are no longer swallowed here.
                labels.append(0)

        return self._to_dataset(batch_encoding, labels)

    def convert_sentence(self, sent_list):
        """Encode one user sentence into a single-row TensorDataset with a placeholder label 0."""
        batch_encoding = self._encode([sent_list])
        return self._to_dataset(batch_encoding, [0])

    def shuffle_data(self, dataset, data_type):
        """Return a random sampler for 'train', a sequential one for 'eval'/'test', else None."""
        if data_type == 'train':
            return RandomSampler(dataset)
        elif data_type == 'eval' or data_type == 'test':
            return SequentialSampler(dataset)
        return None

    def load_data(self, dataset, sampler):
        """Wrap `dataset` in a DataLoader using the configured batch size."""
        return DataLoader(dataset, sampler=sampler, batch_size=self.batch_size)

class BWSRegressor(nn.Module):
    """Regression head on top of an encoder.

    The hidden state at position 0 of the encoder's `last_hidden_state` goes through
    Linear(hidden_size, 128) -> ReLU -> Linear(128, 1), giving one score per input row.
    Attribute names are part of the saved state_dict layout and must not be renamed.
    """

    def __init__(self, config, model):
        super().__init__()
        self.model = model
        self.linear = nn.Linear(config.hidden_size, 128)
        self.relu = nn.ReLU()
        self.out = nn.Linear(128, 1)

    def forward(self, input_ids, attention_mask):
        encoder_output = self.model(input_ids=input_ids, attention_mask=attention_mask)
        # Keep only the first token's hidden state of every sequence.
        first_token_state = encoder_output.last_hidden_state[:, 0, :]
        return self.out(self.relu(self.linear(first_token_state)))

class BertRegTester():
    """Runs a regression model over a dataloader and collects predictions and labels."""

    def __init__(self, training_config, model):
        # training_config['device'] is where every batch is moved before the forward pass.
        self.training_config = training_config
        self.model = model

    def get_label(self, test_dataloader, test_type):
        '''
        test_type: 0  -> Test dataset
        test_type: 1  -> Test sentence

        Each batch is (input_ids, attention_mask, labels).
        Returns (preds, labels) as flat lists; any other test_type collects no predictions.
        '''
        preds = []
        labels = []
        device = self.training_config['device']

        # Switch to inference mode once. The call must go through self.model; the bare
        # encoder would return a BaseModelOutput object instead of a score tensor.
        self.model.eval()
        with torch.no_grad():
            for batch in test_dataloader:
                batch = tuple(t.to(device) for t in batch)
                outputs = self.model(input_ids=batch[0], attention_mask=batch[1])
                if test_type == 0:
                    # squeeze(-1) drops only the score dimension, so a batch holding a single
                    # row still yields a 1-d array (a bare squeeze() produced a 0-d array,
                    # which list.extend cannot iterate).
                    preds.extend(outputs.squeeze(-1).cpu().numpy())
                elif test_type == 1:
                    # Single-sentence mode: keep the first row's score only.
                    preds.extend(outputs[0].cpu().numpy())
                labels.extend(batch[2].cpu().numpy())
        return preds, labels

In [None]:
# Base encoder, tokenizer and config for the depression-intensity regressor.
intense_model_name = 'distilbert-base-uncased'
intense_tokenizer = DistilBertTokenizer.from_pretrained(intense_model_name, model_max_length=128)
intense_config = DistilBertConfig.from_pretrained(intense_model_name, output_hidden_states=True, output_attentions=True)
intense_model = DistilBertModel.from_pretrained(intense_model_name, config=intense_config)

# intense_config.max_position_embeddings = 128
# intense_model.to(training_config['device'])

bws_processor = DSMProcessor(intense_config, training_config, intense_tokenizer)
bws_reg = BWSRegressor(intense_config, intense_model)
bws_model_name = os.path.join(model_path, 'distil_bws_10.pt')
# map_location remaps the saved tensors onto the configured device while loading, so a
# checkpoint that holds CUDA tensors can also be restored on a CPU-only runtime.
bws_reg.load_state_dict(torch.load(bws_model_name, map_location=training_config['device']))
bws_reg.to(training_config['device'])
bws_tester = BertRegTester(training_config, bws_reg)

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
def predict_depression(text):
  """Return the depression-intensity score the regressor predicts for one sentence.

  The sentence is encoded by `bws_processor`, wrapped in a sequentially sampled
  loader and scored by `bws_tester` in single-sentence mode (test_type 1).
  """
  dataset = bws_processor.convert_sentence(text)
  loader = bws_processor.load_data(dataset, bws_processor.shuffle_data(dataset, 'test'))
  predictions = bws_tester.get_label(loader, 1)[0]
  return predictions[0]

In [None]:
# Example: score a single sentence; the cell displays the returned value.
sent = "I feel very depressed"
predict_depression(sent)

10.039019

### 2. Summary Model

In [None]:
class dialogueSummaryModel(pl.LightningModule):
    """Lightning wrapper around a T5 conditional-generation model for dialogue summarisation.

    The backbone is created from the module-level `Model_name`, which therefore has
    to be defined before the class is instantiated or restored from a checkpoint.
    """

    def __init__(self):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(Model_name, return_dict=True)

    def forward(self, input_ids, attention_mask, decoder_attention_mask, labels=None):
        """Run the backbone and return (loss, logits)."""
        output = self.model(
            input_ids,
            attention_mask=attention_mask,
            labels=labels,
            decoder_attention_mask=decoder_attention_mask
        )

        return output.loss, output.logits

    def _shared_step(self, batch, log_name):
        """Compute the loss for one batch and log it under `log_name`.

        Shared by the train / validation / test steps, which differ only in the
        name of the logged metric.
        """
        loss, _ = self(
            input_ids=batch['text_input_ids'],
            attention_mask=batch['text_attention_mask'],
            decoder_attention_mask=batch['labels_attention_mask'],
            labels=batch['labels']
        )
        self.log(log_name, loss, prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, 'train_loss')

    def test_step(self, batch, batch_idx):
        return self._shared_step(batch, 'test_loss')

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, 'val_loss')

    def configure_optimizers(self):
        return AdamW(self.parameters(), lr=0.0001)

In [None]:
def summarize(text):
  """Generate a summary of `text` with the fine-tuned T5 model.

  text: the input handed to `t5_tokenizer`. Every generated sequence is decoded
        and the decoded strings are joined with a single space.
  Returns the summary as one string.
  """
  # Follow whatever device the model currently lives on instead of assuming 'cuda:0',
  # so the function also works on a CPU-only runtime.
  model_device = summary_model.model.device

  text_encoding=t5_tokenizer(
      text,
      max_length=512,
      padding='max_length',
      truncation=True,
      return_attention_mask=True,
      add_special_tokens=True,
      return_tensors='pt'
  )

  generated_ids=summary_model.model.generate(
      input_ids=text_encoding['input_ids'].to(model_device),
      attention_mask=text_encoding['attention_mask'].to(model_device),
      max_length=150,
      num_beams=2,
      repetition_penalty=2.5,
      length_penalty=1.0,
      early_stopping=True
  )

  preds =[
          t5_tokenizer.decode(gen_id,skip_special_tokens=True,clean_up_tokenization_spaces=True)
          for gen_id in generated_ids
  ]
  return " ".join(preds)

In [None]:
# Load Model
Model_name = 't5-base'
t5_tokenizer=T5Tokenizer.from_pretrained(Model_name)

# Use the GPU when one is available and fall back to the CPU otherwise; map_location
# makes the checkpoint restorable on either.
summary_device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
summary_model=dialogueSummaryModel.load_from_checkpoint(
    "./Summary_model/t5_base.ckpt", map_location=summary_device
)
summary_model.freeze()
summary_model.to(summary_device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

dialogueSummaryModel(
  (model): T5ForConditionalGeneration(
    (shared): Embedding(32128, 768)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32128, 768)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=768, out_features=768, bias=False)
                (k): Linear(in_features=768, out_features=768, bias=False)
                (v): Linear(in_features=768, out_features=768, bias=False)
                (o): Linear(in_features=768, out_features=768, bias=False)
                (relative_attention_bias): Embedding(32, 12)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=768, out_features=3072, bias=False)
                (wo): Linear(i

### 3. Retrieval-based Response Model

In [None]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# model path
PATH_ER = './Retrieval_model/model/filter/emotional_reaction/pytorch_model.bin' # emotional reaction
PATH_EX = './Retrieval_model/model/filter/exploration/pytorch_model.bin' # exploration

# pre-trained bert model
er_model_name = 'bert-base-uncased'
ex_model_name = 'bert-base-uncased'
er_tokenizer = AutoTokenizer.from_pretrained(er_model_name)
ex_tokenizer = AutoTokenizer.from_pretrained(ex_model_name)

# Load the fine-tuned weights. map_location puts the saved tensors on `device` while
# loading, so the CPU fallback chosen above actually works on a runtime without CUDA.
# NOTE(review): strict=False ignores missing/unexpected keys silently - confirm the
# checkpoints really match BertForSequenceClassification.
er_model = BertForSequenceClassification.from_pretrained(er_model_name)
er_model.load_state_dict(torch.load(PATH_ER, map_location=device), strict = False)
er_model.to(device)

ex_model = BertForSequenceClassification.from_pretrained(ex_model_name)
ex_model.load_state_dict(torch.load(PATH_EX, map_location=device), strict = False)
ex_model.to(device)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [None]:
# model path
PATH_P = './Retrieval_model/model/polyencoder/poly_64_pytorch_model.pt'
PATH_C = './Retrieval_model/model/crossencoder/cross_0_pytorch_model.pt'

# PolyEncoder pre-trained bert model
# base line model name
bert_name = 'bert-base-uncased'
bert_config = BertConfig.from_pretrained(bert_name)
bert = BertModel.from_pretrained(bert_name, config=bert_config)

# CrossEncoder pre-trained bert model
# base line model name
# rerank_name = 'bert-base-uncased'
rerank_name = 'google/bert_uncased_L-4_H-512_A-8'
rerank_config = BertConfig.from_pretrained(rerank_name)
rerank_bert = BertModel.from_pretrained(rerank_name, config=rerank_config)

# PolyEncoder load tokenizer ('\n' is registered as an extra special token)
tokenizer = BertTokenizer.from_pretrained(bert_name)
tokenizer.add_tokens(['\n'], special_tokens=True)

# CrossEncoder load tokenizer
rerank_tokenizer = BertTokenizer.from_pretrained(rerank_name)
rerank_tokenizer.add_tokens(['\n'], special_tokens=True)

# tokenize input utterance
context_transform = SelectionJoinTransform(tokenizer=tokenizer, max_len=512)
response_transform = SelectionSequentialTransform(tokenizer=tokenizer, max_len=50) # if it is long, cuda out of memory occurs
concat_transform = SelectionConcatTransform(tokenizer=rerank_tokenizer, max_len=512)

# Load poly model
model = PolyEncoder(bert_config, bert=bert, poly_m=64) # m = 16, 64 (default)
model.resize_token_embeddings(len(tokenizer))

# Load cross model
rerank_model = CrossEncoder(rerank_config, bert=rerank_bert)
rerank_model.resize_token_embeddings(len(rerank_tokenizer))

# Restore the fine-tuned weights straight onto `device`. With map_location the same
# two lines serve both the GPU and the CPU case, so no separate CPU variant is needed.
# NOTE(review): strict=False ignores missing/unexpected keys silently - confirm the
# checkpoints match these architectures.
model.load_state_dict(torch.load(PATH_P, map_location=device), strict = False)
model.to(device)

rerank_model.load_state_dict(torch.load(PATH_C, map_location=device), strict = False)
rerank_model.to(device)

# function of each encoder
p_func = Poly_function(model=model, bert=bert, tokenizer=tokenizer, device=device,
                       context_transform = context_transform, response_transform = response_transform)
c_func = Cross_function(model=rerank_model, bert=rerank_bert, tokenizer=rerank_tokenizer, device=device,
                        concat_transform = concat_transform)
er_func = Filter_function(model=er_model, tokenizer=er_tokenizer, device=device)
ex_func = Filter_function(model=ex_model, tokenizer=ex_tokenizer, device=device)

config.json:   0%|          | 0.00/383 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/116M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [None]:
# Load the candidate-response database; the 'response' column holds the candidate texts.
data = pd.read_csv('./Retrieval_model/data/eng_candidate.csv', encoding = 'utf8')
cand_data = data['response'] #v1v2 response # aaai candidate
print(f'Total candidates: {len(cand_data)}')

# use gpu
# NOTE(review): pickle.load can execute arbitrary code while unpickling - only load this
# file when it comes from a trusted source.
with open('./Retrieval_model/data/eng_candidate_emb.pickle', 'rb') as fr:
    final_cand_emb = pickle.load(fr)

# use cpu
# (variant for running on the CPU)
# NOTE(review): CPU_Unpickler is not defined or imported in the visible part of this
# notebook, and the path below differs from the one loaded above - confirm before enabling.
# with open('./data/eng_candidate_reddit_emb.pickle', 'rb') as fr:
#     final_cand_emb = CPU_Unpickler(fr).load()

print(f'Embedding shape: {final_cand_emb.shape}')

Total candidates: 62804
Embedding shape: torch.Size([1, 62804, 768])


In [None]:
def change_filter(past_dep, current_dep):
  """Switch or keep the global response filter depending on the depression trend.

  past_dep: past depression intensity
  current_dep: current depression intensity

  When the current intensity is greater than or equal to the past one, the
  module-level `depression_filter` is toggled between 0 and 1 (any other value is
  left untouched); otherwise it is kept as is. Prints which action was taken.

  Returns the (possibly updated) `depression_filter`.
  """
  global depression_filter

  if past_dep <= current_dep: # intensity did not go down -> try the other filter
    print("change current filter")
    if depression_filter == 0:
      depression_filter = 1
    elif depression_filter == 1:
      depression_filter = 0

  else: # intensity went down -> the current filter stays
    print("keep current filter")

  return depression_filter

### 4. Execute Chatbot (DACS)

In [None]:
# Conversation state. These are ordinary module-level variables (a `global` statement
# at module level has no effect); change_filter() rebinds `depression_filter` through
# its own `global` declaration.

# save all conversation (user and bot utterances, in order)
all_his = []

# for summary (different input style), save summary history
summary_his, summary = [], []

# depression scores of the current 4-turn window, and of the whole session (for graph)
depression_score, depression_history = [], []

# turn_history
turn_history = []

# default_setting
turn = 1
depression_filter = 0

while True:

    # per-turn buffers: filter predictions and the retrieved candidates
    er_level = []
    ex_level = []
    idx_list = []
    top_cand = []

    print('Client/user: ')
    user_context = [str(input())]

    # end conversation
    # Checked before scoring so that the 'end' sentinel is not recorded as an utterance
    # in the depression statistics.
    if user_context == ['end']:
        save_path = './test_data/eng_test_data/test_example.csv'
        # Create the target folder when it is missing; appending used to fail with
        # FileNotFoundError otherwise.
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        with open(save_path, 'a', newline = '') as file:
            csv.writer(file).writerow(all_his)
        print("Conversation Saved!")
        break

    context_score = predict_depression(user_context[0]) # current depression score
    depression_score.append(context_score)
    depression_history.append(context_score)

    all_his.append(user_context[0])
    summary_his.append('user:' + user_context[0])

    # Context handed to both encoders:
    #   turn 1     -> the current utterance only
    #   turns 2-4  -> the whole conversation so far (multi-turn)
    #   later      -> the latest summary followed by the current utterance
    if turn == 1:
        model_context = user_context
    elif turn <= 4:
        model_context = all_his
    else:
        model_context = [summary[-1]] + user_context

    user_emb = p_func.ctx_emb(*p_func.input_context(model_context))
    final_score = p_func.score(user_emb, final_cand_emb)
    new_score = final_score.sort()

    # most relevant 5: the sort is ascending, so the last five indices are the best ones;
    # the candidates are collected best-first.
    idx_list = [int(idx) for idx in new_score[1][0][-5:]]
    top_cand = pd.Series([cand_data[idx] for idx in reversed(idx_list)])

    # re-rank the top candidates with the cross encoder
    cross_score = c_func.text_emb(*c_func.input_text(model_context, top_cand))
    prob_cross_score = F.softmax(cross_score, dim = 0) # sum of probability = 1
    rerank_list = [prob_cross_score[i].item() for i in range(len(prob_cross_score))]

    # sorting (highest probability first)
    reversed_idxes = np.argsort(rerank_list)[::-1]
    final_response_list = [top_cand[idx] for idx in reversed_idxes]

    for cand in final_response_list:
        er_level.append(er_func.prediction(er_model, er_tokenizer, cand))
        ex_level.append(ex_func.prediction(ex_model, ex_tokenizer, cand))

    final_response_list = pd.Series(final_response_list)
    er_level = pd.Series(er_level)
    ex_level = pd.Series(ex_level)

    # make new final response dataframe
    er_ex_response_df = pd.concat([final_response_list, er_level, ex_level], axis = 1)
    er_ex_response_df.columns = ['candidate', 'er', 'ex']

    # early conversation -> questioning ('ex'); afterwards the active filter decides:
    # 0 = question ('ex'), 1 = emotional reaction ('er')
    filter_column = 'er' if (turn > 4 and depression_filter == 1) else 'ex'

    new_response_list = []
    spare_response_list = []
    for i in range(len(er_ex_response_df)):
        candidate = er_ex_response_df['candidate'][i]
        if er_ex_response_df[filter_column][i] == 1:
            new_response_list.append(candidate)
        else:
            spare_response_list.append(candidate)

    # fill up with the non-matching candidates when fewer than three passed the filter
    if len(new_response_list) < 3:
        new_response_list = new_response_list + spare_response_list

    first_response = new_response_list[0]
    second_response = new_response_list[1]
    third_response = new_response_list[2]

    # select the response which is not in conv_his
    if first_response in all_his and second_response in all_his:
        best_response = third_response
    elif first_response in all_his:
        best_response = second_response
    else:
        best_response = first_response

    print()
    print('Consultant bot: ')
    print(best_response)

    # save the conversation
    all_his.append(best_response)
    summary_his.append('chatbot:' + best_response)

    # sub conversation reset: summarise every second turn
    if turn % 2 == 0:
        input_sum = '\n'.join(summary_his)
        # Summarise the joined dialogue. `input_sum` used to be built and then ignored,
        # with the raw list of utterances passed instead.
        conv_summary = summarize(input_sum)
        summary.append(conv_summary)
        summary_his = []

    if turn % 4 == 0:

        # check user's preference
        avg_depression_score = sum(depression_score) / len(depression_score)
        print('avg depression:', avg_depression_score)
        print('current depression:', context_score)

        change_filter(avg_depression_score, context_score)

        # reset the window every 4 turns
        depression_score = []

    turn_history.append(turn)
    turn += 1

    print()

Client/user: 
I feel so upset

Consultant bot: 
Why are you upset

Client/user: 
I failed the exam

Consultant bot: 
Why were you upset

Client/user: 
I took the test but the result was worse than I expected

Consultant bot: 
Oh no what happened? Why dont you think you did well?

Client/user: 
I checked my grade and it was serious

Consultant bot: 
Man that is just awful though that happened to you. What was the out come of the exam?
avg depression: 5.848640322685242
current depression: 3.6747775
keep current filter

Client/user: 
I would take another exam or take an additional class

Consultant bot: 
Aw Im sorry. Can you retake the class?

Client/user: 
I could, but I don't want it because it was really tired

Consultant bot: 
That test seems so hard. I probably couldnt even pass it. How did you do?

Client/user: 
I also heard that it was difficult. However still I couldn't expect that I failed it

Consultant bot: 
What is the subject?? The test will be oral or in writing.

Client/use

FileNotFoundError: [Errno 2] No such file or directory: './test_data/test_example.csv'