# **PICO EXTRACTION** *| BERT NER*
### Participants/Problem (P), Intervention (I), Comparison (C) and Outcome (O)

In [2]:
# Intstall required libraries
!pip install datasets
!pip install -U accelerate
!pip install -U transformers
!pip install seqeval
!pip install tqdm

Collecting accelerate
  Downloading accelerate-0.20.3-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.6/227.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
Installing collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.12.0
    Uninstalling accelerate-0.12.0:
      Successfully uninstalled accelerate-0.12.0
Successfully installed accelerate-0.20.3
Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.29.2
    Uninstalling transformers-4.29.2:
      Successfully uninstalled transformers-4.29.2
Successfully installed transformers-4.30.2
[0mCollecting seqeval
  Downloading seqeval-1.2.

In [3]:
# Import necessary libraries
import re
import pandas as pd
import numpy as np
import itertools
from datasets import Dataset
from datasets import load_metric
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer
from transformers import DataCollatorForTokenClassification
import torch
from tqdm import tqdm

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [10]:
# Create necessary functions
def tokenize_and_align_labels(examples):
    label_all_tokens = True
    tokenized_inputs = tokenizer(list(examples["tokens"]), truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples[f"{task}_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)
            elif label[word_idx] == '0':
                label_ids.append(0)
            elif word_idx != previous_word_idx:
                label_ids.append(label_encoding_dict[label[word_idx]])
            else:
                label_ids.append(label_encoding_dict[label[word_idx]] if label_all_tokens else -100)
            previous_word_idx = word_idx
        labels.append(label_ids)
        
    tokenized_inputs["labels"] = labels
    return tokenized_inputs

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [[label_list[p] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
    true_labels = [[label_list[l] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {"precision": results["overall_precision"], "recall": results["overall_recall"], "f1": results["overall_f1"], "accuracy": results["overall_accuracy"]}

    
def replace_entity(old_entity):
    if "POPULATION" in old_entity or "SAMPLE" in old_entity:
        entity = "I-POPULATION"
    elif "PARTICIPANT" in old_entity:
        entity = "I-PARTICIPANT"
    elif "INTERVENTION" in old_entity:
        entity = "I-INTERVENTION"
    elif "COMPAR" in old_entity:
        entity = "I-COMPARISON"
    elif "OUTCOME" in old_entity:
        entity = "I-OUTCOME"
    elif bool(re.search('[a-zA-Z]', old_entity)) == False:
        entity = ""
    else:
        entity = "O"
    return(entity)

In [4]:
# Read the text data
with open("/kaggle/input/pubmeddata/pubmed_data.txt") as f:
    content = f.readlines()

In [5]:
print(len(content))

369304


### **DATA PREPARATION**
Convert the text data to PICO BERT trainable data format

Convert the text file and seperate the following fields in seperate column in a dataframe

* PMID
* PICO_Entity
* AOMRC Entity
* Text

In [7]:
# Data preprocessing & manipulation
content_df = pd.DataFrame()

for i in tqdm(range(len(content))):
    if("###" in content[i]):
        content_df.loc[i, "PMID"] = content[i].split(":")[0].replace("#", "")
    elif(content[i] == "\n"):
        pass
    else:
        content_df.loc[i, "PMID"] = content_df.loc[i-1, "PMID"]
        content_df.loc[i, "PICO_Entity"] = content[i].split("|")[0]
        content_df.loc[i, "AOMRC_Entity"] = content[i].split("|")[1]
        content_df.loc[i, "Text"] = content[i].split("|")[2]

100%|██████████| 369304/369304 [4:45:20<00:00, 21.57it/s]  


In [8]:
content_df.to_csv('processed_data.csv', encoding='utf-8')

In [33]:
content_df= pd.read_csv('/kaggle/input/pubmeddata/processed_data.csv')  

In [34]:
# Display manipulated data | samples
content_df.head()

Unnamed: 0.1,Unnamed: 0,PMID,PICO_Entity,AOMRC_Entity,Text
0,0,28628768,,,
1,1,28628768,OBJECTIVE,A,To test the efficacy of a pregnancy adapted ve...
2,2,28628768,SETTING,M,Online and telephone .\n
3,3,28628768,POPULATION OR SAMPLE,P,Self-referred pregnant women ( gestational wee...
4,4,28628768,METHODS,M,@ pregnant women ( gestational week @ ) with m...


In [35]:
content_df = content_df.drop(['Unnamed: 0'], axis=1)

### **DATA CLEANING**
You can there are NAN's occupied with the columns

In [36]:
# Remove NAN's
content_df = content_df[pd.notnull(content_df['Text'])]

In [37]:
# Display cleaned data | samples
content_df.head()

Unnamed: 0,PMID,PICO_Entity,AOMRC_Entity,Text
1,28628768,OBJECTIVE,A,To test the efficacy of a pregnancy adapted ve...
2,28628768,SETTING,M,Online and telephone .\n
3,28628768,POPULATION OR SAMPLE,P,Self-referred pregnant women ( gestational wee...
4,28628768,METHODS,M,@ pregnant women ( gestational week @ ) with m...
5,28628768,MAIN OUTCOME MEASURES,O,The primary outcome was depressive symptoms me...


### **DATA CONVERSION**
Convert the data with the required format and rename

In [38]:
# Datatype formating and renaming
content_df["Text"] = content_df["Text"].astype(str)
content_df["Text"] = content_df["Text"].apply(lambda x:x.strip())
content_df = content_df[["PMID", "Text", "PICO_Entity"]]
content_df.rename(columns = {'Text':'tokens', 'PICO_Entity':'ner_tags'}, inplace = True)
content_df = content_df.reset_index(drop=True)

In [39]:
# Display formatted samples
content_df.head()

Unnamed: 0,PMID,tokens,ner_tags
0,28628768,To test the efficacy of a pregnancy adapted ve...,OBJECTIVE
1,28628768,Online and telephone .,SETTING
2,28628768,Self-referred pregnant women ( gestational wee...,POPULATION OR SAMPLE
3,28628768,@ pregnant women ( gestational week @ ) with m...,METHODS
4,28628768,The primary outcome was depressive symptoms me...,MAIN OUTCOME MEASURES


### **DATA PREPROCESSING**
Convert the data required parameters that needs to train the model

In [40]:
# Calculate token length
content_df["token_len"] = content_df["tokens"].apply(lambda x: len(x.split()))

In [41]:
# Display samples
print(content_df.head())

       PMID                                             tokens  \
0  28628768  To test the efficacy of a pregnancy adapted ve...   
1  28628768                             Online and telephone .   
2  28628768  Self-referred pregnant women ( gestational wee...   
3  28628768  @ pregnant women ( gestational week @ ) with m...   
4  28628768  The primary outcome was depressive symptoms me...   

                ner_tags  token_len  
0              OBJECTIVE         29  
1                SETTING          4  
2   POPULATION OR SAMPLE         17  
3                METHODS         36  
4  MAIN OUTCOME MEASURES         18  


In [42]:
# Replace entities with required entity names
content_df["ner_tags"] = content_df["ner_tags"].apply(lambda x: replace_entity(x))

In [43]:
# Display samples
content_df.head()

Unnamed: 0,PMID,tokens,ner_tags,token_len
0,28628768,To test the efficacy of a pregnancy adapted ve...,O,29
1,28628768,Online and telephone .,O,4
2,28628768,Self-referred pregnant women ( gestational wee...,I-POPULATION,17
3,28628768,@ pregnant women ( gestational week @ ) with m...,O,36
4,28628768,The primary outcome was depressive symptoms me...,I-OUTCOME,18


In [44]:
# Iterate entities to all words/token
for i in range(len(content_df)):
    content_df.loc[i, "ner_tags"] = str([content_df["ner_tags"][i]] * content_df["token_len"][i])
    content_df.loc[i, "tokens"] = str(content_df["tokens"][i].split())

In [46]:
content_df.head()

Unnamed: 0,PMID,tokens,ner_tags,token_len
0,28628768,"['To', 'test', 'the', 'efficacy', 'of', 'a', '...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ...",29
1,28628768,"['Online', 'and', 'telephone', '.']","['O', 'O', 'O', 'O']",4
2,28628768,"['Self-referred', 'pregnant', 'women', '(', 'g...","['I-POPULATION', 'I-POPULATION', 'I-POPULATION...",17
3,28628768,"['@', 'pregnant', 'women', '(', 'gestational',...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ...",36
4,28628768,"['The', 'primary', 'outcome', 'was', 'depressi...","['I-OUTCOME', 'I-OUTCOME', 'I-OUTCOME', 'I-OUT...",18


In [47]:
# Converts string to list of values
content_df["ner_tags"] = content_df["ner_tags"].apply(lambda x:eval(x))
content_df["tokens"] = content_df["tokens"].apply(lambda x:eval(x))

In [48]:
# Group by PMID
content_df = content_df.groupby('PMID').agg(lambda x: x.to_list())

In [49]:
# Combine list of list
content_df["tokens"] = content_df["tokens"].apply(lambda x:list(itertools.chain.from_iterable(x)))
content_df["ner_tags"] = content_df["ner_tags"].apply(lambda x:list(itertools.chain.from_iterable(x)))

In [50]:
# Reset dataframe
content_df = content_df.reset_index(drop=True)

In [51]:
# Display preprocessed data | samples
content_df.head()

Unnamed: 0,tokens,ner_tags,token_len
0,"[To, determine, whether, prophylactic, treatme...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[25, 38, 9, 19, 15, 53, 49, 21, 29]"
1,"[Since, it, is, not, clear, whether, testoster...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[44, 20, 38, 17, 17, 10, 21, 7, 25, 10, 30, 24]"
2,"[The, aim, was, to, study, the, pharmacokineti...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[30, 24, 41, 22, 12, 10, 42, 13, 16, 45, 37, 2..."
3,"[To, investigate, the, significance, of, treat...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[33, 15, 29, 32, 26, 19, 35, 33, 37, 16, 38, 7]"
4,"[Dopamine, agonists, have, a, well, establishe...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[26, 34, 12, 45, 22, 45, 27, 65, 22, 47, 23, 3..."


In [26]:
# # Save preprocessed data
# content_df.to_excel("preprocessed_df.xlsx")

**TEST / TRAIN SPLIT**
---
Split the train data set into two sets by the ratio 8:2. and we can furthur evaluate the model performance

In [22]:
# content_df = pd.read_excel('/kaggle/input/pubmeddata/preprocessed_df_complete.xlsx')
# content_df = content_df.drop(['Unnamed: 0'], axis=1)

In [52]:
content_df.columns

Index(['tokens', 'ner_tags', 'token_len'], dtype='object')

In [53]:
#Test train split
train, test = train_test_split(content_df, test_size=0.2, shuffle=False, random_state=1)

train = train.reset_index(drop=True)
test = test.reset_index(drop=True)

train_df = train
test_df = test

In [54]:
# Print Shape
print(len(train_df))
print(len(test_df))

19734
4934


In [55]:
# Label to numeric representation
label_list = ['O','I-POPULATION', 'I-PARTICIPANT','I-INTERVENTION', 'I-COMPARISON','I-OUTCOME']
label_encoding_dict = {'O': 0, 'I-POPULATION': 1, 'I-PARTICIPANT': 2, 'I-INTERVENTION': 3, 'I-COMPARISON': 4, 'I-OUTCOME': 5}

**MODEL INITIALIZATION**
---
Initialize the BERT model. Define the Task Name, Model, Tokenizer

In [56]:
# Initialize the BERT model
task = "ner" 
model_checkpoint = "distilbert-base-uncased"
batch_size = 16
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=len(label_list))

# Initialize pretrained tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint) 

data_collator = DataCollatorForTokenClassification(tokenizer) #Some random data enhancements

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream

**TOKENIZATION**
---
Tokenize and embed the dataset with pretrained BERT

In [57]:
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

train_tokenized_datasets = train_dataset.map(tokenize_and_align_labels, batched=True)
test_tokenized_datasets = test_dataset.map(tokenize_and_align_labels, batched=True)

  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

**HYPER PARAMETER TURNING**
---
Hyperparameters - We can iterate and tune the model with these parameters for better results

In [58]:
args = TrainingArguments(
    f"test-{task}",
    evaluation_strategy = "epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=3,
    weight_decay=1e-5,
)

metric = load_metric("seqeval")

Downloading builder script:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

**TRAINING MODULE**
---
Train the model with different parameters and finalize the optimal one

In [59]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_tokenized_datasets,
    eval_dataset=test_tokenized_datasets,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3217,0.297821,0.298578,0.59217,0.39699,0.883375
2,0.2214,0.260993,0.35718,0.629476,0.455754,0.90589
3,0.1681,0.266071,0.38009,0.654577,0.480924,0.908936




TrainOutput(global_step=1851, training_loss=0.21845037418079016, metrics={'train_runtime': 1911.0866, 'train_samples_per_second': 30.978, 'train_steps_per_second': 0.969, 'total_flos': 7724608640091936.0, 'train_loss': 0.21845037418079016, 'epoch': 3.0})

# **EVALUATION**

---

Evaluate the remaining 20 percent of training data. 
It is a good idea to take a part of training data for validation while training the model.

In [60]:
trainer.evaluate()

{'eval_loss': 0.2660711109638214,
 'eval_precision': 0.3800895901584542,
 'eval_recall': 0.6545768566493955,
 'eval_f1': 0.4809237797140682,
 'eval_accuracy': 0.9089357946526421,
 'eval_runtime': 112.2381,
 'eval_samples_per_second': 43.96,
 'eval_steps_per_second': 1.381,
 'epoch': 3.0}

*Save the Model*
---
Save the model to disk

In [61]:
# Save the model to disk
trainer.save_model('un-ner.model')

**PREDICTION MODULE**
---
Predict the **Unseen Data**


In [63]:
# Load the tokenizer and trained model from disk
tokenizer = AutoTokenizer.from_pretrained('./un-ner.model/')
model = AutoModelForTokenClassification.from_pretrained('./un-ner.model/', num_labels=len(label_list))

In [64]:
# Input unseen data
paragraph = '''
OBJECTIVE|A|To test the efficacy of a pregnancy adapted version of an existing 10-week ICBT-program for depression as well as assessing acceptability and adherence DESIGN: Randomised controlled trial.
SETTING|M|Online and telephone.
POPULATION OR SAMPLE|P|Self-referred pregnant women (gestational week 10-28 at intake) currently suffering from major depressive disorder.
METHODS|M|42 pregnant women (gestational week 12-28) with major depression were randomised to either treatment as usual (TAU) provided at their antenatal clinic or to ICBT as an add-on to usual care.
MAIN OUTCOME MEASURES|O|The primary outcome was depressive symptoms measured with the Montgomery-sberg depression rating scale-self report (MADRS-S). The Edinburgh Postnatal Depression Scale and measures of anxiety and sleep were used. Credibility, satisfaction, adherence and utilization were also assessed.
RESULTS|R|The ICBT group had significantly lower levels of depressive symptoms post treatment (p < 0.001, Hedges g =1.21) and were more likely to be responders (i.e. achieve a statistically reliable improvement) (RR = 0.36; p = 0.004). Measures of treatment credibility, satisfaction, utilization, and adherence were comparable to implemented ICBT for depression.
LIMITATIONS|Others|Small sample size and no long-term evaluation.
CONCLUSION|C|Pregnancy adapted ICBT for antenatal depression is feasible, acceptable and efficacious. These results need to be replicated in larger trials to validate these promising findings.
'''

In [65]:
# Tokenize the paragraph
tokens = tokenizer(paragraph)
torch.tensor(tokens['input_ids']).unsqueeze(0).size()

torch.Size([1, 332])

In [66]:
# Predictions
predictions = model.forward(input_ids=torch.tensor(tokens['input_ids']).unsqueeze(0), attention_mask=torch.tensor(tokens['attention_mask']).unsqueeze(0))
predictions = torch.argmax(predictions.logits.squeeze(), axis=1)
predictions = [label_list[i] for i in predictions]

In [67]:
print(predictions)

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PARTICIPANT', 'I-PAR

In [68]:
words = tokenizer.batch_decode(tokens['input_ids'])

In [69]:
# Store the token and predicted entities in a dataframe
prediction_results = pd.DataFrame({'ner': predictions, 'words': words})

In [70]:
# Print predicted samples
print(prediction_results)

    ner      words
0     O      [CLS]
1     O  objective
2     O          |
3     O          a
4     O          |
..   ..        ...
327   O      these
328   O  promising
329   O   findings
330   O          .
331   O      [SEP]

[332 rows x 2 columns]


*Export the results*
---
Export test results to **csv**

Select the **Words, Labels** and **Confidence scores** to export csv file

In [71]:
label_list = ['O','I-POPULATION', 'I-PARTICIPANT','I-INTERVENTION', 'I-COMPARISON','I-OUTCOME']
O_List=[]
I_POPULATION_List=[]
I_PARTICIPANT_List=[]
I_INTERVENTION_List=[]
I_COMPARISON_List=[]
I_OUTCOME_List=[]
for ner,word in zip(predictions,words):
    if ner == 'O':
        O_List.append(word)
    elif ner == 'I-POPULATION':
        I_POPULATION_List.append(word)
    elif ner == 'I-PARTICIPANT':
        I_PARTICIPANT_List.append(word)
    elif ner == 'I-INTERVENTION':
        I_INTERVENTION_List.append(word)
    elif ner == 'I-COMPARISON':
        I_COMPARISON_List.append(word)
    elif ner == 'I-OUTCOME':
        I_OUTCOME_List.append(word)

In [72]:
print('O       '+' '.join(O_List))
print("------------------------------------------")
print('I-POPULATION        ' + ' '.join(I_POPULATION_List))
print("------------------------------------------")
print('I-PARTICIPANT       '+' '.join(I_PARTICIPANT_List))
print("------------------------------------------")
print('I-INTERVENTION       '+' '.join(I_INTERVENTION_List))
print("------------------------------------------")
print('I-COMPARISON        ' + ' '.join(I_COMPARISON_List))
print("------------------------------------------")
print('I-OUTCOME        ' + ' '.join(I_OUTCOME_List))

O       [CLS] objective | a | to test the efficacy of a pregnancy adapted version of an existing 10 - week ic ##bt - program for depression as well as assessing accept ##ability and adherence design : random ##ised controlled trial . setting | m | online and telephone . ##bt - main outcome measures | o | the primary outcome was de ##pressive symptoms measured with the montgomery - sb ##er ##g depression rating scale - self report ( mad ##rs - s ) . the edinburgh post ##nat ##al depression scale and measures of anxiety and sleep were used . credibility , satisfaction , adherence and were also assessed . results | r | the ic ##bt group had significantly lower levels of de ##pressive symptoms post treatment ( p < 0 . 001 , hedges g = 1 . 21 ) and were more likely to be respond ##ers ( i . e . achieve a statistical ##ly reliable improvement ) ( rr = 0 . 36 ; p = 0 . 00 ##4 ) . measures of treatment credibility , satisfaction , utilization , and adherence were comparable to implemented ic #

In [73]:
# Export results to csv
prediction_results.to_csv("prediction_results.csv")