In [1]:
!pip install transformers[torch] optuna sentencepiece

Collecting transformers[torch]
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m76.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting optuna
  Downloading optuna-3.2.0-py3-none-any.whl (390 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m390.6/390.6 kB[0m [31m37.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m76.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers[torch])
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers[torch])
  Downloading to

In [2]:
# load packages
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import AdamW, get_linear_schedule_with_warmup
import time
import datetime
import random
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
import re
import matplotlib.pyplot as plt
import seaborn as sns
import optuna
from optuna.pruners import SuccessiveHalvingPruner
from optuna.samplers import TPESampler
import math

torch.cuda.amp.autocast(enabled=True)

<torch.cuda.amp.autocast_mode.autocast at 0x7efd0b017dc0>

In [3]:
## Input data paths

## base data
base_path = "/content/drive/MyDrive/CS4NLP-HateXplain/data/t5_modeling/"
train_base_path = base_path + "df_train.csv"
val_base_path = base_path + "df_val.csv"
test_base_path = base_path + "df_test.csv"


## explanations
exp_path = "/content/drive/MyDrive/CS4NLP-HateXplain/data/t5_modeling/t5-for-explanation/"
train_explanations_path =  exp_path + "df_train_pred_exp.csv"
val_explanations_path =  exp_path + "df_val_pred_exp.csv"
test_explanations_path =  exp_path + "df_test_pred_exp.csv"

## keywords
kws_path = "/content/drive/MyDrive/CS4NLP-HateXplain/data/t5_modeling/t5-for-keywords/"
train_kw_path = kws_path + "df_train_pred_kw.csv"
val_kw_path = kws_path + "df_val_pred_kw.csv"
test_kw_path = kws_path + "df_test_pred_kw.csv"


## Read base data
df_train_base = pd.read_csv(train_base_path).astype(str)[['prefix','input_text','target_text']]
df_val_base = pd.read_csv(val_base_path).astype(str)[['prefix','input_text','target_text']]
df_test_base = pd.read_csv(test_base_path).astype(str)[['prefix','input_text','target_text']]
df_train_base = df_train_base[df_train_base['prefix']=='label'].copy()
df_val_base = df_val_base[df_val_base['prefix']=='label'].copy()
df_test_base = df_test_base[df_test_base['prefix']=='label'].copy()
df_train_base.rename(columns={'input_text':'sentence'}, inplace=True)
df_val_base.rename(columns={'input_text':'sentence'}, inplace=True)
df_test_base.rename(columns={'input_text':'sentence'}, inplace=True)


## Read explanations data
df_train_exp = pd.read_csv(train_explanations_path)
df_val_exp = pd.read_csv(val_explanations_path)
df_test_exp = pd.read_csv(test_explanations_path)
df_train_exp.rename(columns={'input_text':'sentence', 'predicted':'predicted_exp'}, inplace=True)
df_val_exp.rename(columns={'input_text':'sentence', 'predicted':'predicted_exp'}, inplace=True)
df_test_exp.rename(columns={'input_text':'sentence', 'predicted':'predicted_exp'}, inplace=True)


## Read keywords data
df_train_kw = pd.read_csv(train_kw_path)
df_val_kw = pd.read_csv(val_kw_path)
df_test_kw = pd.read_csv(test_kw_path)
df_train_kw.rename(columns={'input_text':'sentence', 'predicted':'predicted_kw'}, inplace=True)
df_val_kw.rename(columns={'input_text':'sentence', 'predicted':'predicted_kw'}, inplace=True)
df_test_kw.rename(columns={'input_text':'sentence', 'predicted':'predicted_kw'}, inplace=True)


## Check shapes
print("Base data")
print("Train: ", df_train_base.shape)
print("Val: ", df_val_base.shape)
print("Test: ", df_test_base.shape)


print("Explanations: ")
print("Train: ", df_train_exp.shape)
print("Val: ", df_val_exp.shape)
print("Test: ", df_test_exp.shape)

print("Keywords: ")
print("Train: ", df_train_kw.shape)
print("Val: ", df_val_kw.shape)
print("Test: ", df_test_kw.shape)

Base data
Train:  (14072, 3)
Val:  (1787, 3)
Test:  (1761, 3)
Explanations: 
Train:  (14057, 5)
Val:  (1786, 5)
Test:  (1759, 5)
Keywords: 
Train:  (14072, 5)
Val:  (1787, 5)
Test:  (1761, 5)


In [4]:
## Output Paths
model_op_path = "/content/drive/MyDrive/CS4NLP-HateXplain/data/t5_modeling/t5_ip_sent_exp_kw_v2/"


In [7]:
## Merge data function to get modeling data
def get_model_data(df_label, df_exp, df_kw):
  df1 = df_label.merge(df_exp[['sentence','predicted_exp']], on = ['sentence'], how = 'inner')
  df = df1.merge(df_kw[['sentence','predicted_kw']], on = ['sentence'], how = 'inner')
  df = df.astype(str)
  kw_prefix = " The keywords in the sentence are: "
  df['input_text'] = df.apply(lambda row: row['sentence'] + '. ' + row['predicted_exp'] + kw_prefix + row['predicted_kw'], axis=1)
  df = df[['prefix','input_text','target_text']].copy()
  df.drop_duplicates(subset=['input_text'], keep='first', inplace=True)
  return df

train_df = get_model_data(df_train_base, df_train_exp, df_train_kw)
eval_df = get_model_data(df_val_base, df_val_exp, df_val_kw)
test_df = get_model_data(df_test_base, df_test_exp, df_test_kw)

print("Train shape: ", train_df.shape)
print("Val shape: ", eval_df.shape)
print("Test shape: ", test_df.shape)

Train shape:  (14060, 3)
Val shape:  (1786, 3)
Test shape:  (1759, 3)


In [8]:
train_df.isnull().sum()

prefix         0
input_text     0
target_text    0
dtype: int64

In [9]:
## Instantiate Tokenizer
tokenizer = T5Tokenizer.from_pretrained('t5-base')

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [10]:
# tokenize the main text
def tokenize_corpus(df, tokenizer, max_len=300):
    # token ID storage
    input_ids = []
    # attension mask storage
    attention_masks = []
    # max len -- 512 is max
    max_len = max_len
    # for every document:
    for doc in df:
        # `encode_plus` will:
        #   (1) Tokenize the sentence.
        #   (2) Prepend the `[CLS]` token to the start.
        #   (3) Append the `[SEP]` token to the end.
        #   (4) Map tokens to their IDs.
        #   (5) Pad or truncate the sentence to `max_length`
        #   (6) Create attention masks for [PAD] tokens.
        encoded_dict = tokenizer.encode_plus(
                            doc,  # document to encode.
                            add_special_tokens=True,  # add tokens relative to model
                            max_length=max_len,  # set max length
                            truncation=True,  # truncate longer messages
                            pad_to_max_length=True,  # add padding
                            return_attention_mask=True,  # create attn. masks
                            return_tensors='pt'  # return pytorch tensors
                       )

        # add the tokenized sentence to the list
        input_ids.append(encoded_dict['input_ids'])

        # and its attention mask (differentiates padding from non-padding)
        attention_masks.append(encoded_dict['attention_mask'])

    return torch.cat(input_ids, dim=0), torch.cat(attention_masks, dim=0)


# create tokenized data - input_text
train_body_input_ids, train_body_attention_masks = tokenize_corpus(train_df['input_text'].values, tokenizer)
eval_body_input_ids, eval_body_attention_masks = tokenize_corpus(eval_df['input_text'].values, tokenizer)
test_body_input_ids, test_body_attention_masks = tokenize_corpus(test_df['input_text'].values, tokenizer)

# create tokenized data - target_text - max_len=2
train_target_input_ids, train_target_attention_masks = tokenize_corpus(train_df['target_text'].values, tokenizer, max_len=2)
eval_target_input_ids, eval_target_attention_masks = tokenize_corpus(eval_df['target_text'].values, tokenizer, max_len=2)
test_target_input_ids, test_target_attention_masks = tokenize_corpus(test_df['target_text'].values, tokenizer, max_len=2)




In [11]:
train_df.isnull().sum()

prefix         0
input_text     0
target_text    0
dtype: int64

In [12]:
## Function to prepare dataset
def prepare_dataset(body_tokens, body_masks, target_token, target_masks):
  tensor_df = TensorDataset(body_tokens, body_masks, target_token, target_masks)
  return tensor_df

# create tensor data sets
train_dataset = prepare_dataset(train_body_input_ids, train_body_attention_masks, train_target_input_ids, train_target_attention_masks)
eval_dataset = prepare_dataset(eval_body_input_ids, eval_body_attention_masks, eval_target_input_ids, eval_target_attention_masks)
test_dataset = prepare_dataset(test_body_input_ids, test_body_attention_masks, test_target_input_ids, test_target_attention_masks)


In [13]:
## Instantiate training models
## Training
def train(model, dataloader, optimizer):

    # capture time
    total_t0 = time.time()

    # Perform one full pass over the training set.
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch + 1, epochs))
    print('Training...')

    # reset total loss for epoch
    train_total_loss = 0
    total_train_f1 = 0

    # put model into traning mode
    model.train()

    # for each batch of training data...
    for step, batch in enumerate(dataloader):

        # progress update every 40 batches.
        if step % 40 == 0 and not step == 0:

            # Report progress.
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(dataloader)))
            #print('Train loss: ', train_total_loss)

        # Unpack this training batch from our dataloader:
        #
        # As we unpack the batch, we'll also copy each tensor to the GPU using
        # the `to` method.
        #
        # `batch` contains three pytorch tensors:
        #   [0]: input tokens
        #   [1]: attention masks
        #   [2]: target tokens
        #   [3]: target attenion masks
        b_input_ids = batch[0].cuda()
        b_input_mask = batch[1].cuda()
        b_target_ids = batch[2].cuda()
        b_target_mask = batch[3].cuda()

        # clear previously calculated gradients
        optimizer.zero_grad()

        # runs the forward pass with autocasting.
        with autocast():
            # forward propagation (evaluate model on training batch)
            outputs = model(input_ids=b_input_ids,
                            attention_mask=b_input_mask,
                            labels=b_target_ids,
                            decoder_attention_mask=b_target_mask)

            loss, prediction_scores = outputs[:2]

            # sum the training loss over all batches for average loss at end
            # loss is a tensor containing a single value
            # print("loss: ", loss)
            if math.isnan(loss.item())==False:
              train_total_loss += loss.item()

        # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
        # Backward passes under autocast are not recommended.
        # Backward ops run in the same dtype autocast chose for corresponding forward ops.

        scaler.scale(loss).backward()

        # Clip gradient
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)

        # scaler.step() first unscales the gradients of the optimizer's assigned params.
        # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
        # otherwise, optimizer.step() is skipped.
        scaler.step(optimizer)

        # Updates the scale for next iteration.
        scaler.update()

        # update the learning rate
        scheduler.step()

    # calculate the average loss over all of the batches
    avg_train_loss = train_total_loss / len(dataloader)

    # Record all statistics from this epoch.
    training_stats.append(
        {
            'Train Loss': avg_train_loss,
            'Total train loss': train_total_loss
        }
    )

    # training time end
    training_time = format_time(time.time() - total_t0)

    # print result summaries
    print("")
    print("summary results")
    print("epoch | trn loss | total trn loss | trn time ")
    print(f"{epoch+1:5d} | {avg_train_loss:.5f} | {train_total_loss:.5f} | {training_time:}")

    return training_stats


In [14]:
## Validation
def validating(model, dataloader):

    # capture validation time
    total_t0 = time.time()

    # After the completion of each training epoch, measure our performance on
    # our validation set.
    print("")
    print("Running Validation...")

    # put the model in evaluation mode
    model.eval()

    # track variables
    total_valid_loss = 0

    # evaluate data for one epoch
    for batch in dataloader:

        # Unpack this training batch from our dataloader:
        # `batch` contains three pytorch tensors:
        #   [0]: input tokens
        #   [1]: attention masks
        #   [2]: target tokens
        #   [3]: target attenion masks
        b_input_ids = batch[0].cuda()
        b_input_mask = batch[1].cuda()
        b_target_ids = batch[2].cuda()
        b_target_mask = batch[3].cuda()

        # tell pytorch not to bother calculating gradients
        # as its only necessary for training
        with torch.no_grad():

            # forward propagation (evaluate model on training batch)
            outputs = model(input_ids=b_input_ids,
                            attention_mask=b_input_mask,
                            labels=b_target_ids,
                            decoder_attention_mask=b_target_mask)

            loss, prediction_scores = outputs[:2]

            # sum the training loss over all batches for average loss at end
            # loss is a tensor containing a single value
            total_valid_loss += loss.item()

    # calculate the average loss over all of the batches.
    global avg_val_loss
    avg_val_loss = total_valid_loss / len(dataloader)

    # Record all statistics from this epoch.
    valid_stats.append(
        {
            'Val Loss': avg_val_loss,
            'Val PPL.': np.exp(avg_val_loss)
        }
    )

    # capture end validation time
    training_time = format_time(time.time() - total_t0)

    # print result summaries
    print("")
    print("summary results")
    print("epoch | val loss | val ppl | val time")
    print(f"{epoch+1:5d} | {avg_val_loss:.5f} | {np.exp(avg_val_loss):.3f} | {training_time:}")

    return valid_stats


In [15]:
## Testing
def testing(model, dataloader):

    print("")
    print("Running Testing...")

    # measure training time
    t0 = time.time()

    # put the model in evaluation mode
    model.eval()

    # track variables
    total_test_loss = 0
    total_test_acc = 0
    total_test_f1 = 0
    predictions = []
    actuals = []
    all_prediction_scores = []

    # evaluate data for one epoch
    for step, batch in enumerate(dataloader):
        # progress update every 40 batches.
        if step % 40 == 0 and not step == 0:
            # Calculate elapsed time in minutes.
            elapsed = format_time(time.time() - t0)
            # Report progress.
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(dataloader), elapsed))

        # Unpack this training batch from our dataloader:
        # `batch` contains three pytorch tensors:
        #   [0]: input tokens
        #   [1]: attention masks
        #   [2]: target tokens
        #   [3]: target attenion masks
        b_input_ids = batch[0].cuda()
        b_input_mask = batch[1].cuda()
        b_target_ids = batch[2].cuda()
        b_target_mask = batch[3].cuda()

        # tell pytorch not to bother calculating gradients
        # as its only necessary for training
        with torch.no_grad():

            # forward propagation (evaluate model on training batch)
            outputs = model(input_ids=b_input_ids,
                            attention_mask=b_input_mask,
                            labels=b_target_ids,
                            decoder_attention_mask=b_target_mask)

            loss, prediction_scores = outputs[:2]

            total_test_loss += loss.item()

            generated_ids = model.generate(
                    input_ids=b_input_ids,
                    attention_mask=b_input_mask,
                    max_length=3
                    )

            preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]
            target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True) for t in b_target_ids]

            total_test_acc += accuracy_score(target, preds)
            total_test_f1 += f1_score(preds, target,
                                       average='weighted',
                                       labels=np.unique(preds))
            predictions.extend(preds)
            actuals.extend(target)
            all_prediction_scores.extend(prediction_scores)

    # calculate the average loss over all of the batches.
    avg_test_loss = total_test_loss / len(dataloader)

    avg_test_acc = total_test_acc / len(test_dataloader)

    avg_test_f1 = total_test_f1 / len(test_dataloader)

    # Record all statistics from this epoch.
    test_stats.append(
        {
            'Test Loss': avg_test_loss,
            'Test PPL.': np.exp(avg_test_loss),
            'Test Acc.': avg_test_acc,
            'Test F1': avg_test_f1
        }
    )
    global df2
    temp_data = pd.DataFrame({'predicted': predictions, 'actual': actuals})
    df2 = df2.append(temp_data)

    return test_stats, all_prediction_scores


In [16]:
# time function
def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [17]:
## Prepare for training
model = T5ForConditionalGeneration.from_pretrained('t5-base').cuda()  # to GPU

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=False)

valid_dataloader = DataLoader(eval_dataset, batch_size=24, shuffle=False)

test_dataloader = DataLoader(test_dataset, batch_size=24, shuffle=False)


# Adam w/ Weight Decay Fix
# set to optimizer_grouped_parameters or model.parameters()
optimizer = AdamW(model.parameters(), lr = 1e-4)

# epochs
epochs = 5

# lr scheduler
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# create gradient scaler for mixed precision
scaler = GradScaler()

Downloading model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]



In [20]:
# create training result storage
training_stats = []
valid_stats = []
best_valid_loss = float('inf')

# for each epoch
for epoch in range(epochs):
    # train
    train(model, train_dataloader, optimizer)
    # validate
    validating(model, valid_dataloader)
    # check validation loss
    if valid_stats[epoch]['Val Loss'] < best_valid_loss:
        best_valid_loss = valid_stats[epoch]['Val Loss']
        # save best model for use later
        torch.save(model.state_dict(), model_op_path + 't5-classification.pt')  # torch save
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(model_op_path + '/model_save/t5-classification/')  # transformers save
        tokenizer.save_pretrained(model_op_path + '/model_save/t5-classification/')  # transformers save


Training...
  Batch    40  of    879.
  Batch    80  of    879.
  Batch   120  of    879.
  Batch   160  of    879.
  Batch   200  of    879.
  Batch   240  of    879.
  Batch   280  of    879.
  Batch   320  of    879.
  Batch   360  of    879.
  Batch   400  of    879.
  Batch   440  of    879.
  Batch   480  of    879.
  Batch   520  of    879.
  Batch   560  of    879.
  Batch   600  of    879.
  Batch   640  of    879.
  Batch   680  of    879.
  Batch   720  of    879.
  Batch   760  of    879.
  Batch   800  of    879.
  Batch   840  of    879.

summary results
epoch | trn loss | total trn loss | trn time 
    1 | 1.27117 | 1117.35667 | 0:02:46

Running Validation...

summary results
epoch | val loss | val ppl | val time
    1 | 0.50189 | 1.652 | 0:00:11

Training...
  Batch    40  of    879.
  Batch    80  of    879.
  Batch   120  of    879.
  Batch   160  of    879.
  Batch   200  of    879.
  Batch   240  of    879.
  Batch   280  of    879.
  Batch   320  of    879.
  Batc

In [21]:
with torch.no_grad():
  torch.cuda.empty_cache()

In [22]:
# test the model
df2 = pd.DataFrame({'predicted': [], 'actual': []})
test_stats = []

# Load best model
model.load_state_dict(torch.load(model_op_path + 't5-classification.pt'))

<All keys matched successfully>

In [23]:
test_stats, all_prediction_scores = testing(model, test_dataloader)


Running Testing...
  Batch    40  of     74.    Elapsed: 0:00:17.


  df2 = df2.append(temp_data)


In [24]:
test_stats, df2

([{'Test Loss': 0.3742058516756908,
   'Test PPL.': 1.4538363943138208,
   'Test Acc.': 0.6853281853281853,
   'Test F1': 0.6851937802699639}],
       predicted     actual
 0        normal     normal
 1          hate       hate
 2        normal     normal
 3     offensive  offensive
 4          hate  offensive
 ...         ...        ...
 1754  offensive     normal
 1755     normal  offensive
 1756     normal     normal
 1757     normal     normal
 1758  offensive  offensive
 
 [1759 rows x 2 columns])

In [25]:
correct = (df2['predicted'].apply(lambda x: x.strip()) == df2['actual'].apply(lambda x: x.strip())).sum()
acc = correct/df2.shape[0]
acc

0.6850483229107447

In [26]:
# Softmax function
def softmax(x):
    max = np.max(x,axis=1,keepdims=True) #returns max of each row and keeps same dims
    e_x = np.exp(x - max) #subtracts each row with its max value
    sum = np.sum(e_x,axis=1,keepdims=True) #returns sum of each row and keeps same dims
    f_x = e_x / sum
    return f_x


## Offensive token = 12130
## Normal token = 1389
## Hate token = 5591

# Function to get probabilities
# all prediction scores = list of logits of len(test_df)
# ith item in all_prediction_scores contain 2 tensors - logits for first word, and logits for second word over 32K tokens.
# This function extracts the logits for normal, offensive and hate, and converts them to probabilities by using a softmax
def convert_to_prob(all_prediction_scores):
  probs = np.zeros((len(all_prediction_scores), 3))
  for i in range(len(all_prediction_scores)):
    ## extract logits for normal, hate and offensive
    offensive_logit = all_prediction_scores[i][0][12130]
    normal_logit = all_prediction_scores[i][0][1389]
    hate_logit = all_prediction_scores[i][0][5591]
    probs[i][0] = normal_logit
    probs[i][1] = offensive_logit
    probs[i][2] = hate_logit
  probs_softmax = softmax(probs)
  df_probs = pd.DataFrame(probs_softmax, columns=['normal','offensive','hate'])
  return df_probs


In [27]:
# Convert logits to probability
df_probs = convert_to_prob(all_prediction_scores)
# Save probabilities
df_probs.to_csv(model_op_path + 'test_probabilities.csv', index=False)
df_probs.head()

Unnamed: 0,normal,offensive,hate
0,0.665071,0.262283,0.072646
1,0.037128,0.12364,0.839232
2,0.496479,0.216747,0.286775
3,0.213326,0.740648,0.046027
4,0.102435,0.234185,0.66338
