In [1]:

!curl -LO https://raw.githubusercontent.com/MohamadMerchant/SNLI/master/data.tar.gz
!tar -xvzf data.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 11.1M  100 11.1M    0     0  12.8M      0 --:--:-- --:--:-- --:--:-- 12.8M
SNLI_Corpus/
SNLI_Corpus/snli_1.0_dev.csv
SNLI_Corpus/snli_1.0_train.csv
SNLI_Corpus/snli_1.0_test.csv


In [1]:
!pip install transformers
!pip install tensorflow_addons



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa
import transformers
from sklearn.manifold import TSNE
#from tensorflow.keras.utils import plot_model
import logging
logging.getLogger('tensorflow').disabled = True
tf.config.run_functions_eagerly(True)
pd.set_option('max_colwidth', 400)


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.8.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [3]:
!pip install mlflow
!pip install pytorch_lightning
!pip install tensorflow==2.8



In [4]:
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

Number of devices: 1


In [5]:
train_df = pd.read_csv("/content/SNLI_Corpus/snli_1.0_train.csv", nrows=100000)
valid_df = pd.read_csv("/content/SNLI_Corpus/snli_1.0_dev.csv")
test_df = pd.read_csv("/content/SNLI_Corpus/snli_1.0_test.csv")

train_df = train_df[train_df.similarity != "-"].sample(frac=1.0, random_state=42).reset_index(drop=True)
valid_df = valid_df[valid_df.similarity != "-"].sample(frac=1.0, random_state=42).reset_index(drop=True)
len(train_df)


99890

In [6]:
train_df=train_df[0:300]
valid_df=valid_df[0:100]
test_df=test_df[0:40]

In [7]:
print(train_df.head(5))

      similarity  \
0  contradiction   
1        neutral   
2     entailment   
3        neutral   
4     entailment   

                                                                                        sentence1  \
0                                                 A woman is using toy which blows giant bubbles.   
1                                      A young Asian girl holds a stuffed cat toy in a classroom.   
2  A young woman with an afro and an electronic device in her hands walks next to an orange bike.   
3                      A young asian girl is sliding down a pole on outdoor playground equipment.   
4                                                                   a man is walking with a cane.   

                                                                                                             sentence2  
0                                                                   A little girl is playing with chalk on a driveway.  
1  A young Asian girl sits in 

In [8]:
max_length = 64
batch_size = 32

In [10]:
#testing
key,value,o=enumerate(train_df['similarity'].astype('category').cat.categories)
print(key,value,o)

(0, 'contradiction') (1, 'entailment') (2, 'neutral')


In [9]:
label_map = dict(enumerate(train_df['similarity'].astype('category').cat.categories))
y_train = train_df['similarity'].map({v:k for k, v in label_map.items()}).values
y_val = valid_df['similarity'].map({v:k for k, v in label_map.items()}).values
y_test = test_df['similarity'].map({v:k for k, v in label_map.items()}).values

In [None]:
tokenizer = transformers.BertTokenizer.from_pretrained( "bert-base-uncased", do_lower_case=True)
print(len(tokenizer.get_vocab()))
tokenizer.encode('Hello Tensorflow')

In [13]:
#label encoding
label_map = dict(enumerate(train_df['similarity'].astype('category').cat.categories))
len(label_map.values())

3

In [14]:
sentence_pairs = train_df[["sentence1", "sentence2"]].values[:5]
encoded = tokenizer.batch_encode_plus(
    sentence_pairs.tolist(),
    add_special_tokens=True,
    max_length=max_length,
    return_attention_mask=True,
    return_token_type_ids=True,
    padding='max_length',
    return_tensors="tf")

print(encoded.keys())

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])


In [15]:
''' input_ids: Numerical representations of tokens in the input text. They map each token to an integer in the model's vocabulary.

attention_mask: Specifies which tokens should be attended to, and which should be ignored (e.g., padding tokens). It's vital for models to focus on meaningful content.

token_type_ids: Differentiate segments in tasks involving multiple sequences (e.g., question-answering). They indicate to the model which part of the input each token belongs to.'''
print(encoded['input_ids'][0][:32])
print(encoded['token_type_ids'][0][:32])
print(encoded['attention_mask'][0][:32])

tf.Tensor(
[  101  1037  2450  2003  2478  9121  2029 13783  5016 17255  1012   102
  1037  2210  2611  2003  2652  2007 16833  2006  1037 11202  1012   102
     0     0     0     0     0     0     0     0], shape=(32,), dtype=int32)
tf.Tensor([0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0], shape=(32,), dtype=int32)
tf.Tensor([1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0], shape=(32,), dtype=int32)


In [16]:
tokenizer.decode(encoded['input_ids'][1][:32])

'[CLS] a young asian girl holds a stuffed cat toy in a classroom. [SEP] a young asian girl sits in class with a stuffed cat toy, the only surviving possession'

In [10]:
label_map = dict(enumerate(train_df['similarity'].astype('category').cat.categories))
y_train = train_df['similarity'].map({v:k for k, v in label_map.items()}).values
y_val = valid_df['similarity'].map({v:k for k, v in label_map.items()}).values
y_test = test_df['similarity'].map({v:k for k, v in label_map.items()}).values

In [18]:
print(type(y_train))
print(y_test)

<class 'numpy.ndarray'>
[2 1 0 2 1 0 1 2 0 2 1 0 1 1 0 0 1 2 1 0 2 0 1 2 2 0 1 1 2 0 0 1 1 1 2 0 2
 1 0 1]


In [None]:
class BertDataGenerator(tf.keras.utils.Sequence):
    """Generates batches of data.

    Args:
        sentence_pairs: Array of premise and hypothesis input sentences.
        labels: Array of labels.
        batch_size: Integer batch size.
        shuffle: boolean, whether to shuffle the data.
        include_targets: boolean, whether to incude the labels.

    Returns:
        Tuples `([input_ids, attention_mask, `token_type_ids], labels)`
        (or just `[input_ids, attention_mask, `token_type_ids]`
         if `include_targets=False`)
    """

    def __init__(self, sentence_pairs, labels, batch_size=batch_size, shuffle=True, include_targets=True):

        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        self.tokenizer = transformers.BertTokenizer.from_pretrained( "bert-base-uncased", do_lower_case=True)
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()
        print("INIT")

    def __len__(self):
        # Denotes the number of batches per epoch.
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        # Retrieves the batch of index.
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # With BERT tokenizer's batch_encode_plus batch of both the sentences are, encoded together and separated by [SEP] token.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=max_length,
            return_attention_mask=True,
            return_token_type_ids=True,
            padding='max_length',
            return_tensors="tf",
        )

        # Convert batch of encoded features to numpy array.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        #print("input_ids",input_ids)
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        #print("attention",attention_masks)
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")
        #print("token",token_type_ids )

        # Set to true if data generator is used for training/validation.
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            print("LABELS ",labels[0],labels.shape,labels)
            #print([input_ids, attention_masks, token_type_ids])
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            print("working correct block")
            print([input_ids, attention_masks, token_type_ids])
            return [[input_ids, attention_masks, token_type_ids]]

    def on_epoch_end(self):
        # Shuffle indexes after each epoch if shuffle is set to True.
        if self.shuffle:
            np.random.RandomState(42).shuffle(self.indexes)


In [53]:
train_data = BertDataGenerator(train_df[["sentence1", "sentence2"]].values.astype("str"), y_train, batch_size=batch_size, shuffle=True)
valid_data = BertDataGenerator(valid_df[["sentence1", "sentence2"]].values.astype("str"), y_val, batch_size=batch_size, shuffle=False)

INIT
INIT


In [48]:
def build_model():

  # Encoded token ids from BERT tokenizer
  input_ids = tf.keras.layers.Input(shape=(max_length, ), dtype=tf.int32, name="input_ids")
  # Attention masks indicates to the model which tokens should be attended to
  attention_masks = tf.keras.layers.Input(shape=(max_length, ), dtype=tf.int32, name="attention_masks")
  # Token type ids are binary masks identifying different sequences in the model
  token_type_ids = tf.keras.layers.Input(shape=(max_length, ), dtype=tf.int32, name="token_type_ids")

  # Loading pretrained BERT model, freeze the weight, check bert_model.config to configure
  bert_model = transformers.TFBertModel.from_pretrained("bert-base-uncased")
  bert_model.trainable = False

  print("input_ids1",input_ids)
  bert_output = bert_model(input_ids, attention_mask=attention_masks, token_type_ids=token_type_ids, output_attentions=False, output_hidden_states=False)
  sequence_output = bert_output["last_hidden_state"] # (m, seq_len, emb_dim)
  pooled_output = bert_output["pooler_output"] # (m, emb_dim)

  # Add trainable layers on top of Bert to adapt the pretrained features on the new data.
  bi_lstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(2, return_sequences=True))(sequence_output) # (m, emb_dim, hidden_unit * 2)
  print("input_ids2",input_ids)
  # Applying hybrid pooling approach to bi_lstm sequence output.
  avg_pool = tf.keras.layers.GlobalAveragePooling1D()(bi_lstm) # (m, hidden_unit)
  max_pool = tf.keras.layers.GlobalMaxPooling1D()(bi_lstm) # (m, hidden_unit)
  concat = tf.keras.layers.concatenate([avg_pool, max_pool]) # (m, hidden_unit)
  dropout = tf.keras.layers.Dropout(0.3)(concat)
  output = tf.keras.layers.Dense(3, activation="softmax")(dropout) #(m, 3)

  model = tf.keras.models.Model(inputs=[input_ids, attention_masks, token_type_ids], outputs=output)
  return model

In [18]:
print("Eager execution:", tf.executing_eagerly())

Eager execution: True


In [54]:
bert_encoder = build_model()
bert_encoder.compile(optimizer=tf.keras.optimizers.Adam(), loss="sparse_categorical_crossentropy", metrics=["acc"])
bert_encoder.summary()

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

input_ids1 KerasTensor(type_spec=TensorSpec(shape=(None, 64), dtype=tf.int32, name='input_ids'), name='input_ids', description="created by layer 'input_ids'")
input_ids2 KerasTensor(type_spec=TensorSpec(shape=(None, 64), dtype=tf.int32, name='input_ids'), name='input_ids', description="created by layer 'input_ids'")
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 64)]         0           []                               
                                                                                                  
 attention_masks (InputLayer)   [(None, 64)]         0           []                               
                                                                                                  
 token_type_ids (InputLayer)    [(None, 64)]         0           []    

In [None]:
epochs = 1
history = bert_encoder.fit(train_data, validation_data=valid_data, epochs=epochs, use_multiprocessing=True, workers=-1, steps_per_epoch=10)

In [None]:
''' This line sets the fourth layer (index 3, as indexing starts from 0) of the bert_encoder model to be
trainable. It means that during the training process, the weights of this specific layer will be updated,
allowing the model to fine-tune the parameters of this layer on your specific dataset.
This can help improve performance by adapting pre-learned features to new tasks.'''
bert_encoder.layers[3].trainable = True
# Recompile the model to make the change effective.
bert_encoder.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
bert_encoder.summary()

# train entire model
history = bert_encoder.fit(train_data, validation_data=valid_data, epochs=epochs, use_multiprocessing=True, workers=-1,)

In [56]:
y_test = test_df['similarity'].map({v:k for k, v in label_map.items()}).values
print(np.unique(y_test))
y_test=np.nan_to_num(y_test, nan=0)

#test_df=test_df[0:40]
conditions = [y_test == 0, y_test == 1, y_test == 2]
choices = [1, 2, 3]
y_test=y_test.astype(int)
# Replace values
#y_test= np.select(conditions, choices, default=y_test)
print(np.unique(y_test))
print(test_df[0:5])
print(y_test.shape)

[0 1 2]
[0 1 2]
      similarity  \
0        neutral   
1     entailment   
2  contradiction   
3        neutral   
4     entailment   

                                                                                    sentence1  \
0  This church choir sings to the masses as they sing joyous songs from the book at a church.   
1  This church choir sings to the masses as they sing joyous songs from the book at a church.   
2  This church choir sings to the masses as they sing joyous songs from the book at a church.   
3                             A woman with a green headscarf, blue shirt and a very big grin.   
4                             A woman with a green headscarf, blue shirt and a very big grin.   

                               sentence2  
0  The church has cracks in the ceiling.  
1        The church is filled with song.  
2    A choir singing at a baseball game.  
3                    The woman is young.  
4               The woman is very happy.  
(40,)


In [57]:
y_test

array([2, 1, 0, 2, 1, 0, 1, 2, 0, 2, 1, 0, 1, 1, 0, 0, 1, 2, 1, 0, 2, 0,
       1, 2, 2, 0, 1, 1, 2, 0, 0, 1, 1, 1, 2, 0, 2, 1, 0, 1])

In [None]:
test_df[["sentence1", "sentence2"]].values.astype("str")

In [58]:
test_data = BertDataGenerator(test_df[["sentence1", "sentence2"]].values.astype("str"), y_test, batch_size=batch_size, shuffle=False)
bert_encoder.evaluate(test_data, verbose=1)

INIT






[1.1689131259918213, 0.28125]

In [62]:
def check_similarity(sentence1, sentence2):
    print( np.array([[str(sentence1), str(sentence2)]]))
    sentence_pairs =  np.array([[str(sentence1), str(sentence2)]])
    test_data = BertDataGenerator(sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False)
    print("TD",test_data)
    proba = bert_encoder.predict(test_data)[0]
    print("CS",proba)
    idx = np.argmax(proba)
    proba = f"{proba[idx]: .2f}%"
    pred = label_map.get(idx)
    return pred, proba

In [64]:
sentence1 = "Two women are observing something together."
sentence2 = "Two women are standing in different direction"
check_similarity(sentence1, sentence2)

[['Two women are observing something together.'
  'Two women are standing in different direction']]
INIT
TD <__main__.BertDataGenerator object at 0x7c9d38e3f670>
working correct block
[array([[  101,  2048,  2308,  2024, 14158,  2242,  2362,  1012,   102,
         2048,  2308,  2024,  3061,  1999,  2367,  3257,   102,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0]], dtype=int32), array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,

('neutral', ' 0.47%')

PYTORCH LIGHTENING

In [11]:
import pandas as pd
import numpy as np
import sys, os
import yaml, logging
import mlflow
from mlflow.tracking import MlflowClient
import mlflow.pytorch
#from skmultilearn.model_selection import iterative_train_test_split
import transformers
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.metrics import classification_report, f1_score
from tqdm import tqdm
from typing import Optional, Union

In [12]:
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [13]:
def split_data(
    df:pd.DataFrame,
    aspect_classes:list,
    x_col:list=['text', 'label'],
    test_size:float=0.2,
    seed:int=0):
    '''Split data into test train set'''

    np.random.seed(seed)
    x = df[x_col].values
    y = df[aspect_classes].values
    X_train, y_train, X_test, y_test = iterative_train_test_split(x, y, test_size=test_size)
    return pd.DataFrame(X_train, columns=x_col), pd.DataFrame(X_test, columns=x_col)


class DataModule(pl.LightningDataModule):
    def __init__(
        self, df_train:pd.DataFrame, df_test:pd.DataFrame, max_len:int, batch_size:int,
        tokenizer:str="distilbert-base-uncased", text_col:str='text',y_train=0,y_val=0
        ):
        '''Picking Up Raw Data and Processing'''

        super().__init__()
        self.train_df = df_train
        self.test_df = df_test
        self.max_len = max_len
        self.batch_size = batch_size
        self.text_col = text_col
        self.y_train=y_train
        self.y_val=y_val

        if tokenizer == "distilbert-base-uncased":
            print("Applying Distillbert Tokenizer")
            self.tokenizer = transformers.DistilBertTokenizer.from_pretrained(
                "distilbert-base-uncased")
        else:
            #logger.info("Applying Bertweet Tokenizer")
            self.tokenizer = transformers.BertweetTokenizer.from_pretrained(
                "vinai/bertweet-base", normalization=True)

    class Dataset(Dataset):
        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __getitem__(self, idx):
            item = {
                key: torch.tensor(val[idx]).clone().detach()
                for key, val in self.encodings.items()
                }
            item['labels'] = torch.tensor(self.labels[idx])
            return item

        def __len__(self):
            return len(self.labels)

    def train_dataloader(self):
        '''Return DataLoader for train tokens and labels'''

        features = self.tokenizer(

            self.train_df[self.text_col].values.astype("str").tolist(),
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            return_tensors='pt')
        #print("FEA",features)

        labels=torch.from_numpy(self.y_train)
        #print(labels)
        one_hot_labels = torch.nn.functional.one_hot(labels, num_classes=3)
        #print(one_hot_labels)
        dataset = self.Dataset(features, one_hot_labels)
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=True, num_workers=5)

    def val_dataloader(self):
        '''Return DataLoader for test tokens and labels'''

        # only pad to longest length of the current batch
        features = self.tokenizer(
            self.test_df[self.text_col].values.astype("str").tolist(),
            max_length=self.max_len,
            truncation=True,
            padding='longest',
            return_tensors='pt')

        input_ids = np.array(features["input_ids"], dtype="int32")
        #print("input_ids",input_ids)
        attention_masks = np.array(features["attention_mask"], dtype="int32")
        #print("attention",attention_masks)
        token_type_ids = np.array(features["token_type_ids"], dtype="int32")

        #labels = self.test_df['label'].tolist()
        labels=torch.from_numpy(self.y_val)
        one_hot_labels = torch.nn.functional.one_hot(labels, num_classes=3)
        #print(one_hot_labels)
        dataset = self.Dataset(features, one_hot_labels)
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=False, num_workers=5)

    def calculate_pos_weights(self, class_counts, len_data):
        '''calculate weight for imbalance data'''
        pos_weights = np.ones_like(class_counts)
        neg_counts = [len_data - pos_count for pos_count in class_counts]
        for cdx, (pos_count, neg_count) in enumerate(zip(class_counts,  neg_counts)):
            pos_weights[cdx] = neg_count / (pos_count + 1e-5)
            # pos_weights[cdx] = 1. if pos_weights[cdx] == 0 else pos_weights[cdx]
        return torch.as_tensor(pos_weights, dtype=torch.float)

    def get_weight(self, df, aspect_classes):
        return self.calculate_pos_weights(
             df[aspect_classes].sum().values,
             len(df)
         )

In [14]:
!pip3 install emoji==0.6.0

Collecting emoji==0.6.0
  Downloading emoji-0.6.0.tar.gz (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.0/51.0 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-0.6.0-py3-none-any.whl size=49719 sha256=3e9dbd59105d6425b4393e017c5a8e8093f03490d7e7e30cabff56d72ceb9383
  Stored in directory: /root/.cache/pip/wheels/1b/bd/d9/310c33c45a553798a714e27e3b8395d37128425442b8c78e07
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-0.6.0


In [15]:
#INPUT = 'sample.csv'
X_COL = ['text']
Y_COL = ['contradiction','entailment','neutral']

BATCH_SIZE = 32
INPUT_MAX_LEN = 64
STANDARD_LR = 5e-5
FINE_LR = 5e-7
EPOCHS = 1
LIMIT_STEP = 5
MODEL = "vinai/bertweet-base"
TEXT_COL = ["sentence1", "sentence2"]

#df = pd.read_parquet(INPUT)
#df_train, df_val = split_data(df, Y_COL, X_COL, test_size=0.2, seed=0)
print(len(y_test.tolist()))
data_module = DataModule(
    train_df,
    valid_df,
    max_len=INPUT_MAX_LEN,
    batch_size=BATCH_SIZE,
    tokenizer=MODEL,
    text_col=TEXT_COL,
    y_train=y_train,
    y_val=y_val)

40


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.txt:   0%|          | 0.00/843k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.91M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

In [16]:
train_loader = data_module.train_dataloader()
print("LOA",train_loader)

LOA <torch.utils.data.dataloader.DataLoader object at 0x7f8343aa9ed0>




In [17]:

class LightningArticleClassifier(pl.LightningModule):
    def __init__(
        self, output_class_len, learning_rate,
        max_len=64, hidden_dim=64, pos_weight=None, bert_model="distilbert-base-uncased"
        ):

        super().__init__()
        self.max_len = max_len
        self.lr = learning_rate
        self.emb_dim = 768
        self.hidden_dim = hidden_dim
        self.drop_out = torch.nn.Dropout(0.1)
        self.fc1 = torch.nn.Linear(self.emb_dim, self.emb_dim // 2)
        self.fc2 = torch.nn.Linear(self.emb_dim // 2, self.hidden_dim * 4)
        self.fc3 = torch.nn.Linear(self.hidden_dim * 4, self.hidden_dim)
        self.fc4 = torch.nn.Linear(self.hidden_dim, output_class_len)
        self.tanh = torch.nn.Tanh()
        self.gelu = torch.nn.GELU()
        self.softmax = torch.nn.Softmax(dim=1)

        if pos_weight is not None:
            self.pos_weight = torch.tensor(pos_weight, dtype=torch.float)
        else:
            self.pos_weight = None

        if bert_model == "distilbert-base-uncased":
            logger.info("Importing Distillbert Model")
            self.bert_model = transformers.DistilBertModel.from_pretrained("distilbert-base-uncased")
        else:
            logger.info("Importing Bertweet Model")
            self.bert_model = transformers.AutoModel.from_pretrained("vinai/bertweet-base")

        # metrics
        self.val_loss, self.val_corrects, self.val_len = 0., 0., 0.
        self.train_loss, self.train_corrects, self.train_len = 0., 0., 0.
        # self.train_f1, self.val_f1 = 0., 0.
        self.train_step, self.val_step = 0, 0
        self.epoch_loss_train, self.epoch_acc_train,  self.epoch_f1_train = [], [], []
        self.epoch_loss_val, self.epoch_acc_val, self.epoch_f1_val = [], [], []

    def forward(self, input_ids, attention_mask):
        bert_output = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_attentions=False,
            output_hidden_states=False)

        # feed forward layer
        # output = bert_output['pooled_output'][:, :]
        output = bert_output['last_hidden_state'][:, 0, :]
        output = self.fc1(output)
        output = self.tanh(output)
        output = self.drop_out(output)
        output = self.fc2(output)
        output = self.gelu(output)
        output = self.drop_out(output)
        output = self.fc3(output)
        output = self.gelu(output)
        output = self.drop_out(output)
        output = self.fc4(output)

        #output = torch.sigmoid(output)
        output = self.softmax(output)
        print("output",output.size(),output[0])
        return output

    def criterion(self, y_pred, y_true):

        if self.pos_weight != None:
            criterion = torch.nn.CrossEntropyLoss(weight=self.pos_weight.cuda())
            #criterion = torch.nn.BCEWithLogitsLoss(pos_weight=self.pos_weight.cuda())
        else:
            print("CROSS ENTROPY")
            print("YPRED ",y_pred[0]," ",y_true[0])
            criterion = torch.nn.CrossEntropyLoss()
            #criterion = torch.nn.BCEWithLogitsLoss()
        return criterion(y_pred, y_true.float())

    def training_step(self, train_batch, batch_idx):
        input_ids = train_batch['input_ids']
        attention_mask = train_batch['attention_mask']
        labels = train_batch['labels']
        print("labels",labels)
        y_pred = self.forward(input_ids, attention_mask)
        print("Ypred",y_pred," ",labels)
        loss = self.criterion(y_pred, labels)
        # _, preds = torch.max(y_pred, 1)
        preds = (y_pred >= 0.5).int()

        self.train_loss += loss
        # self.train_corrects += torch.sum(torch.sum(preds == labels.data))
        self.train_corrects += torch.sum(torch.all(torch.eq(preds, labels), dim=1).int())
        # self.train_f1 += f1_score(preds.cpu(), labels.data.cpu(), average='macro')

        self.train_len += len(labels)
        self.train_step += 1
        self.log('train_loss', loss)

        return loss

    def on_train_epoch_end(self):
        self.epoch_acc_train.append(self.train_corrects / (self.train_len + 1))
        self.epoch_loss_train.append(self.train_loss / (self.train_len + 1))
        # self.epoch_f1_train.append(self.train_f1 / self.train_step)

        if self.current_epoch % 2 == 0:
            print(f'\nEpoch: {self.current_epoch}')
            print(f'Training: loss: {self.epoch_loss_train[-1]}')
            print(f'Training: Accuracy: {self.epoch_acc_train[-1]}')
            # print(f'Training: Macro F1: {self.epoch_f1_train[-1]}')

        self.train_loss, self.train_corrects = 0., 0.
        self.train_step, self.train_len = 0., 0.

    def validation_step(self, val_batch, batch_idx):
        input_ids = val_batch['input_ids']
        attention_mask = val_batch['attention_mask']
        labels = val_batch['labels']
        y_pred = self.forward(input_ids, attention_mask)
        loss = self.criterion(y_pred, labels)
        preds = (y_pred >= 0.5).int()
        # _, preds = torch.max(y_pred, 1)

        self.val_loss += loss
        # self.val_corrects += torch.sum(torch.sum(preds == labels.data))
        self.val_corrects += torch.sum(torch.all(torch.eq(preds, labels), dim=1).int())

        # self.val_f1 += f1_score(preds.cpu(), labels.data.cpu(), average='macro')

        self.val_len += len(labels)
        self.val_step += 1
        self.log('val_loss', loss)
        return loss

    def on_validation_epoch_end(self):
        self.epoch_acc_val.append(self.val_corrects / (self.val_len + 1))
        self.epoch_loss_val.append(self.val_loss / (self.val_len + 1))
        # self.epoch_f1_val.append(self.val_f1 / self.val_step)

        if self.current_epoch % 2 == 0:
            #logger.info(f'Validation: loss: {self.epoch_loss_val[-1]}')
            print(f'Validation: Accuracy: {self.epoch_acc_val[-1]}')
            # print(f'Validation: Macro F1: {self.epoch_f1_val[-1]}')

        self.val_loss, self.val_corrects = 0., 0.
        self.val_step, self.val_len = 0., 0.

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        '''
        lr_scheduler = {'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min'),
                        "monitor": "train_loss",
                       }

        return [optimizer], [lr_scheduler]
        '''
        return optimizer

In [18]:
model = LightningArticleClassifier(
    output_class_len=3,
    learning_rate=STANDARD_LR,
    max_len=INPUT_MAX_LEN,
    # pos_weight=[1, 5, 1, 1, 1]
    bert_model=MODEL
)

trainer = pl.Trainer(
    max_epochs=1,
    limit_train_batches=LIMIT_STEP,
    #accelerator="cpu",
    #strategy="ddp"

    # accelerator='ddp',
    # default_root_dir='/dbfs/FileStore/temp/kean_temp/logs'
)

for param in model.bert_model.parameters():
    param.requires_grad = False
trainer.fit(model, data_module)

### fine tuning
if FINE_LR is not None:
    trainer = pl.Trainer(
        max_epochs=1,
        limit_train_batches=LIMIT_STEP,
        #accelerator="gpu",
        #strategy="ddp"

        # accelerator='ddp',
        # default_root_dir='/dbfs/FileStore/temp/kean_temp/logs'
    )

    model.lr = FINE_LR
    for param in model.bert_model.parameters():
        param.requires_grad = True
    trainer.fit(model, data_module)


pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type         | Params
--------------------------------------------
0 | drop_out   | Dropout      | 0     
1 | fc1        | Linear       | 295 K 
2 | fc2        | Linear       | 98.6 K
3 | fc3        | Linear       | 16.4 K
4 | fc4        | Linear       | 195   
5 | tanh       | Tanh         | 0     
6 | gelu       | GELU         | 0     
7 | softmax    | Softmax      | 0     
8 | bert_model | RobertaModel | 134 M 
--------------------------------------------
410 K     Trainable params
134 M     Non-trainable params
135 M     Total params
541.242   Total estimated 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])


output torch.Size([32, 3]) tensor([0.3544, 0.3211, 0.3246])
CROSS ENTROPY
YPRED  tensor([0.3544, 0.3211, 0.3246])   tensor([1, 0, 0])
output torch.Size([32, 3]) tensor([0.3525, 0.3203, 0.3272])
CROSS ENTROPY
YPRED  tensor([0.3525, 0.3203, 0.3272])   tensor([0, 1, 0])
Validation: Accuracy: 0.0


  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (5) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

labels tensor([[0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 0],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 1, 0]])
output torch.Size([32, 3]) tensor([0.3503, 0.3231, 0.3266], grad_fn=<SelectBackward0>)
Ypred tensor([[0.3503, 0.3231, 0.3266],
        [0.3544, 0.3192, 0.3264],
        [0.3543, 0.3209, 0.3248],
        [0.3528, 0.3179, 0.3293],
        [0.3530, 0.3198, 0.3272],
        [0.3535, 0.3205, 0.3260],
        [0.3531, 0.3213, 0.3256],
        [0.3560, 0.3188, 0.3252],
        [0.3499, 0.

  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])


Validation: |          | 0/? [00:00<?, ?it/s]

output torch.Size([32, 3]) tensor([0.3528, 0.3194, 0.3278])
CROSS ENTROPY
YPRED  tensor([0.3528, 0.3194, 0.3278])   tensor([1, 0, 0])
output torch.Size([32, 3]) tensor([0.3508, 0.3190, 0.3302])
CROSS ENTROPY
YPRED  tensor([0.3508, 0.3190, 0.3302])   tensor([0, 1, 0])
output torch.Size([32, 3]) tensor([0.3520, 0.3194, 0.3286])
CROSS ENTROPY
YPRED  tensor([0.3520, 0.3194, 0.3286])   tensor([1, 0, 0])
output torch.Size([4, 3]) tensor([0.3525, 0.3194, 0.3280])
CROSS ENTROPY
YPRED  tensor([0.3525, 0.3194, 0.3280])   tensor([1, 0, 0])
Validation: Accuracy: 0.0

Epoch: 0
Training: loss: 0.034132588654756546
Training: Accuracy: 0.0


INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type         | Params
--------------------------------------------
0 | drop_out   | Dropout      | 0     
1 | fc1        | Linear       | 295 K 
2 | fc2        | Linear       | 98.6 K
3 | fc3        | Linear       | 16.4 K
4 | fc4        | Linear       | 195   
5 | tanh       | Tanh         | 0     
6 | gelu       | GELU         | 0     
7 | softmax    | Softmax      | 0     
8 | bert_model | RobertaModel | 134 M 
--------------------------------------------
135 M     Trainable params
0         Non-trainable params
135 M

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])


output torch.Size([32, 3]) tensor([0.3528, 0.3194, 0.3278])
CROSS ENTROPY
YPRED  tensor([0.3528, 0.3194, 0.3278])   tensor([1, 0, 0])
output torch.Size([32, 3]) tensor([0.3508, 0.3190, 0.3302])
CROSS ENTROPY
YPRED  tensor([0.3508, 0.3190, 0.3302])   tensor([0, 1, 0])
Validation: Accuracy: 0.0


  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (5) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

labels tensor([[0, 0, 1],
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 0, 1],
        [1, 0, 0],
        [1, 0, 0],
        [0, 0, 1]])
output torch.Size([32, 3]) tensor([0.3535, 0.3152, 0.3313], grad_fn=<SelectBackward0>)
Ypred tensor([[0.3535, 0.3152, 0.3313],
        [0.3543, 0.3189, 0.3268],
        [0.3514, 0.3187, 0.3299],
        [0.3487, 0.3226, 0.3287],
        [0.3520, 0.3228, 0.3252],
        [0.3483, 0.3218, 0.3299],
        [0.3510, 0.3201, 0.3289],
        [0.3524, 0.3196, 0.3280],
        [0.3532, 0.

  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  key: torch.tensor(val[idx]).clone().detach()
  item['labels'] = torch.tensor(self.labels[idx])
  item['labels'] = torch.tensor(self.labels[idx])


Validation: |          | 0/? [00:00<?, ?it/s]

output torch.Size([32, 3]) tensor([0.3526, 0.3195, 0.3279])
CROSS ENTROPY
YPRED  tensor([0.3526, 0.3195, 0.3279])   tensor([1, 0, 0])
output torch.Size([32, 3]) tensor([0.3506, 0.3193, 0.3301])
CROSS ENTROPY
YPRED  tensor([0.3506, 0.3193, 0.3301])   tensor([0, 1, 0])
output torch.Size([32, 3]) tensor([0.3518, 0.3195, 0.3286])
CROSS ENTROPY
YPRED  tensor([0.3518, 0.3195, 0.3286])   tensor([1, 0, 0])
output torch.Size([4, 3]) tensor([0.3523, 0.3197, 0.3280])
CROSS ENTROPY
YPRED  tensor([0.3523, 0.3197, 0.3280])   tensor([1, 0, 0])
Validation: Accuracy: 0.0

Epoch: 0
Training: loss: 0.03414466977119446
Training: Accuracy: 0.0


INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.


In [19]:
def log_model(model_name, model, params, metrics, artifacts=None, experiment_uri=''):
    '''Log Model in MLFlow'''

    ### define experiment uri for mlflow
    if experiment_uri != '':
        if not mlflow.get_experiment_by_name(experiment_uri):
            mlflow.create_experiment(experiment_uri)
        mlflow.set_experiment(experiment_uri)

    with mlflow.start_run(run_name=model_name) as run:
        experimentID = run.info.experiment_id
        print("Experiment ID", experimentID)
        mlflow.pytorch.log_model(model, model_name)
        mlflow.pytorch.log_state_dict(model.state_dict(), model_name)

        for k,v in params.items():
            mlflow.log_param(k, v)
        for k,v in metrics.items():
            mlflow.log_metric(k, v)
        if artifacts is not None:
            for artifact in artifacts:
                mlflow.log_artifact(artifact)
        mlflow.end_run()

In [None]:
experiment_uri = f"/Users/{cfg['mlflow_email']}/{cfg['model_name']}"
logger.info(f"Logging to MlFlow in : {experiment_uri}")
model_name = cfg['model_name']

params = {
    'description': cfg['model_desc'],
    'epochs': EPOCHS,
    'max_sequence_length': INPUT_MAX_LEN,
    'batch_size': BATCH_SIZE,
    'max_step_per_epoch': LIMIT_STEP,
    'lr': STANDARD_LR,
}

model_metrics = {
  "train_loss" : round(model.epoch_loss_train[-1].item(), 3),
  "train_acc" : round(model.epoch_acc_train[-1].item(), 3),
  "eval_loss" : round(model.epoch_loss_val[-1].item(), 3),
  "eval_acc" : round(model.epoch_acc_val[-1].item(), 3),
}

log_model(model_name, model, params, model_metrics, experiment_uri=experiment_uri)

In [27]:
### model validation

df_test = data_module.test_df
test_set = data_module.val_dataloader()
print("TESt",test_set)
batch = test_set.dataset.encodings
#print("batch",batch)
total_len = len(batch['input_ids'])
print("TOT",total_len)
step = 512

probas = []
model.eval()
with torch.no_grad():
    for i in tqdm(range(0, total_len, step)):
        outputs = model.forward(
            input_ids=batch['input_ids'][i : i + step],
            attention_mask=batch['attention_mask'][i : i + step]
        )
        print(outputs)

        probas.append(torch.sigmoid(outputs).cpu().detach().numpy())
        print("probas",probas)

thres = 0.5
result = np.vstack(probas)
result = (result >= thres).astype(int)
print(
    classification_report(np.array(test_set.dataset.labels), result, target_names=Y_COL)
)



TESt <torch.utils.data.dataloader.DataLoader object at 0x7f8330458250>
TOT 100


100%|██████████| 1/1 [00:18<00:00, 18.03s/it]

output torch.Size([100, 3]) tensor([0.3526, 0.3195, 0.3279])
tensor([[0.3526, 0.3195, 0.3279],
        [0.3520, 0.3197, 0.3283],
        [0.3512, 0.3202, 0.3286],
        [0.3514, 0.3202, 0.3284],
        [0.3520, 0.3183, 0.3297],
        [0.3520, 0.3203, 0.3277],
        [0.3533, 0.3201, 0.3266],
        [0.3515, 0.3194, 0.3291],
        [0.3507, 0.3211, 0.3282],
        [0.3513, 0.3193, 0.3294],
        [0.3513, 0.3192, 0.3296],
        [0.3516, 0.3197, 0.3286],
        [0.3524, 0.3195, 0.3281],
        [0.3518, 0.3201, 0.3281],
        [0.3507, 0.3199, 0.3295],
        [0.3508, 0.3207, 0.3285],
        [0.3512, 0.3200, 0.3288],
        [0.3522, 0.3198, 0.3281],
        [0.3524, 0.3194, 0.3282],
        [0.3511, 0.3201, 0.3287],
        [0.3507, 0.3198, 0.3295],
        [0.3512, 0.3200, 0.3287],
        [0.3524, 0.3192, 0.3285],
        [0.3514, 0.3200, 0.3286],
        [0.3519, 0.3194, 0.3287],
        [0.3512, 0.3201, 0.3286],
        [0.3516, 0.3193, 0.3290],
        [0.3510, 0.31


