In [34]:
import pandas as pd
import numpy as np

import tensorflow as tf
from transformers import AutoTokenizer,AutoModelForSeq2SeqLM, pipeline,BertTokenizer,logging
from huggingface_hub import from_pretrained_keras

import warnings
warnings.filterwarnings('ignore')
logging.set_verbosity_error()

In [69]:
topics = pd.read_csv('data/topics.csv')
conclusions = pd.read_csv('data/conclusions.csv')
opinions = pd.read_csv('data/opinions.csv')

df = pd.merge(topics[['topic_id','text']],conclusions[['topic_id','text']],on='topic_id')
df = pd.merge(df, opinions[['topic_id','text','type','effectiveness']], on='topic_id')
df.rename({'text_x':'topics','text_y':'conclusions','text':'opinions'}, axis='columns',inplace=True)
df

Unnamed: 0,topic_id,topics,conclusions,opinions,type,effectiveness
0,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,I think that the face is a natural landform be...,Claim,Adequate
1,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,"If life was on Mars, we would know by now. The...",Evidence,Adequate
2,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,People thought that the face was formed by ali...,Counterclaim,Adequate
3,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,"though some say that life on Mars does exist, ...",Rebuttal,Ineffective
4,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,"It says in paragraph 7, on April 5, 1998, Mars...",Evidence,Adequate
...,...,...,...,...,...,...
23349,FF9E0379CD98,"I think is not benefit for student, that take ...",Conclusion: you dont are prepared for take the...,you cant work or cant study after school with ...,Evidence,Adequate
23350,FF9E0379CD98,"I think is not benefit for student, that take ...",Conclusion: you dont are prepared for take the...,in the end you cant take the class because is ...,Counterclaim,Adequate
23351,FFA381E58FC6,Advice is something that can impact a persons ...,In conclusion asking for an opinion can be ben...,For many people they don't like only asking on...,Claim,Adequate
23352,FFA381E58FC6,Advice is something that can impact a persons ...,In conclusion asking for an opinion can be ben...,also people have different views and opinions ...,Claim,Adequate



-----------------------------

## Summarization





In [32]:
model_summarizer = AutoModelForSeq2SeqLM.from_pretrained("necover/fine_tuned_bartbasedmodel_concluions")
tokenizer = AutoTokenizer.from_pretrained("necover/fine_tuned_bartbasedtokenizer_concluions")

summarization_pipeline = pipeline("summarization", model=model_summarizer, tokenizer=tokenizer)

config.json:   0%|          | 0.00/1.66k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/358 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

In [59]:
input =df['topics'][200]
output = summarization_pipeline(input)[0]['summary_text']
print(f'Input :{input}\nOutput :{output}')

Input :Emplimenting a computer system in schools that is capable of reading a students emotions is inappropriate, lazy, and dangerous. 
Output :In conclusion, computers in schools should not be able to read a student's emotions. Emplimenting a computer system in schools that is capable of reading a students emotions is inappropriate, lazy, and dangerous. It is a waste of money, time, and money that should be spent on other things. 


In [60]:
input = 'Limiting our usage of any type of motorized vehicle, would not only decrease air pollution but it would also help with adults or children that may not exercise enough.'
output = summarization_pipeline(input)[0]['summary_text']
print(f'Input :{input}\nOutput :{output}')

Input :Limiting our usage of any type of motorized vehicle, would not only decrease air pollution but it would also help with adults or children that may not exercise enough.
Output : would also help with adults or children that may not exercise enough. Limiting our usage of any type of motorized vehicle, would not only decrease air pollution but it would also improve our health. It would be a good idea to limit the amount of air pollution in the United States, and it would help with the health of adults and children.


-------------------

##**Classifier**

In [15]:
labels_similarity = ['Effective', 'Adequate', 'Ineffective']
labels_type = ['Claim','Evidence','Counterclaim','Rebuttal']

In [76]:
model_effectiveness = from_pretrained_keras("necover/Bert_Based_effectiveness_classifier_V2")
model_type = from_pretrained_keras("necover/Bert_Based_type_classifier_V2")

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]



Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

fingerprint.pb:   0%|          | 0.00/55.0 [00:00<?, ?B/s]

keras_metadata.pb:   0%|          | 0.00/190k [00:00<?, ?B/s]

model.png:   0%|          | 0.00/32.2k [00:00<?, ?B/s]

saved_model.pb:   0%|          | 0.00/6.56M [00:00<?, ?B/s]

variables/variables.index:   0%|          | 0.00/12.2k [00:00<?, ?B/s]

variables.data-00000-of-00001:   0%|          | 0.00/440M [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/840 [00:00<?, ?B/s]



In [62]:
class BertSemanticDataGenerator(tf.keras.utils.Sequence):

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=32,
        shuffle=True,
        include_targets=True,
    ):
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load our BERT Tokenizer to encode the text.
        # We will use base-base-uncased pretrained model.
        self.tokenizer = BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Denotes the number of batches per epoch.
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        # Retrieves the batch of index.
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # With BERT tokenizer's batch_encode_plus batch of both the sentences are
        # encoded together and separated by [SEP] token.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=256,
            return_attention_mask=True,
            return_token_type_ids=True,
            pad_to_max_length=True,
            truncation=True,
            return_tensors="tf",
        )

        # Convert batch of encoded features to numpy array.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

        # Set to true if data generator is used for training/validation.
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            return [input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        # Shuffle indexes after each epoch if shuffle is set to True.
        if self.shuffle:
            np.random.RandomState(42).shuffle(self.indexes)

In [63]:
def check(sentence1, sentence2, model, label_list):
    sentence_pairs = np.array([[str(sentence1), str(sentence2)]])
    test_data = BertSemanticDataGenerator(
        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
    )

    proba = model.predict(test_data[0])[0]
    idx = np.argmax(proba)
    print(idx)
    proba = f"{proba[idx]: .2f}%"
    pred = label_list[idx]
    return pred, proba

In [21]:
opinions.iloc[0]

id                                                    c22adee811b6
topic_id                                              007ACE74B050
text             I think that the face is a natural landform be...
type                                                         Claim
effectiveness                                             Adequate
Name: 0, dtype: object

In [68]:
df

Unnamed: 0,topic_id,topics,conclusions,opinions,type
0,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,I think that the face is a natural landform be...,Claim
1,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,"If life was on Mars, we would know by now. The...",Evidence
2,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,People thought that the face was formed by ali...,Counterclaim
3,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,"though some say that life on Mars does exist, ...",Rebuttal
4,007ACE74B050,"On my perspective, I think that the face is a ...",Though people were not satified about how the ...,"It says in paragraph 7, on April 5, 1998, Mars...",Evidence
...,...,...,...,...,...
23349,FF9E0379CD98,"I think is not benefit for student, that take ...",Conclusion: you dont are prepared for take the...,you cant work or cant study after school with ...,Evidence
23350,FF9E0379CD98,"I think is not benefit for student, that take ...",Conclusion: you dont are prepared for take the...,in the end you cant take the class because is ...,Counterclaim
23351,FFA381E58FC6,Advice is something that can impact a persons ...,In conclusion asking for an opinion can be ben...,For many people they don't like only asking on...,Claim
23352,FFA381E58FC6,Advice is something that can impact a persons ...,In conclusion asking for an opinion can be ben...,also people have different views and opinions ...,Claim


In [70]:
idx = 200
s1 = df['topics'][idx]
s2 = df['opinions'][idx]
ef_test = df['effectiveness'][idx]
ty_test = df['type'][idx]

ef_pred, ef_prob = check(s1, s2, model_effectiveness, labels_similarity)
ty_pred, ty_prob = check(s1, s2, model_type, labels_type)
print(f"Topic :{s1}\nOpinion : {s2}\nEffectiveness: {ef_test}\tty_pred : {ty_test}\nPredicted Effectiveness: {ef_pred}\tPredicted Type: {ty_pred}")

0
0
Topic :Emplimenting a computer system in schools that is capable of reading a students emotions is inappropriate, lazy, and dangerous. 
Opinion : In school, students are faced with one major task: to learn. Teachers are faced with the task of teaching. A teacher's job is to engage students and the students job is to stay engaged with the teacher. 
Effectiveness: Effective	ty_pred : Evidence
Predicted Effectiveness: Effective	Predicted Type: Claim


In [79]:
sentence1 = "Car-free cities seem to be more safe."
sentence2 = "make the streets safer "
check(sentence1, sentence2, model_effectiveness, labels_similarity),check(sentence1, sentence2, model_type, labels_type)

1
0


(('Adequate', ' 0.82%'), ('Claim', ' 0.99%'))