<a href="https://colab.research.google.com/github/lucacerab/DSEthesis-transformers/blob/main/ECBspeech_AM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Applying the argumentation mining pipeline to the most representative speech on the monetary policy topic extracted in BERTopic_ECB_V1.ipynb

In [1]:
%%capture
import nltk
nltk.download('punkt')
import pandas as pd 
!pip install transformers
from transformers import TFAutoModel
from tensorflow import keras
from transformers import BertTokenizer
import numpy as np

In [2]:
# Mount Google Drive at /content/drive; the input text file and the saved
# Keras models used by later cells are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data preparation

In [3]:
# Read the exported documents file and keep every non-empty line as one
# entry of `docs`.
path = '/content/drive/MyDrive/Colab Notebooks/tesi/TM/final/V1/docs_monetarypolicy.txt'
with open(path, encoding="utf8") as f:
  lines = f.read().split('\n')
# Empty strings (blank lines, trailing newline) carry no text, so drop them.
docs = [line for line in lines if line]

In [4]:
# Echo the loaded documents as the cell output for a visual check.
docs

['Hearing at the Committee on Economic and Monetary Affairs of the European Parliament   Introductory statement by Mario Draghi, President of the ECB, Brussels, 8 July 2013 Madam Chair, Honourable members of the Committee on Economic and Monetary Affairs, It is a great pleasure for me to be back with your committee for our last exchange of views before the summer break. At this time, it is worth taking stock of progress over the past 12 months. Clearly, financial conditions in the euro area today are more stable and resilient than they were last summer. This is partly due to our determined monetary policy actions. Governments and parliaments have also played a key role in the relative return of confidence and stability by undertaking courageous reforms, at both the national and European levels.  Yet despite this progress, the euro area still faces considerable challenges. The economy is still weak. Financial fragmentation remains. This challenges the very concept of the Single Market. 

In [5]:
# Split the first document into sentences with NLTK's sentence tokenizer.
sentences = nltk.tokenize.sent_tokenize(docs[0])

In [6]:
# Split the first entry at the first occurrence of '2013 ' and splice the
# resulting pieces back in its place (slice assignment), so the text before
# the date and the text after it become separate entries. str.split removes
# the '2013 ' delimiter itself.
sentences[0:1] = (sentences[0].split('2013 ', 1))

In [7]:
# Put back the year that the split on '2013 ' removed from the first entry.
# The guard keeps the cell idempotent: re-running it must not append '2013'
# a second time.
if not sentences[0].endswith('2013'):
  sentences[0] = sentences[0] + '2013'

In [8]:
# The sentence tokenizer emits the section numbers '1.', '2.' and '3.' as
# stand-alone entries, with the text that follows them in the next entry.
# Each rule maps such a marker to (delimiter, keep_delimiter): the next entry
# is cut at the first occurrence of the delimiter, everything before it is
# discarded, and the delimiter is re-attached only where it is part of the
# sentence ('Let ...').
_HEADING_RULES = {
  '1.': ('Let ', True),
  '2.': ('Let ', True),
  '3.': ('union', False),
}

for i, value in enumerate(sentences):
  rule = _HEADING_RULES.get(value)
  # Nothing to merge for ordinary sentences, or when the marker is the very
  # last entry (there is no following entry to take the text from).
  if rule is None or i + 1 >= len(sentences):
    continue
  delimiter, keep_delimiter = rule
  # partition() cuts at the FIRST occurrence only, so text after a second
  # occurrence of the delimiter is preserved instead of being dropped.
  _, found, remainder = sentences[i+1].partition(delimiter)
  if not found:
    # Unexpected layout: leave both entries untouched rather than raising.
    continue
  remainder = remainder.strip()
  sentences[i] = delimiter + remainder if keep_delimiter else remainder
  # The following entry has been folded into position i; remove it. The
  # enumerate() iterator then continues with the entry that moved to i+1.
  sentences.pop(i+1)

In [9]:
# Echo the cleaned sentence list as the cell output for a visual check.
sentences

['Hearing at the Committee on Economic and Monetary Affairs of the European Parliament   Introductory statement by Mario Draghi, President of the ECB, Brussels, 8 July 2013',
 'Madam Chair, Honourable members of the Committee on Economic and Monetary Affairs, It is a great pleasure for me to be back with your committee for our last exchange of views before the summer break.',
 'At this time, it is worth taking stock of progress over the past 12 months.',
 'Clearly, financial conditions in the euro area today are more stable and resilient than they were last summer.',
 'This is partly due to our determined monetary policy actions.',
 'Governments and parliaments have also played a key role in the relative return of confidence and stability by undertaking courageous reforms, at both the national and European levels.',
 'Yet despite this progress, the euro area still faces considerable challenges.',
 'The economy is still weak.',
 'Financial fragmentation remains.',
 'This challenges the 

In [10]:
# One 'monetary policy' label per entry of `sentences`, so that every sentence
# can be paired with the same topic string.
topic = ['monetary policy']*len(sentences)

In [11]:
# Build the two-column (topic, sentence) frame. The [1:] slices skip the first
# entry of both lists, i.e. the first element of `sentences` is not analysed.
df = pd.DataFrame(list(zip(topic[1:], sentences[1:])), columns =['topic', 'sentence'])

In [12]:
# Preview the first rows of the (topic, sentence) frame.
df.head()

Unnamed: 0,topic,sentence
0,monetary policy,"Madam Chair, Honourable members of the Committ..."
1,monetary policy,"At this time, it is worth taking stock of prog..."
2,monetary policy,"Clearly, financial conditions in the euro area..."
3,monetary policy,This is partly due to our determined monetary ...
4,monetary policy,Governments and parliaments have also played a...


## AM pipeline

In [13]:
# Load the pretrained 'bert-base-cased' model and its tokenizer, then the two
# Keras models saved on Drive:
#   bert1 - used by mine_args for the Argument / NoArgument prediction
#   bert2 - used by mine_args for the Premise / Claim prediction
# NOTE(review): `bert` is not referenced by any other visible cell; confirm
# whether it is still needed (e.g. for deserialising the .h5 models).
bert = TFAutoModel.from_pretrained('bert-base-cased')
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
bert1 = keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/tesi/AM/binary/UKP_binary/UKP_binary_final/UKP_binary_finetuned/UKPentire_binary.h5')
bert2 = keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/tesi/AM/subjectivity/SUBJ_final.h5')

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/502M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

In [14]:
# Fixed token length every (topic, sentence) pair is padded / truncated to.
max_len = 128
def bert_encode(df, max_len):
  """Encode (topic, sentence) pairs into fixed-length BERT input arrays.

  Each row's 'topic' and 'sentence' values are passed as a text pair to the
  module-level `tokenizer`, truncated / padded to `max_len` tokens.

  Args:
    df: DataFrame with 'topic' and 'sentence' columns. Its index may hold any
      labels; rows are processed in their positional order.
    max_len: number of tokens per encoded pair.

  Returns:
    Tuple (X_ids, X_mask, X_type_ids) of float arrays, each of shape
    (len(df), max_len), holding the tokenizer's 'input_ids',
    'attention_mask' and 'token_type_ids' for every row.
  """
  num_samples = len(df)

  X_ids = np.zeros((num_samples, max_len))
  X_mask = np.zeros((num_samples, max_len))
  X_type_ids = np.zeros((num_samples, max_len))

  # Fill the arrays by row POSITION. Using the index labels from iterrows()
  # as array offsets writes to the wrong row (or raises IndexError) whenever
  # the frame's index is not exactly 0..n-1, e.g. after filtering.
  for pos, (topic_text, sentence) in enumerate(zip(df['topic'], df['sentence'])):
      encoded = tokenizer.encode_plus(topic_text, sentence, max_length=max_len,
                                      truncation=True, padding='max_length', add_special_tokens=True)
      X_ids[pos, :] = encoded['input_ids']
      X_mask[pos, :] = encoded['attention_mask']
      X_type_ids[pos, :] = encoded['token_type_ids']
  return X_ids, X_mask, X_type_ids

In [15]:
def mine_args(df):
  """Run the two-stage argument mining pipeline on a (topic, sentence) frame.

  Stage 1 feeds every row to `bert1` (input ids, attention masks and token
  type ids); a rounded prediction of 0 is labelled 'NoArgument', anything
  else 'Argument'. Stage 2 feeds only the 'Argument' rows to `bert2` (input
  ids and attention masks); a rounded prediction of 0 is labelled 'Premise',
  anything else 'Claim'. Rows that are not arguments get 'NoArgument' in
  both label columns.

  Args:
    df: DataFrame with 'topic' and 'sentence' columns. It is not modified.

  Returns:
    A new DataFrame with a fresh 0..n-1 index, the columns of `df` plus the
    string columns 'preds0' and 'preds1', one row per input row.
  """
  # Work on a re-indexed copy so the caller's frame does not gain columns as
  # a side effect, and so positional and label-based access coincide.
  comb = df.reset_index(drop=True)

  #detect argumentative sentences
  ecb_input_ids, ecb_attention_masks, ecb_token_type_ids = bert_encode(comb, max_len)
  results0 = bert1.predict([ecb_input_ids, ecb_attention_masks, ecb_token_type_ids])
  # ravel() flattens an (n, 1) prediction array into one value per row.
  is_argument = np.round(results0).astype(int).ravel() != 0
  comb['preds0'] = np.where(is_argument, 'Argument', 'NoArgument')
  comb['preds1'] = 'NoArgument'

  #distinguish premises and claims
  args = comb.loc[is_argument].reset_index(drop=True)
  # Skip stage 2 entirely when nothing was classified as an argument, so the
  # second model is never asked to predict on an empty batch.
  if len(args) > 0:
    args_input_ids, args_attention_masks, _ = bert_encode(args, max_len)
    results1 = bert2.predict([args_input_ids, args_attention_masks])
    is_claim = np.round(results1).astype(int).ravel() != 0
    # Write the labels back by row mask instead of merging on the sentence
    # text: a merge multiplies rows when the same sentence occurs more than
    # once among the arguments.
    comb.loc[is_argument, 'preds1'] = np.where(is_claim, 'Claim', 'Premise')

  return comb

In [17]:
# Run the argument mining pipeline on the speech sentences; `comb` holds the
# sentences together with their 'preds0' and 'preds1' labels.
comb = mine_args(df)

In [18]:
# Display the labelled sentences.
comb

Unnamed: 0,topic,sentence,preds0,preds1
0,monetary policy,"Madam Chair, Honourable members of the Committ...",NoArgument,NoArgument
1,monetary policy,"At this time, it is worth taking stock of prog...",NoArgument,NoArgument
2,monetary policy,"Clearly, financial conditions in the euro area...",Argument,Premise
3,monetary policy,This is partly due to our determined monetary ...,NoArgument,NoArgument
4,monetary policy,Governments and parliaments have also played a...,NoArgument,NoArgument
...,...,...,...,...
88,monetary policy,A single resolution mechanism is the next cruc...,NoArgument,NoArgument
89,monetary policy,It is an indispensable complement to the singl...,NoArgument,NoArgument
90,monetary policy,The ECB looks forward to the Commission’s prop...,NoArgument,NoArgument
91,monetary policy,Thank you for your attention.,NoArgument,NoArgument


In [19]:
# Show the underlying array so the sentences are printed in full rather than
# truncated as in the DataFrame display.
comb.values

array([['monetary policy',
        'Madam Chair, Honourable members of the Committee on Economic and Monetary Affairs, It is a great pleasure for me to be back with your committee for our last exchange of views before the summer break.',
        'NoArgument', 'NoArgument'],
       ['monetary policy',
        'At this time, it is worth taking stock of progress over the past 12 months.',
        'NoArgument', 'NoArgument'],
       ['monetary policy',
        'Clearly, financial conditions in the euro area today are more stable and resilient than they were last summer.',
        'Argument', 'Premise'],
       ['monetary policy',
        'This is partly due to our determined monetary policy actions.',
        'NoArgument', 'NoArgument'],
       ['monetary policy',
        'Governments and parliaments have also played a key role in the relative return of confidence and stability by undertaking courageous reforms, at both the national and European levels.',
        'NoArgument', 'NoArgument'