In [15]:
from transformers import BertTokenizer, TFBertForSequenceClassification
from transformers import InputExample, InputFeatures

import tensorflow as tf
import pandas as pd

Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 109,483,778
Trainable params: 109,483,778
Non-trainable params: 0
_________________________________________________________________


## Train/Load the model

In [None]:
## Train the model
# model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
# tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

## Load the saved model
model = tf.keras.models.load_model('./')
model.summary()

## Skip to Inference if load the saved model

In [2]:
!pwd

/Users/tony/Project/Testing Bert


In [17]:
csv_file_path = "./data.csv" #dataset source:https://www.kaggle.com/datasets/sbhatti/financial-sentiment-analysis?resource=download
dataset = pd.read_csv(csv_file_path)

#remove neutral rows from dataset
dataset = dataset[dataset['Sentiment'] != 'neutral']
print(dataset)

                                               Sentence Sentiment
0     The GeoSolutions technology will leverage Bene...  positive
1     $ESI on lows, down $1.50 to $2.50 BK a real po...  negative
2     For the last quarter of 2010 , Componenta 's n...  positive
5       $SPY wouldn't be surprised to see a green close  positive
6     Shell's $70 Billion BG Deal Meets Shareholder ...  negative
...                                                 ...       ...
5832  Operating profit fell to EUR 38.1 mn from EUR ...  negative
5835  HSBC Says Unit to Book $585 Million Charge on ...  negative
5836  Daily Mail parent company in talks with potent...  positive
5837  RISING costs have forced packaging producer Hu...  negative
5841  HELSINKI AFX - KCI Konecranes said it has won ...  positive

[2712 rows x 2 columns]


In [4]:
def encode_dataset(data, tokenizer, max_length=512):
    input_ids, attention_masks, labels = [], [], []

    sentiment_map = {"positive": 1, "negative": 0}

    for index, row in data.iterrows():
        inputs = tokenizer.encode_plus(
            row["Sentence"],
            add_special_tokens=True,
            max_length=max_length,
            padding="max_length",
            truncation=True,
        )

        input_ids.append(inputs["input_ids"])
        attention_masks.append(inputs["attention_mask"])
        labels.append(sentiment_map[row["Sentiment"]])

    return (
        tf.data.Dataset.from_tensor_slices(
            ({"input_ids": input_ids, "attention_mask": attention_masks}, labels)
        ),
        max_length,
    )

In [5]:
tf_data, max_length = encode_dataset(dataset, tokenizer)
print(tf_data)

<_TensorSliceDataset element_spec=({'input_ids': TensorSpec(shape=(512,), dtype=tf.int32, name=None), 'attention_mask': TensorSpec(shape=(512,), dtype=tf.int32, name=None)}, TensorSpec(shape=(), dtype=tf.int32, name=None))>


In [6]:
# train-test-val split
length = len(tf_data)
train_size = int(length*0.7)
test_size = int(length*0.15)
val_size = int(length*0.15)

BATCH_SIZE = 4

tf_data = tf_data.shuffle(len(tf_data)).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
train_data = tf_data.take(train_size)
test_data = tf_data.skip(train_size)
val_data = test_data.skip(val_size)
test_data = test_data.take(test_size)

In [7]:
print(tf_data)
print(train_data)

<_PrefetchDataset element_spec=({'input_ids': TensorSpec(shape=(None, 512), dtype=tf.int32, name=None), 'attention_mask': TensorSpec(shape=(None, 512), dtype=tf.int32, name=None)}, TensorSpec(shape=(None,), dtype=tf.int32, name=None))>
<_TakeDataset element_spec=({'input_ids': TensorSpec(shape=(None, 512), dtype=tf.int32, name=None), 'attention_mask': TensorSpec(shape=(None, 512), dtype=tf.int32, name=None)}, TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


In [3]:
tf.config.list_physical_devices(
    device_type=None
)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [9]:
#Training
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)  #use tf.keras.optimizers.legacy.Adam for m1
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

EPOCHS = 3

device = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
with tf.device(device):
    history = model.fit(tf_data, epochs=EPOCHS, validation_data=val_data)
    
# model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0), 
#               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
#               metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])



Epoch 1/3


2023-05-24 03:23:55.636187: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/3
Epoch 3/3


## Inference

In [19]:
pred_sentences = ['''German economy entered recession as inflation hurts consumers Reuters May 25, 20236:13 PM GMT+8Updated 6 min ago Outbreak of the coronavirus disease (COVID-19) pandemic in Berlin People pass by the Europa-Center shopping mall, amid the coronavirus disease (COVID-19) pandemic in Berlin, Germany, December 14, 2020. REUTERS/Michele Tantussi BERLIN, May 24 (Reuters) - The German economy was in recession in early 2023 after households spending in Europe's economic engine finally succumbed to the pressure of high inflation. Gross domestic product fell by 0.3% in the first quarter of the year when adjusted for price and calendar effects, a second estimate from the statistics office showed on Thursday. This follows a decline of 0.5% in the fourth quarter of 2022. A recession is commonly defined as two successive quarters of contraction. Advertisement · Scroll to continue German GDP data showed "surprisingly negative signals," Finance Minister Christian Lindner said on Thursday. He added that comparing Germany with other highly developed economies, the economy was losing potential for growth. "I don't want Germany to play in a league in which we have to relegate ourselves to the last positions," he said, referring to the forecasts of the International Monetary Fund, which forecast a recession in 2023 only in Germany and Britain among European countries. "Under the weight of immense inflation, the German consumer has fallen to his knees, dragging the entire economy down with him," Andreas Scheuerle, an analyst at DekaBank, said. Advertisement · Scroll to continue Household consumption was down 1.2% quarter-on-quarter after price, seasonal and calendar adjustments. Government spending also decreased significantly by 4.9% on the quarter. "The warm winter weather, a rebound in industrial activity, helped by the Chinese reopening, and an easing of supply chain frictions, were not enough to get the economy out of the recessionary danger zone," ING's global head of macro Carsten Brzeski said. By contrast, investment was up in the first three months of the year, following a weak second half of 2022. Investment in machinery and equipment increased by 3.2% compared with the previous quarter, while investment in construction went up 3.9% on quarter. There were also positive contributions from trade. Exports rose 0.4%, while imports fell 0.9%. "The massive rise in energy prices took its toll in the winter half-year," Commerzbank's chief economist Joerg Kraemer said. Advertisement · Scroll to continue A recession could not be avoided and now the question is whether there will be any recovery in the second half of the year. "Looking beyond the first quarter, the optimism at the start of the year seems to have given way to more of a sense of reality," ING's Brzeski said. A drop in purchasing power, thinned-out industrial order books, aggressive monetary policy tightening, and the expected slowdown of the U.S. economy, all argue in favour of weak economic activity. Following Wednesday's decline in the Ifo business climate, all key leading indicators in the manufacturing sector are now falling, Kraemer from Commerzbank said. The German Bundesbank, however, expects the economy to grow modestly in the second quarter as a rebound in industry more than offsets stagnating household consumption and a slump in construction, according to a monthly economy report published on Wednesday.''',
                 '''Exclusive: Asia-focused HSBC puts 12 countries on exit watchlist By Lawrence White May 24, 20237:05 PM GMT+8Updated a day ago Georges Elhedery, HSBC's CFO, gestures during an interview with Reuters in Dubai Georges Elhedery, HSBC's CFO, gestures during an interview with Reuters in Dubai, United Arab Emirates August 7, 2017. REUTERS/Tom Arnold/ Summary Companies Reviews presence in smaller markets to chase bigger Asian growth Looking to add 2,000 Chinese wealth managers over next two years LONDON, May 24 (Reuters) - HSBC (HSBA.L) is reviewing a possible exit from as many as 1 in 5 of the countries the lender operates in to sharpen its focus on Asian expansion, Chief Financial Officer Georges Elhedery told Reuters in his first interview since taking the role. These reviews, which could see the British bank deciding to sell or streamline businesses in 12 countries, follow pressure from Chinese shareholder Ping An Insurance (601318.SS), which wants HSBC to prioritise growth in its money-spinning Asian business which generates 78% of group profit. Advertisement · Scroll to continue "Some of these will have slower progress than others, and none of them is material enough on its own to change the profile of the overall business, but as we progress through and execute on these assessments, we do expect them to contribute towards that shift to Asia," Elhedery said, declining to disclose which markets were under review or the time frame for the processes. HSBC's ongoing pivot to Asia has already triggered planned sales of all or parts of its businesses in France, Greece, Russia and Canada, announced in the last two years. While the markets under review may be relatively small, the move is significant in showing the pressure HSBC faces to shrink its once globe-spanning local banking businesses in order to lift returns and appease its investors. Advertisement · Scroll to continue HSBC does not break out the results of every individual country in which it operates in its overall results, making identifying underperforming markets challenging. But its businesses in Europe and Latin America may be particularly under the microscope, with the former region making a net loss in 2022 thanks to restructuring and the costs booked to its headquarters in the region. Latin America contributed just under 5% of group profit. One country not currently under review is Mexico, Elhedery said, despite debate among analysts and investors on the bank's future presence in the country. "Mexico is performing very well for us," the veteran banker said, pointing to the U.S.-Mexico-Canada trade agreement and to the China Plus One strategy, which have supported economic growth in Mexico. "Some 70% of client acquisition in the retail business is through employees of the multinational companies that HSBC banks in Mexico, so there are strong synergies with the wholesale business and the package as a whole makes sense for us," he added. Advertisement · Scroll to continue BIGGER DEALS PRESENT WIDER CHALLENGES Ping An was the only major HSBC investor backing proposals to force the bank to publish regular assessments on the merits of dividing its franchise along Asian and Western lines at HSBC's annual shareholder meeting on May 5. A spokesperson for Ping An said the company had no further comment. The failure of Ping An to secure further backing for a split has afforded HSBC Chairman Mark Tucker, Chief Executive Noel Quinn and newly-promoted Elhedery some breathing space to pursue greater profit growth on their terms. "It's overwhelmingly clear what the majority of our shareholders bar one expect from us, and therefore all our focus now is on delivering for the business and for our customers," Elhedery said. Wider challenges include executing critical asset sales, managing a price war with rivals as interest rate hikes peak, and dealing with rising political tensions between East and West, analysts and investors said. The bank on April 14 said a nominal 1 euro ($1.10) deal to offload its French retail business could falter after interest rate hikes upped the amount of capital Cerberus-backed buyer, My Money, will need to secure regulatory approval. HSBC had said it expected to incur a loss of around $2.3 billion on the disposal should it go ahead. Elhedery said negotiations are ongoing but HSBC would walk away from the deal to protect shareholder value if necessary. HSBC's larger $10 billion sale of its Canada unit has also been delayed until next year, as it battles to ensure a smooth transition of systems to the buyer, Royal Bank of Canada. Failure to complete either of those deals could have wider consequences for HSBC. "In the short term, the risk that the French and Canadian disposals don't complete ... could put a spanner in the works of its Asia pivot and spark a fresh wave of activism," said Susannah Streeter, head of money and markets at Hargreaves Lansdown. Beyond dealmaking, Elhedery said the medium-term challenge is sustaining momentum in revenue growth, with the fillip from rising central bank interest rates worldwide already tapering off. The bank is trying to increase income through fee-based products and services, especially in China and Hong Kong where economies are beginning to normalise following the lifting of COVID-19 related restrictions. HSBC is on track to hire around 2,000 private wealth managers in China's insurance sector over the next two years, adding to the 1,000 hired last year, Elhedery said.''']

references:
https://www.reuters.com/business/finance/asia-focused-hsbc-puts-12-countries-exit-watchlist-2023-05-24/
https://www.reuters.com/markets/europe/germany-enters-recession-2023-05-25/

In [20]:
#Predictons
tf_batch = tokenizer(pred_sentences, max_length=512, padding=True, truncation=True, return_tensors='tf')
tf_outputs = model(tf_batch)
#tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
tf_predictions = tf.nn.softmax(tf_outputs['logits'], axis=-1)
labels = ['Negative','Positive']
label = tf.argmax(tf_predictions, axis=1)
label = label.numpy()
for i in range(len(pred_sentences)):
  print(pred_sentences[i], ": \n\n", labels[label[i]], "\n\n")


German economy entered recession as inflation hurts consumers Reuters May 25, 20236:13 PM GMT+8Updated 6 min ago Outbreak of the coronavirus disease (COVID-19) pandemic in Berlin People pass by the Europa-Center shopping mall, amid the coronavirus disease (COVID-19) pandemic in Berlin, Germany, December 14, 2020. REUTERS/Michele Tantussi BERLIN, May 24 (Reuters) - The German economy was in recession in early 2023 after households spending in Europe's economic engine finally succumbed to the pressure of high inflation. Gross domestic product fell by 0.3% in the first quarter of the year when adjusted for price and calendar effects, a second estimate from the statistics office showed on Thursday. This follows a decline of 0.5% in the fourth quarter of 2022. A recession is commonly defined as two successive quarters of contraction. Advertisement · Scroll to continue German GDP data showed "surprisingly negative signals," Finance Minister Christian Lindner said on Thursday. He added that c

In [19]:
# Save model
model.save('./')

NotImplementedError: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.