In [None]:
import tensorflow as tf
# Choose the tf.distribute strategy used by the rest of the notebook: try to
# attach to a TPU and, if any step of that setup raises ValueError, fall back
# to whatever strategy TensorFlow currently reports as the default.
try:
    # The resolver is built with no arguments, so it has to locate the TPU itself.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    # Connect to the resolved cluster and initialize the TPU system before the
    # strategy object is constructed from the same resolver.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError:
    # NOTE(review): presumably ValueError is how "no TPU available" surfaces — confirm.
    strategy = tf.distribute.get_strategy() # for CPU and single GPU
# The replica count is reused later to scale the training batch size.
print('Number of replicas:', strategy.num_replicas_in_sync)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Read the competition's train and test splits (in that order) from the
# Kaggle input directory.
train, test = map(
    pd.read_csv,
    (
        '../input/contradictory-my-dear-watson/train.csv',
        '../input/contradictory-my-dear-watson/test.csv',
    ),
)

In [None]:
# Bar chart of how many training examples exist per language.
# Requires seaborn to be imported as `sns`; without that import this cell
# raises NameError on a fresh kernel.
fig, ax = plt.subplots(figsize=(15, 8))
sns.countplot(x='language', data=train, ax=ax)
# Label the figure so it stands on its own; the trailing `;` hides the repr.
ax.set(title='Training examples per language', xlabel='Language', ylabel='Count');

In [None]:
# Show the (rows, columns) shape of each split, one per line.
print(train.shape, test.shape, sep='\n')

In [None]:
# Preview the first five rows of the training split.
train.head(n=5)

In [None]:
# Preview the first five rows of the test split.
test.head(n=5)

In [None]:
# Class balance: number of training rows for each label value.
train['label'].value_counts()

In [None]:
#!pip install transformers
from transformers import TFAutoModel,AutoTokenizer
import tensorflow as tf
#!pip install sentencepiece

In [None]:
# Checkpoint whose tokenizer is used to encode the text pairs.
MODEL_NAME = 'joeddav/xlm-roberta-large-xnli'
# Every sequence is padded/truncated to this many tokens; the model's Input
# layers are declared with the same length.
MAX_LEN = 100

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def encode_pairs(frame, max_length=MAX_LEN):
    """Tokenize the premise/hypothesis pairs of ``frame`` into model inputs.

    Args:
        frame: DataFrame with ``premise`` and ``hypothesis`` columns.
        max_length: Length every encoded sequence is padded or truncated to.

    Returns:
        A ``(encoding, inputs)`` tuple: ``encoding`` is the raw result of
        ``tokenizer.batch_encode_plus`` and ``inputs`` is a dict holding
        int32 tensors under ``'input_word_ids'`` and ``'input_mask'``.
    """
    # Each row contributes one [premise, hypothesis] two-element list.
    text_pairs = frame[['premise', 'hypothesis']].values.tolist()
    encoding = tokenizer.batch_encode_plus(
        text_pairs,
        padding='max_length',
        max_length=max_length,
        truncation=True,
        return_attention_mask=True,
    )
    inputs = {
        'input_word_ids': tf.convert_to_tensor(encoding['input_ids'], dtype=tf.int32),
        'input_mask': tf.convert_to_tensor(encoding['attention_mask'], dtype=tf.int32),
    }
    return encoding, inputs


train_enc, train_input = encode_pairs(train)
test_enc, test_input = encode_pairs(test)

# Keep the individual tensor names that this cell has always defined, in case
# other cells still refer to them.
train_tf1, train_tf2 = train_input['input_word_ids'], train_input['input_mask']
test_tf1, test_tf2 = test_input['input_word_ids'], test_input['input_mask']

In [None]:
# Spot-check of the tokenizer output: index the training batch encoding at 100.
# NOTE(review): presumably this returns the encoding of training row 100 and
# relies on a fast tokenizer backend — confirm integer indexing is supported
# by the installed transformers version.
train_enc[100]

In [None]:
# Build and compile the 3-class classifier inside the distribution strategy's
# scope, so the model is created under that strategy.
with strategy.scope():
    # Fixed-length (100 token) int32 inputs. The layer names match the keys of
    # the input dicts passed to fit/predict, and the length matches the
    # max_length the tokenizer pads/truncates to.
    input_ids = tf.keras.Input(shape=(100,), dtype=tf.int32, name='input_word_ids')
    input_mask = tf.keras.Input(shape=(100,), dtype=tf.int32, name='input_mask')

    # Keep the pretrained encoder under its own name instead of overwriting it
    # with its output tensor.
    encoder = TFAutoModel.from_pretrained('joeddav/xlm-roberta-large-xnli')
    # Element [0] of the encoder output is the tensor that gets pooled below.
    roberta = encoder([input_ids, input_mask])[0]

    # NOTE(review): no mask is passed to the pooling layer, so the average is
    # taken over all 100 positions, padding included — consider masking.
    out = tf.keras.layers.GlobalAveragePooling1D()(roberta)
    out = tf.keras.layers.Dense(3, activation='softmax')(out)

    model = tf.keras.Model(inputs=[input_ids, input_mask], outputs=out)
    model.compile(
        # `learning_rate` replaces the deprecated `lr` alias, which newer
        # Keras releases no longer accept.
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        # Sparse loss: the labels are passed as integer class ids.
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.summary()

In [None]:
# Display the active strategy's replica count (the same value printed when the
# strategy was created); it multiplies the per-replica batch size in training.
strategy.num_replicas_in_sync

In [None]:
# Early stopping with a patience of two epochs; the best weights seen are
# restored when training stops.
es = tf.keras.callbacks.EarlyStopping(restore_best_weights=True, patience=2)

# Fine-tune for at most 20 epochs, holding out 20% of the training rows for
# validation. The global batch is 16 examples per replica.
model.fit(
    x=train_input,
    y=train.label,
    batch_size=strategy.num_replicas_in_sync * 16,
    epochs=20,
    validation_split=0.2,
    callbacks=[es],
    verbose=1,
)

In [None]:
# Score the test set: the model emits one row of 3 class probabilities per
# example, and the predicted label is the index of the largest one.
test_probs = model.predict(test_input)
# A single vectorized argmax over the class axis replaces the per-row Python
# loop; `.tolist()` keeps `pred` a plain list as before.
pred = np.argmax(test_probs, axis=-1).tolist()
# Distribution of predicted labels. Series.value_counts works on every pandas
# version, unlike DataFrame.value_counts, which needs pandas >= 1.1.
pd.Series(pred).value_counts()

In [None]:
# Write the submission file: the test ids alongside their predicted labels,
# without the DataFrame index.
test[['id']].assign(prediction=pred).to_csv('submission.csv', index=False)