In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import shutil
import sys
import pickle

In [3]:
# Load your data from pickle files
with open('./intermediate/x.pickle', 'rb') as f:
    X_batches = pickle.load(f)

with open('./intermediate/y.pickle', 'rb') as f:
    Y_batches = pickle.load(f)

In [4]:
# Step 1: Encode the labels
from sklearn.preprocessing import MultiLabelBinarizer
label_binarizer = MultiLabelBinarizer()
y = label_binarizer.fit_transform([label for labels in Y_batches for label in labels])

In [5]:
len(y)

1741

In [6]:
x = [sentence for sublist in X_batches for sentence in sublist]
len(x)

1741

In [7]:
x[0]

'chintaman rao v state madhya pradeshram krishnav state madhya 8 november 1950'

In [8]:
y[0]

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [9]:
import pandas as pd
import random



# Generate column names
column_names = ['text'] + [f'label_{i}' for i in range(13)]

# Create a DataFrame
data = {'text': x}
for i, col_name in enumerate(column_names[1:]):
    data[col_name] = [row[i] for row in y]

df = pd.DataFrame(data)

# Display the DataFrame
print(df)


                                                   text  label_0  label_1  \
0     chintaman rao v state madhya pradeshram krishn...        0        1   
1     equivalent citation 1951 all india reporter 11...        0        0   
2                                            78 79 1950        1        0   
3     application article 32 constitution india writ...        1        0   
4                                   gn joshi petitioner        0        0   
...                                                 ...      ...      ...   
1736  intestacy presumption exists certainly fortifi...        0        0   
1737  result appeal allowed judgment decree high cou...        0        0   
1738                               appellant cost court        0        0   
1739                                     appeal allowed        0        0   
1740  agent respondent number 1 m section krishnamoo...        1        0   

      label_2  label_3  label_4  label_5  label_6  label_7  label_8  label_

In [10]:
# Define the column name mapping
column_name_mapping = {
    'text': 'Text',
    'label_0': 'NAME',
    'label_1': 'CITATION',
    'label_2': 'COUNSEL',
    'label_3': 'JUDGE',
    'label_4': 'FACTS',
    'label_5': 'RLC',
    'label_6': 'REASONING',
    'label_7': 'ARG',
    'label_8': 'STATUTE',
    'label_9': 'PRECEDENT',
    'label_10': 'RPC',
    'label_11': 'ISSUE',
    'label_12': 'OTHER'
}

# Rename the columns of the DataFrame
df = df.rename(columns=column_name_mapping)

# Display the DataFrame with renamed columns
print(df)

                                                   Text  NAME  CITATION  \
0     chintaman rao v state madhya pradeshram krishn...     0         1   
1     equivalent citation 1951 all india reporter 11...     0         0   
2                                            78 79 1950     1         0   
3     application article 32 constitution india writ...     1         0   
4                                   gn joshi petitioner     0         0   
...                                                 ...   ...       ...   
1736  intestacy presumption exists certainly fortifi...     0         0   
1737  result appeal allowed judgment decree high cou...     0         0   
1738                               appellant cost court     0         0   
1739                                     appeal allowed     0         0   
1740  agent respondent number 1 m section krishnamoo...     1         0   

      COUNSEL  JUDGE  FACTS  RLC  REASONING  ARG  STATUTE  PRECEDENT  RPC  \
0           0      0  

In [11]:
df.head()

Unnamed: 0,Text,NAME,CITATION,COUNSEL,JUDGE,FACTS,RLC,REASONING,ARG,STATUTE,PRECEDENT,RPC,ISSUE,OTHER
0,chintaman rao v state madhya pradeshram krishn...,0,1,0,0,0,0,0,0,0,0,0,0,0
1,equivalent citation 1951 all india reporter 11...,0,0,1,0,0,0,0,0,0,0,0,0,0
2,78 79 1950,1,0,0,0,0,0,0,0,0,0,0,0,0
3,application article 32 constitution india writ...,1,0,0,0,0,0,0,0,0,0,0,0,0
4,gn joshi petitioner,0,0,0,1,0,0,0,0,0,0,0,0,0


In [12]:
from sklearn.model_selection import train_test_split

# Split the DataFrame into training (80%) and test (20%) sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [13]:
# Display the sizes of the training and test sets
print("Training set size:", len(train_df))


Training set size: 1392


In [14]:
print("Test set size:", len(test_df))

Test set size: 349


In [15]:
train_df.head()

Unnamed: 0,Text,NAME,CITATION,COUNSEL,JUDGE,FACTS,RLC,REASONING,ARG,STATUTE,PRECEDENT,RPC,ISSUE,OTHER
43,statute excess requirement language employed p...,0,0,0,0,0,0,0,1,0,0,0,0,0
100,gun shot wound 3 4 x5 8 left side chest 21 2 b...,0,0,0,0,0,1,0,0,0,0,0,0,0
274,opinion present similar lordship privy council...,0,0,0,0,0,0,0,0,0,0,1,0,0
1265,decree holder applied short adjournment ultima...,0,0,0,0,0,0,1,0,0,0,0,0,0
101,gun shot wound 1 2 x3 4 right side chest mid a...,0,0,0,0,0,1,0,0,0,0,0,0,0


In [16]:
train_df.columns

Index(['Text', 'NAME', 'CITATION', 'COUNSEL', 'JUDGE', 'FACTS', 'RLC',
       'REASONING', 'ARG', 'STATUTE', 'PRECEDENT', 'RPC', 'ISSUE', 'OTHER'],
      dtype='object')

In [17]:
target_list = ['NAME', 'CITATION', 'COUNSEL', 'JUDGE', 'FACTS', 'RLC',
       'REASONING', 'ARG', 'STATUTE', 'PRECEDENT', 'RPC', 'ISSUE', 'OTHER']

In [18]:
# hyperparameters
MAX_LEN = 512
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 32
EPOCHS = 5
LEARNING_RATE = 1e-05

In [19]:
from transformers import BertTokenizer, BertModel

In [20]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [21]:
class CustomDataset(torch.utils.data.Dataset):

    def __init__(self, df, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.df = df
        self.title = df['Text']
        self.targets = self.df[target_list].values
        self.max_len = max_len

    def __len__(self):
        return len(self.title)

    def __getitem__(self, index):
        title = str(self.title[index])
        title = " ".join(title.split())

        inputs = self.tokenizer.encode_plus(
            title,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'token_type_ids': inputs["token_type_ids"].flatten(),
            'targets': torch.FloatTensor(self.targets[index])
        }

In [22]:
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42)


In [23]:

# Optionally, you can reset the index for both DataFrames
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)





In [24]:
val_df.head()

Unnamed: 0,Text,NAME,CITATION,COUNSEL,JUDGE,FACTS,RLC,REASONING,ARG,STATUTE,PRECEDENT,RPC,ISSUE,OTHER
0,tribunal however agreed refer two question law...,1,0,0,0,0,0,0,0,0,0,0,0,0
1,provision impugned act bearing point contained...,1,0,0,0,0,0,0,0,0,0,0,0,0
2,2 4,0,0,0,0,0,1,0,0,0,0,0,0,0
3,meria,1,0,0,0,0,0,0,0,0,0,0,0,0
4,proviso section 3 madras act,0,0,0,0,0,0,0,0,0,1,0,0,0


In [25]:
train_dataset = CustomDataset(train_df, tokenizer, MAX_LEN)
valid_dataset = CustomDataset(val_df, tokenizer, MAX_LEN)

In [26]:

train_data_loader = torch.utils.data.DataLoader(train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=0
)

val_data_loader = torch.utils.data.DataLoader(valid_dataset,
    batch_size=VALID_BATCH_SIZE,
    shuffle=False,
    num_workers=0
)

In [27]:
#torch.device('cuda') if torch.cuda.is_available() else
device = torch.device('cpu')


In [28]:
device

device(type='cpu')

In [29]:
def load_ckp(checkpoint_fpath, model, optimizer):
    """
    checkpoint_path: path to save checkpoint
    model: model that we want to load checkpoint parameters into
    optimizer: optimizer we defined in previous training
    """
    # load check point
    checkpoint = torch.load(checkpoint_fpath)
    # initialize state_dict from checkpoint to model
    model.load_state_dict(checkpoint['state_dict'])
    # initialize optimizer from checkpoint to optimizer
    optimizer.load_state_dict(checkpoint['optimizer'])
    # initialize valid_loss_min from checkpoint to valid_loss_min
    valid_loss_min = checkpoint['valid_loss_min']
    # return model, optimizer, epoch value, min validation loss
    return model, optimizer, checkpoint['epoch'], valid_loss_min.item()

def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """
    state: checkpoint we want to save
    is_best: is this the best checkpoint; min validation loss
    checkpoint_path: path to save checkpoint
    best_model_path: path to save best model
    """
    f_path = checkpoint_path
    # save checkpoint data to the path given, checkpoint_path
    torch.save(state, f_path)
    # if it is a best model, min validation loss
    if is_best:
        best_fpath = best_model_path
        # copy that checkpoint file to best path given, best_model_path
        shutil.copyfile(f_path, best_fpath)

In [30]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.bert_model = BertModel.from_pretrained('bert-base-uncased', return_dict=True)
        self.dropout = torch.nn.Dropout(0.3)
        self.linear = torch.nn.Linear(768, 13)

    def forward(self, input_ids, attn_mask, token_type_ids):
        output = self.bert_model(
            input_ids,
            attention_mask=attn_mask,
            token_type_ids=token_type_ids
        )
        output_dropout = self.dropout(output.pooler_output)
        output = self.linear(output_dropout)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (bert_model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_a

In [31]:
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

optimizer = torch.optim.Adam(params = model.parameters(), lr=LEARNING_RATE)


In [32]:

val_targets=[]
val_outputs=[]

In [33]:
import pandas as pd
import numpy as np

# Define custom column names
columns = ["Epoch", "Training Loss", "Validation Loss"]

# Create an empty DataFrame with custom column names
loss_df = pd.DataFrame(columns=columns)

In [34]:
def train_model(n_epochs, training_loader, validation_loader, model, 
                optimizer, checkpoint_path, best_model_path):
   
  # initialize tracker for minimum validation loss
  valid_loss_min = np.Inf
   
 
  for epoch in range(1, n_epochs+1):
    train_loss = 0
    valid_loss = 0

    model.train()
    print('############# Epoch {}: Training Start   #############'.format(epoch))
    for batch_idx, data in enumerate(training_loader):
        #print('yyy epoch', batch_idx)
        ids = data['input_ids'].to(device, dtype = torch.long)
        mask = data['attention_mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        #if batch_idx%5000==0:
         #   print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #print('before loss data in training', loss.item(), train_loss)
        train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.item() - train_loss))
        #print('after loss data in training', loss.item(), train_loss)
    
    print('############# Epoch {}: Training End     #############'.format(epoch))
    
    print('############# Epoch {}: Validation Start   #############'.format(epoch))
    ######################    
    # validate the model #
    ######################
 
    model.eval()
   
    with torch.no_grad():
      for batch_idx, data in enumerate(validation_loader, 0):
            ids = data['input_ids'].to(device, dtype = torch.long)
            mask = data['attention_mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)

            loss = loss_fn(outputs, targets)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.item() - valid_loss))
            val_targets.extend(targets.cpu().detach().numpy().tolist())
            val_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())

      print('############# Epoch {}: Validation End     #############'.format(epoch))
      # calculate average losses
      # calculate average losses only if the data loaders are not empty
      if len(training_loader) > 0:
          train_loss = train_loss / len(training_loader)
      if len(validation_loader) > 0:
          valid_loss = valid_loss / len(validation_loader)
      # print training/validation statistics 
      print('Epoch: {} \tAvgerage Training Loss: {:.6f} \tAverage Validation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))
      
      # create checkpoint variable and add important data
      checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': valid_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
      }
        
        # save checkpoint
      save_ckp(checkpoint, False, checkpoint_path, best_model_path)
        
      ## TODO: save the model if validation loss has decreased
      if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
        # save checkpoint as best model
        save_ckp(checkpoint, True, checkpoint_path, best_model_path)
        valid_loss_min = valid_loss

    print('############# Epoch {}  Done   #############\n'.format(epoch))

  return model

In [35]:
ckpt_path = "./"
best_model_path = "./"

In [36]:
trained_model= train_model(EPOCHS, train_data_loader, val_data_loader, model, optimizer, ckpt_path, best_model_path)

############# Epoch 1: Training Start   #############
############# Epoch 1: Training End     #############
############# Epoch 1: Validation Start   #############
############# Epoch 1: Validation End     #############
Epoch: 1 	Avgerage Training Loss: 0.013740 	Average Validation Loss: 0.078853


RuntimeError: [enforce fail at inline_container.cc:462] . invalid file name: ./

In [None]:
# testing
example = test_df['Text'][0]
encodings = tokenizer.encode_plus(
    example,
    None,
    add_special_tokens=True,
    max_length=MAX_LEN,
    padding='max_length',
    return_token_type_ids=True,
    truncation=True,
    return_attention_mask=True,
    return_tensors='pt'
)
model.eval()
with torch.no_grad():
    input_ids = encodings['input_ids'].to(device, dtype=torch.long)
    attention_mask = encodings['attention_mask'].to(device, dtype=torch.long)
    token_type_ids = encodings['token_type_ids'].to(device, dtype=torch.long)
    output = model(input_ids, attention_mask, token_type_ids)
    final_output = torch.sigmoid(output).cpu().detach().numpy().tolist()
    print(train_df.columns[1:].to_list()[int(np.argmax(final_output, axis=1))])

In [2]:
import pandas as pd

df = pd.read_csv('combined_dataset.csv',sep=',',names=['label','sentence'])

In [11]:
df = df.drop(0)

In [12]:
df

Unnamed: 0,label,sentence
1,NAME,"THE NEW PIECEGOODS BAZAR CO., LTD.,BOMBAY vs T..."
2,CITATION,"Equivalent citations: 1950 AIR 165, 1950 SCR 553"
3,CITATION,Appeal No. LXVI of 1949.
4,STATUTE,"Appeal from the High Court of judicature, Bomb..."
5,COUNSEL,"K.M. Munshi (N. P. Nathvani, with him), for th..."
...,...,...
6070,REASONING,Rules of equity have no application.
6071,REASONING,where there are definite statutory provisions ...
6072,REASONING,While the courts necessarily are astute in che...
6073,RPC,For the reasons given above we concur in the c...


In [24]:
asp = {
"NAME" : 1,
"CITATION" : 2,
"COUNSEL" : 3,
"JUDGE" : 4,
"FACTS" : 5,
"RLC" : 6,
"REASONING" : 7,
"ARG" : 8,
"STATUTE" : 9,
"PRECEDENT" : 10,
"RPC" : 11,
"ISSUE" : 12
}

In [35]:
df = df.drop(df[df['label'] == 'OD'].index)   
df = df.drop(df[df['label'] == 'ISSUE'].index)   

In [36]:
cateogories = df.label.unique()

In [37]:
len(cateogories)

11

In [38]:
label_counts = df['label'].value_counts()

In [39]:
label_counts

label
REASONING    3291
FACTS        1458
PRECEDENT     271
RPC           242
STATUTE       176
RLC           171
COUNSEL       128
ARG           121
JUDGE          64
CITATION       60
NAME           59
Name: count, dtype: int64

In [40]:
df

Unnamed: 0,label,sentence
1,NAME,"THE NEW PIECEGOODS BAZAR CO., LTD.,BOMBAY vs T..."
2,CITATION,"Equivalent citations: 1950 AIR 165, 1950 SCR 553"
3,CITATION,Appeal No. LXVI of 1949.
4,STATUTE,"Appeal from the High Court of judicature, Bomb..."
5,COUNSEL,"K.M. Munshi (N. P. Nathvani, with him), for th..."
...,...,...
6070,REASONING,Rules of equity have no application.
6071,REASONING,where there are definite statutory provisions ...
6072,REASONING,While the courts necessarily are astute in che...
6073,RPC,For the reasons given above we concur in the c...


In [41]:
df.to_csv('final.csv', index=False)

In [42]:
df

Unnamed: 0,label,sentence
1,NAME,"THE NEW PIECEGOODS BAZAR CO., LTD.,BOMBAY vs T..."
2,CITATION,"Equivalent citations: 1950 AIR 165, 1950 SCR 553"
3,CITATION,Appeal No. LXVI of 1949.
4,STATUTE,"Appeal from the High Court of judicature, Bomb..."
5,COUNSEL,"K.M. Munshi (N. P. Nathvani, with him), for th..."
...,...,...
6070,REASONING,Rules of equity have no application.
6071,REASONING,where there are definite statutory provisions ...
6072,REASONING,While the courts necessarily are astute in che...
6073,RPC,For the reasons given above we concur in the c...


In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the data
data = pd.read_csv('final.csv')

# Shuffle the data
data = data.sample(frac=1).reset_index(drop=True)

# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Split the training data into training and validation sets
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)

print(train_data.shape, val_data.shape, test_data.shape)

(3624, 2) (1208, 2) (1209, 2)


In [46]:
train_data

Unnamed: 0,label,sentence
641,COUNSEL,"M.C. Setalvad, Attorney General for India (G. ..."
5429,FACTS,"Firangojirao died in 1919, leaving Bhimabai, h..."
2217,FACTS,"The suit, out of which the appeal arises, was ..."
3071,FACTS,This document was forged with the intention of...
3373,RLC,""" The trial Court dismissed the latter petitio..."
...,...,...
853,REASONING,In the present case there was not even an alle...
5488,FACTS,He stated that although he was reluctant to ex...
5781,FACTS,29 222 The first argument which turns on the c...
3325,REASONING,The extent of interest taken by the grantee in...


# Back translation

In [61]:
!pip install translators

import pandas as pd
# current version have logs, which is not very comfortable
import translators as ts
from multiprocessing import Pool
from tqdm import *

CSV_PATH = './final.csv'
LANG = 'es'
API = 'google'


def translator_constructor(api):
    if api == 'google':
        return ts.google
    elif api == 'bing':
        return ts.bing
    elif api == 'baidu':
        return ts.baidu
    elif api == 'sogou':
        return ts.sogou
    elif api == 'youdao':
        return ts.youdao
    elif api == 'tencent':
        return ts.tencent
    elif api == 'alibaba':
        return ts.alibaba
    else:
        raise NotImplementedError(f'{api} translator is not realised!')


def translate(x):
    try:
        return [x[0], translator_constructor(API)(x[1], 'en', LANG)]
    except:
        return [x[0], x[1]]


def imap_unordered_bar(func, args, n_processes: int = 48):
    p = Pool(n_processes, maxtasksperchild=100)
    res_list = []
    with tqdm(total=len(args)) as pbar:
        for i, res in tqdm(enumerate(p.imap_unordered(func, args))):
            pbar.update()
            res_list.append(res)
    pbar.close()
    p.close()
    p.join()
    return res_list


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [53]:
newdf = pd.read_csv(CSV_PATH).sample(100)
# tqdm.pandas('Translation progress')

In [None]:
newdf = pd.read_csv(CSV_PATH).sample(100)
# tqdm.pandas('Translation progress')
newdf[['label', 'sentence']] = imap_unordered_bar(translate, df[['label','sentence']].values)
df.to_csv(f'final-{API}-{LANG}.csv')

  0%|          | 0/6041 [00:00<?, ?it/s]Process SpawnPoolWorker-3:
Process SpawnPoolWorker-1:
Process SpawnPoolWorker-2:
Process SpawnPoolWorker-4:
Process SpawnPoolWorker-5:
Traceback (most recent call last):
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'translate' on <module '__main__' (built-in)>
Process SpawnPoolWorker-6:
Traceback (most recent call last):
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/process.p

KeyboardInterrupt: 

Process SpawnPoolWorker-1616:
Traceback (most recent call last):
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'translate' on <module '__main__' (built-in)>
Process SpawnPoolWorker-1617:
Traceback (most recent call last):
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/meetbanthia/miniconda3/envs/torch/lib/python3.9/multiprocessing/process.py", line 10