https://huggingface.co/transformers/training.html

In [None]:
# Load BertForSequenceClassification from the 'bert-base-uncased' checkpoint.
from transformers import BertForSequenceClassification
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True)
# Switch the module to training mode before any optimisation step.
model.train()

In [None]:
# Simplest optimizer setup: one AdamW instance over every model parameter, lr=1e-5.
from transformers import AdamW
optimizer = AdamW(model.parameters(), lr=1e-5)

In [None]:
# Parameters whose name contains one of these fragments get weight_decay 0.0.
no_decay = ['bias', 'LayerNorm.weight']


def _is_decay_exempt(param_name):
    """Return True when param_name contains any fragment listed in no_decay."""
    return any(fragment in param_name for fragment in no_decay)


# Two parameter groups: decayed weights first, decay-exempt parameters second.
optimizer_grouped_parameters = [
    {
        'params': [weight for name, weight in model.named_parameters() if not _is_decay_exempt(name)],
        'weight_decay': 0.01,
    },
    {
        'params': [weight for name, weight in model.named_parameters() if _is_decay_exempt(name)],
        'weight_decay': 0.0,
    },
]
# Replaces any previously built optimizer with one that honours the per-group decay.
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)

In [None]:
from transformers import get_linear_schedule_with_warmup
# NOTE(review): num_warmup_steps and num_train_steps are not assigned in any cell above;
# they have to be defined before this cell can run on a fresh kernel.
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_train_steps)

# One update: backpropagate, apply the optimizer step, then advance the scheduler.
# NOTE(review): `loss` is not computed in any cell above either — this looks like an
# illustrative snippet rather than a runnable step; confirm before relying on it.
loss.backward()
optimizer.step()
scheduler.step()

In [None]:
# Turn off gradient tracking for every parameter of model.base_model
# (presumably to train only the layers outside the base model — confirm intent).
for param in model.base_model.parameters():
    param.requires_grad = False

In [1]:
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

In [None]:
'a'

In [1]:
import os
from pathlib import Path
import logging
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from time import time
from datetime import date
import argparse
import matplotlib.pyplot as plt

from modules.data import ImportData, QuoraQuestionDataset
from modules.embeddings import EmbeddedVocab
from modules.models import SiameseBERT, ClassifierBERT
from modules.utils import collate_fn_bert, train_bert, eval_bert, setup_logger


# ---------------------------------------------------------------------------
# Run configuration: output locations, hyper-parameters, logger and data.
# ---------------------------------------------------------------------------
today = str(date.today())
path = Path(f'./logs/train_job_{today}/')
emb_path = Path('./logs/embeddings')
data_path = Path('./logs/data')

parser = argparse.ArgumentParser()
parser.add_argument("-model_name", "--model_name", type=str, help="Name of trained model. Needed only for correct logs output", default='bert')
# Path-valued options are parsed as Path so that the "/" joins below also work
# when a value is supplied explicitly instead of falling back to the default.
parser.add_argument("-log", "--logdir", type=Path, help="Directory to save all downloaded files, and model checkpoints.", default=path)
parser.add_argument("-df", "--data_file", type=Path, help="Path to dataset.", default=data_path/"dataset.csv")
parser.add_argument("-s", "--split_seed", type=int, help="Seed for splitting the dataset.", default=44)
parser.add_argument("-b", "--batch_size", type=int, help="Batch Size.", default=8)
parser.add_argument("-epo", "--n_epoch", type=int, help="Number of epochs.", default=4)
parser.add_argument("-bert_cls", "--bert_cls", type=str, help="Type of BERT trained (classificator, siamese).", default='classifier')
parser.add_argument("-bert_backbone", "--bert_backbone", type=str, help="Either path to the model, or name of the BERT model that should be used, compatible with HuggingFace Transformers.", default='bert-base-uncased')

# An explicit empty argument list: inside a notebook only the defaults are used.
args = parser.parse_args([])
args.logdir = args.logdir/args.model_name
model_path = args.logdir/'best_model/'
# Idempotent: safe to re-run the cell when the directory already exists.
args.logdir.mkdir(parents=True, exist_ok=True)

logger = setup_logger(str(args.logdir/'logs.log'))
logger.info("Begining job. All files and logs will be saved at: {}".format(args.logdir))


logger.info('Reading Dataset and splitting into train and test datasets with seed: {}'.format(args.split_seed))
data = ImportData(str(args.data_file))
data.train_test_split(seed=args.split_seed)


logger.info('')
logger.info('Number of training samples        :{}'.format(len(data.train)))
logger.info('Number of validation samples      :{}'.format(len(data.test)))
logger.info('')

# Prefer the GPU when one is visible to PyTorch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Only the training loader is shuffled; both use the project's BERT collate function.
train_dataloader = DataLoader(data.train.values, batch_size = args.batch_size, shuffle=True, collate_fn=collate_fn_bert)
test_dataloader = DataLoader(data.test.values, batch_size= args.batch_size, shuffle=False, collate_fn=collate_fn_bert)

2020-09-21 13:20:03,235 | Begining job. All files and logs will be saved at: logs\train_job_2020-09-21\bert
2020-09-21 13:20:03,238 | Reading Dataset and splitting into train and test datasets with seed: 44
2020-09-21 13:20:04,120 | 
2020-09-21 13:20:04,121 | Number of training samples        :364287
2020-09-21 13:20:04,122 | Number of validation samples      :40000
2020-09-21 13:20:04,124 | 


In [3]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(pred):
    """Summarise a prediction object as accuracy, F1, precision and recall.

    The reference labels are read from ``pred.label_ids``; the predicted class
    is the argmax over the last axis of ``pred.predictions``. Precision, recall
    and F1 are computed with ``average='binary'``.

    Returns:
        dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    y_true = pred.label_ids
    y_hat = pred.predictions.argmax(-1)
    # The fourth element returned by sklearn (support) is not reported.
    precision, recall, f1 = precision_recall_fscore_support(y_true, y_hat, average='binary')[:3]
    accuracy = accuracy_score(y_true, y_hat)
    return dict(accuracy=accuracy, f1=f1, precision=precision, recall=recall)

In [5]:
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

# Trainer is typed to receive a PreTrainedModel and takes care of multi-GPU
# execution itself, so the model is handed over bare rather than wrapped in
# nn.DataParallel (the wrapper hides the attributes of the underlying model).
model = BertForSequenceClassification.from_pretrained("bert-large-uncased")

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    overwrite_output_dir = True,     # reuse output_dir even if it already has content
    evaluate_during_training = True, # run evaluation while training
    logging_first_step = True,       # also log the very first step
    num_train_epochs=3,              # total # of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
)

- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Wire model, training arguments, collate function, datasets and metric callback into a Trainer.
# NOTE(review): the saved output of this cell shows the constructor failing inside
# tensorboard's SummaryWriter with "module 'tensorflow' has no attribute 'io'".
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,  # training arguments, defined above
    data_collator=collate_fn_bert,  # project collate function used to assemble batches
    train_dataset=data.train.values,         # training dataset
    eval_dataset=data.test.values,  # evaluation dataset
    compute_metrics = compute_metrics  # callback returning accuracy / f1 / precision / recall
)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-26fae992afee>", line 6, in <module>
    eval_dataset=data.test.values
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\transformers\trainer.py", line 199, in __init__
    self.tb_writer = SummaryWriter(log_dir=self.args.logging_dir)
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\tensorboard\writer.py", line 225, in __init__
    self._get_file_writer()
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\tensorboard\writer.py", line 256, in _get_file_writer
    self.flush_secs, self.filename_suffix)
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\tensorboard\writer.py", lin

AttributeError: module 'tensorflow' has no attribute 'io'

In [6]:
??TrainingArguments

[1;31mInit signature:[0m
[0mTrainingArguments[0m[1;33m([0m[1;33m
[0m    [0moutput_dir[0m[1;33m:[0m [0mstr[0m[1;33m,[0m[1;33m
[0m    [0moverwrite_output_dir[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mdo_train[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mdo_eval[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mdo_predict[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mevaluate_during_training[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mper_device_train_batch_size[0m[1;33m:[0m [0mint[0m [1;33m=[0m [1;36m8[0m[1;33m,[0m[1;33m
[0m    [0mper_device_eval_batch_size[0m[1;33m:[0m [0mint[0m [1;33m=[0m [1;36m8[0m[1;33m,[0m[1;33m
[0m    [0mper_gpu_train_batch_size[0m[1;33m:[0m [0mUnion[0m[1;33m[[0m[0mint[0m[1;33m,[0m [0mNon

In [2]:
??Trainer

[1;31mInit signature:[0m
[0mTrainer[0m[1;33m([0m[1;33m
[0m    [0mmodel[0m[1;33m:[0m [0mtransformers[0m[1;33m.[0m[0mmodeling_utils[0m[1;33m.[0m[0mPreTrainedModel[0m[1;33m,[0m[1;33m
[0m    [0margs[0m[1;33m:[0m [0mtransformers[0m[1;33m.[0m[0mtraining_args[0m[1;33m.[0m[0mTrainingArguments[0m[1;33m,[0m[1;33m
[0m    [0mdata_collator[0m[1;33m:[0m [0mUnion[0m[1;33m[[0m[0mDataCollator[0m[1;33m,[0m [0mNoneType[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mtrain_dataset[0m[1;33m:[0m [0mUnion[0m[1;33m[[0m[0mtorch[0m[1;33m.[0m[0mutils[0m[1;33m.[0m[0mdata[0m[1;33m.[0m[0mdataset[0m[1;33m.[0m[0mDataset[0m[1;33m,[0m [0mNoneType[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0meval_dataset[0m[1;33m:[0m [0mUnion[0m[1;33m[[0m[0mtorch[0m[1;33m.[0m[0mutils[0m[1;33m.[0m[0mdata[0m[1;33m.[0m[0mdataset[0m[1;33m.[0m[0mDataset[0m[1;33m,[0m [0mNon

In [1]:
import os
from pathlib import Path
import logging
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from time import time
from datetime import date
import argparse
import matplotlib.pyplot as plt

from modules.data import ImportData, QuoraQuestionDataset
from modules.embeddings import EmbeddedVocab
from modules.models import SiameseBERT, ClassifierBERT
from modules.utils import collate_fn_bert, train_bert, eval_bert, setup_logger


# ---------------------------------------------------------------------------
# Run configuration: output locations, hyper-parameters, logger and data.
# ---------------------------------------------------------------------------
today = str(date.today())
path = Path(f'./logs/train_job_{today}/')
emb_path = Path('./logs/embeddings')
data_path = Path('./logs/data')

parser = argparse.ArgumentParser()
parser.add_argument("-model_name", "--model_name", type=str, help="Name of trained model. Needed only for correct logs output", default='bert')
# Path-valued options are parsed as Path so that the "/" joins below also work
# when a value is supplied explicitly instead of falling back to the default.
parser.add_argument("-log", "--logdir", type=Path, help="Directory to save all downloaded files, and model checkpoints.", default=path)
parser.add_argument("-df", "--data_file", type=Path, help="Path to dataset.", default=data_path/"dataset.csv")
parser.add_argument("-s", "--split_seed", type=int, help="Seed for splitting the dataset.", default=44)
parser.add_argument("-b", "--batch_size", type=int, help="Batch Size.", default=8)
parser.add_argument("-epo", "--n_epoch", type=int, help="Number of epochs.", default=4)
parser.add_argument("-bert_cls", "--bert_cls", type=str, help="Type of BERT trained (classificator, siamese).", default='classifier')
parser.add_argument("-bert_backbone", "--bert_backbone", type=str, help="Either path to the model, or name of the BERT model that should be used, compatible with HuggingFace Transformers.", default='bert-base-uncased')

# An explicit empty argument list: inside a notebook only the defaults are used.
args = parser.parse_args([])
args.logdir = args.logdir/args.model_name
model_path = args.logdir/'best_model/'
# Idempotent: safe to re-run the cell when the directory already exists.
args.logdir.mkdir(parents=True, exist_ok=True)

logger = setup_logger(str(args.logdir/'logs.log'))
logger.info("Begining job. All files and logs will be saved at: {}".format(args.logdir))


logger.info('Reading Dataset and splitting into train and test datasets with seed: {}'.format(args.split_seed))
data = ImportData(str(args.data_file))
data.train_test_split(seed=args.split_seed)


logger.info('')
logger.info('Number of training samples        :{}'.format(len(data.train)))
logger.info('Number of validation samples      :{}'.format(len(data.test)))
logger.info('')

# Prefer the GPU when one is visible to PyTorch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Only the training loader is shuffled; both use the project's BERT collate function.
train_dataloader = DataLoader(data.train.values, batch_size = args.batch_size, shuffle=True, collate_fn=collate_fn_bert)
test_dataloader = DataLoader(data.test.values, batch_size= args.batch_size, shuffle=False, collate_fn=collate_fn_bert)

# Pick the architecture and its matching loss from --bert_cls:
# 'siamese' -> SiameseBERT + MSE, anything else -> ClassifierBERT + cross-entropy.
if args.bert_cls == 'siamese':
    model = SiameseBERT(args.bert_backbone, device)
    criterion = nn.MSELoss()
else:
    model = ClassifierBERT(args.bert_backbone, device)
    criterion = nn.CrossEntropyLoss()

# Cast to float32, wrap for data-parallel execution, then move to the target device.
model = nn.DataParallel(model.float()).to(device)

# NOTE(review): Adam is created with its default learning rate here, whereas the
# AdamW examples earlier in this notebook use lr=1e-5 — confirm this is intended.
optimizer = torch.optim.Adam(model.parameters())

# Record the run configuration in the job log, one line per setting.
for message in (
    'Building model.',
    '--------------------------------------',
    'Model Parameters:',
    f'Bert Backbone:               :{args.bert_backbone}',
    '--------------------------------------',
    'Training Parameters:',
    f'Device                       :{device!s}',
    'Optimizer                    : Adam',
    f"Loss function                :{'MSE' if args.bert_cls == 'siamese' else 'CE'}",
    f'Batch Size                   :{args.batch_size}',
    f'Number of Epochs             :{args.n_epoch}',
    '--------------------------------------',
):
    logger.info(message)

# ---------------------------------------------------------------------------
# Training loop: one train pass and one evaluation pass per epoch; the
# checkpoint is written only when test accuracy beats the best value so far.
# ---------------------------------------------------------------------------
start = time()
all_train_losses = []
all_test_losses = []
train_accuracies = []
test_accuracies = []
best_acc = 0.5  # a checkpoint is only worth saving above this accuracy
logger.info("Training the model...")
for epoch in range(args.n_epoch):
    epoch_time = time()
    # train_bert / eval_bert fill these lists in place.
    # assumes they append per-batch losses and per-batch correct counts — TODO confirm in modules.utils
    epoch_loss = []
    preds_train = []

    train_bert(model, optimizer, criterion, train_dataloader, device, epoch_loss, preds_train, epoch, logger)

    eval_loss = []
    preds_test = []
    eval_bert(model, criterion, test_dataloader, device, eval_loss, preds_test)

    train_loss = np.mean(epoch_loss)
    train_accuracy = np.sum(preds_train)/data.train.shape[0]
    test_loss = np.mean(eval_loss)
    test_accuracy = np.sum(preds_test)/data.test.shape[0]

    if test_accuracy > best_acc:
        # Raise the bar so a later, worse epoch cannot overwrite the best checkpoint.
        best_acc = test_accuracy
        model_path.mkdir(parents=True, exist_ok=True)
        checkpoint_file = str(model_path/'checkpoint.pth')
        logger.info('Saving best model at: {}'.format(checkpoint_file))
        torch.save({
            'epoch': epoch,
            # model is wrapped in nn.DataParallel; save the underlying module's weights.
            'model_state_dict': model.module.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'test_loss': test_loss,
            'test_accuracy': test_accuracy
            }, checkpoint_file)

    all_train_losses.append(train_loss)
    all_test_losses.append(test_loss)
    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)

    logger.info('Mean loss and accuracy of epoch {} - train: {}, {}, test: {}, {}. Calculation time: {} hours'.format(epoch, train_loss, round(train_accuracy, 4), test_loss, round(test_accuracy, 4), (time() - epoch_time)/3600))

# The elapsed time reported here is in minutes (seconds / 60).
logger.info("Model training finished in: {}".format(np.round((time()-start)/60, 3)))

# Loss curves per epoch, saved next to the run logs.
plt.figure(figsize=(10,6))
plt.title(f'Train and test losses during training of {args.model_name} model')
plt.plot(range(len(all_train_losses)), all_train_losses, label='train')
plt.plot(range(len(all_test_losses)), all_test_losses, label='test')
plt.legend()
plt.grid(alpha=0.5)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.savefig(args.logdir/'loss_plots.png')
plt.show()

# Accuracy curves per epoch; the title now names accuracies instead of
# repeating the loss title from the figure above.
plt.figure(figsize=(10,6))
plt.title(f'Train and test accuracies during training of {args.model_name} model')
plt.plot(range(len(train_accuracies)), train_accuracies, label='train')
plt.plot(range(len(test_accuracies)), test_accuracies, label='test')
plt.legend()
plt.grid(alpha=0.5)
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.savefig(args.logdir/'acc_plots.png')
plt.show()

2020-09-21 08:44:48,297 | Begining job. All files and logs will be saved at: logs\train_job_2020-09-21\bert
2020-09-21 08:44:48,300 | Reading Dataset and splitting into train and test datasets with seed: 44
2020-09-21 08:44:49,179 | 
2020-09-21 08:44:49,180 | Number of training samples        :364287
2020-09-21 08:44:49,181 | Number of validation samples      :40000
2020-09-21 08:44:49,181 | 
2020-09-21 08:44:54,625 | Building model.
2020-09-21 08:44:54,626 | --------------------------------------
2020-09-21 08:44:54,627 | Model Parameters:
2020-09-21 08:44:54,627 | Bert Backbone:               :bert-base-uncased
2020-09-21 08:44:54,628 | --------------------------------------
2020-09-21 08:44:54,628 | Training Parameters:
2020-09-21 08:44:54,630 | Device                       :cuda
2020-09-21 08:44:54,630 | Optimizer                    : Adam
2020-09-21 08:44:54,631 | Loss function                :CE
2020-09-21 08:44:54,632 | Batch Size                   :8
2020-09-21 08:44:54,632 | N

HBox(children=(FloatProgress(value=0.0, max=45536.0), HTML(value='')))

2020-09-21 08:44:55,242 | Mean loss till 0th iteration of epoch 0: 0.5728853940963745


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-67e43ab18ede>", line 93, in <module>
    train_bert(model, optimizer, criterion, train_dataloader, device, epoch_loss, preds_train, epoch, logger)
  File "C:\Users\rafal.wojcik\Desktop\magisterka\code\repo\SentenceBERT_vs_SiameseLSTM\modules\utils.py", line 121, in train_bert
    loss.backward()
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\tensor.py", line 195, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\autograd\__init__.py", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0;

RuntimeError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 4.00 GiB total capacity; 2.72 GiB already allocated; 9.63 MiB free; 2.92 GiB reserved in total by PyTorch)

-------------

In [1]:
import os
from pathlib import Path
import logging
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from time import time
from datetime import date
import argparse


from modules.data.data import ImportData
from modules.models.models import SiameseBERT, ClassifierBERT
from modules.utils.utils import collate_fn_bert, train, eval, setup_logger

In [2]:
# Load the dataset from 'train.csv' and split it into train and test parts.
# NOTE(review): no seed is passed here, while the other cells in this notebook call
# train_test_split(seed=...) — the split may differ between runs; confirm the default.
dataset = ImportData('train.csv')
dataset.train_test_split()

In [3]:
# Prefer the GPU when one is visible to PyTorch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Batches of 2 samples; only the training loader is shuffled.
train_dataloader = DataLoader(dataset.train.values, batch_size = 2, shuffle=True, collate_fn=collate_fn_bert)
test_dataloader = DataLoader(dataset.test.values, batch_size=2, shuffle=False, collate_fn=collate_fn_bert)

In [4]:
# Build the classifier variant on top of 'bert-base-uncased'.
model = ClassifierBERT('bert-base-uncased', device)  # alternative: SiameseBERT('bert-base-uncased', device)
# Wrap for data-parallel execution, then move to the selected device.
model = nn.DataParallel(model)
model = model.to(device)
# Cross-entropy loss is paired with the classifier variant.
criterion = nn.CrossEntropyLoss()

In [6]:
from tqdm.notebook import tqdm

In [7]:
# Forward-only pass over the training loader: collects the per-batch loss and
# counts correctly classified samples.
losses = []
acc = 0
# Nothing is backpropagated in this loop, so autograd bookkeeping is skipped
# to keep GPU memory usage down.
with torch.no_grad():
    for inputs in tqdm(train_dataloader):
        # The labels are the third element of each collated batch.
        y_true = inputs[2].to(device)
        out = model(inputs)
        loss = criterion(out, y_true)
        losses.append(loss.item())

        # CrossEntropyLoss consumes one score per class, so the predicted label is
        # the argmax over the class axis; thresholding the scores at 0.5 and
        # comparing them with class indices does not count correct predictions.
        acc += (out.argmax(dim=-1) == y_true).sum().item()

HBox(children=(FloatProgress(value=0.0, max=182144.0), HTML(value='')))

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-c38724add86a>", line 7, in <module>
    losses.append(loss.item())
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2040, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\rafal.wojcik\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\pywrap_tensorflow.py", line 58, in <module>
    from tensorflow.python.pywrap_tenso

KeyboardInterrupt: 

Do wczytywania pretrenowanego BERT-a z poprzednich treningów:

In [None]:
    def save(self, output_path: str):
        """Persist the model, the tokenizer and the module config under output_path.

        ``save_pretrained`` is called on ``self.bert`` and then on
        ``self.tokenizer``; afterwards the dict returned by
        ``self.get_config_dict()`` is written as indented JSON to
        'sentence_bert_config.json' inside the same directory.
        """
        for component in (self.bert, self.tokenizer):
            component.save_pretrained(output_path)

        config_file = os.path.join(output_path, 'sentence_bert_config.json')
        with open(config_file, 'w') as handle:
            json.dump(self.get_config_dict(), handle, indent=2)

    @staticmethod
    def load(input_path: str):
        """Rebuild a BERT module from a directory containing 'sentence_bert_config.json'.

        The JSON file is read from input_path and its entries are forwarded as
        keyword arguments to ``BERT``, with ``model_name_or_path`` set to
        input_path.
        """
        config_file = os.path.join(input_path, 'sentence_bert_config.json')
        with open(config_file) as handle:
            saved_kwargs = json.load(handle)
        return BERT(model_name_or_path=input_path, **saved_kwargs)

In [1]:
from datasets import load_dataset

PyTorch version 1.4.0 available.


In [None]:
# Fetch the 'quora' dataset through the `datasets` library.
# NOTE(review): the variable is called squad_dataset although it holds the Quora data —
# consider renaming once no other cell depends on this name.
squad_dataset = load_dataset('quora')

https://raw.githubusercontent.com/huggingface/datasets/1.0.1/datasets/quora/quora.py not found in cache or force_download set to True, downloading to C:\Users\rafal.wojcik\.cache\huggingface\datasets\tmpy3ug5j8n


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1136.0, style=ProgressStyle(description…

storing https://raw.githubusercontent.com/huggingface/datasets/1.0.1/datasets/quora/quora.py in cache at C:\Users\rafal.wojcik\.cache\huggingface\datasets\8d401fbfd0add0660cc0df5713dd4060628c75ae1ca59aa7a2ae5e3bf3f44143.cbc360d66db33907209db89868294e636e141d7865401b2d42dfce5c8e8b5c25.py
creating metadata file for C:\Users\rafal.wojcik\.cache\huggingface\datasets\8d401fbfd0add0660cc0df5713dd4060628c75ae1ca59aa7a2ae5e3bf3f44143.cbc360d66db33907209db89868294e636e141d7865401b2d42dfce5c8e8b5c25.py





Checking C:\Users\rafal.wojcik\.cache\huggingface\datasets\8d401fbfd0add0660cc0df5713dd4060628c75ae1ca59aa7a2ae5e3bf3f44143.cbc360d66db33907209db89868294e636e141d7865401b2d42dfce5c8e8b5c25.py for additional imports.
Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.0.1/datasets/quora/quora.py at C:\Users\rafal.wojcik\.cache\huggingface\modules\datasets_modules\datasets\quora
Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.0.1/datasets/quora/quora.py at C:\Users\rafal.wojcik\.cache\huggingface\modules\datasets_modules\datasets\quora\2be517cf0ac6de94b77a103a36b141347a13f40637fbebaccb56ddbe397876be
Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.0.1/datasets/quora/quora.py to C:\Users\rafal.wojcik\.cache\huggingface\modules\datasets_modules\datasets\quora\2be517cf0ac6de94b77a103a36b141347a13f40637fbebaccb56ddbe397876be\quora.py
Couldn't find dataset infos file at https:/