In [1]:
# install Huggingface Transformers library
!pip install transformers --quiet
!pip install datasets --quiet

In [2]:
# import necessary libraries
import os
import torch
import random
import numpy as np
import pandas as pd
from os.path import join
from datasets import Dataset
from google.colab import files
from datasets import concatenate_datasets, load_dataset
from transformers.file_utils import is_tf_available, is_torch_available, is_torch_tpu_available

In [3]:
def set_seed(seed: int):
    """Seed the random number generators used in this notebook.

    Always seeds Python's ``random`` module and NumPy's global generator.
    When ``is_torch_available()`` is true, also calls ``torch.manual_seed``
    and ``torch.cuda.manual_seed_all``; when ``is_tf_available()`` is true,
    also calls ``tensorflow.random.set_seed``.

    Args:
        seed: Integer passed unchanged to every seeding function.
    """
    # Generators that are always present.
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)

    # PyTorch: the default generator first, then the CUDA generators.
    if is_torch_available():
        for seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all):
            seed_fn(seed)

    # TensorFlow is imported lazily so it is only loaded when reported available.
    if is_tf_available():
        import tensorflow
        tensorflow.random.set_seed(seed)

# Fixed seed for the whole notebook; applied immediately so every later cell
# runs with the random number generators already seeded.
seed = 35
set_seed(seed)

In [4]:
# Load the 'mc500' validation split of MCTest, drop the columns that are not
# used in this notebook, and expose the answer options under the name "answers".
dataset_mc = (
    load_dataset('sagnikrayc/mctest', 'mc500', split='validation')
    .remove_columns(['idx', 'properties', 'answer', 'question_is_multiple'])
    .rename_column("answer_options", "answers")
)

Reusing dataset mc_test (/root/.cache/huggingface/datasets/sagnikrayc___mc_test/mc500/1.0.0/90d0767a5628921c59c5a1e8d4b25dbf398d2e0fb6300d96bd4c364a3859e93f)


In [5]:
# Convert the dataset into a pandas DataFrame and report its (rows, columns) shape.
df_mc = pd.DataFrame(dataset_mc)
print(f"Rows and Columns MCTest: {df_mc.shape}")

Rows and Columns MCTest: (200, 3)


In [6]:
# Preview the first rows of the frame (question, story and answers columns).
df_mc.head()

Unnamed: 0,question,story,answers
0,"If the rain arrived on Saturday, what day did ...","One sunny morning, Stewart chose to go to the ...","{'A': 'Friday', 'B': 'Monday', 'C': 'Sunday', ..."
1,What was Stewart doing at the beach?,"One sunny morning, Stewart chose to go to the ...","{'A': 'reading a newspaper', 'B': 'looking for..."
2,What did the bug want from Stewart?,"One sunny morning, Stewart chose to go to the ...","{'A': 'food', 'B': 'to read Stewart's newspape..."
3,What was the bug that Stewart found at the beach?,"One sunny morning, Stewart chose to go to the ...","{'A': 'beetle', 'B': 'caterpillar', 'C': 'ant'..."
4,What does Mommy do as a job?,My mommy has a really cool job! She makes cost...,"{'A': 'filming movies', 'B': 'making costumes ..."


In [7]:
# Label the row index "index" so it is written under that header in the CSV
# export. rename_axis returns a new frame, so no temporary column or in-place
# mutation is needed and re-running this cell gives the same result.
df_mc = df_mc.rename_axis('index')

In [8]:
# Preview again to confirm the row index now carries the name "index".
df_mc.head()

Unnamed: 0_level_0,question,story,answers
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,"If the rain arrived on Saturday, what day did ...","One sunny morning, Stewart chose to go to the ...","{'A': 'Friday', 'B': 'Monday', 'C': 'Sunday', ..."
1,What was Stewart doing at the beach?,"One sunny morning, Stewart chose to go to the ...","{'A': 'reading a newspaper', 'B': 'looking for..."
2,What did the bug want from Stewart?,"One sunny morning, Stewart chose to go to the ...","{'A': 'food', 'B': 'to read Stewart's newspape..."
3,What was the bug that Stewart found at the beach?,"One sunny morning, Stewart chose to go to the ...","{'A': 'beetle', 'B': 'caterpillar', 'C': 'ant'..."
4,What does Mommy do as a job?,My mommy has a really cool job! She makes cost...,"{'A': 'filming movies', 'B': 'making costumes ..."


In [9]:
# Write the frame (index included, pandas' default) to a CSV file, then hand
# that file to Colab's download helper. to_csv returns None when given a path,
# so its result is deliberately not bound to a name.
csv_path = 'data-validation.csv'
df_mc.to_csv(csv_path)
files.download(csv_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>