In [None]:
!pip install -q -U google-generativeai
!pip install datasets --quiet

### Import packages

Import the necessary packages.

In [1]:
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [2]:

from google.colab import userdata

### Setup your API key

Before you can use the Gemini API, you must first obtain an API key. If you don't already have one, create a key with one click in Google AI Studio.

<a class="button button-primary" href="https://makersuite.google.com/app/apikey" target="_blank" rel="noopener noreferrer">Get an API key</a>


In [3]:
key = '' # your api key
genai.configure(api_key = key)

## Generate text from text inputs

For text-only prompts, use the `gemini-pro` model:

In [4]:
model = genai.GenerativeModel('gemini-pro')

In [5]:
from datasets import load_dataset

In [7]:
dataset = load_dataset('squad_v2',split = 'train')
dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 130319
})

In [8]:
dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 130319
})

In [9]:
dataset_size = 2500  
num_examples =  dataset_size//5
data = set()
for i in dataset['context']:
  data.add(i)
  if len(data)>num_examples:
    break

In [10]:
data = list(data)

In [11]:
len(data)

501

In [12]:
import warnings
from tqdm import tqdm

# Creating an empty dataframe

In [13]:
import pandas as pd
d = {'input_ids':[],'labels':[]}
df = pd.DataFrame(d)

In [14]:
df.head(20)

Unnamed: 0,input_ids,labels


In [15]:
columns = ['input_ids','labels']
columns

['input_ids', 'labels']

# Generating Multiple choice 

In [None]:
#True Or False
count = 0
for i in tqdm(data):
    query_format = f"""For the below context generate 3 True and 3 False  with the following instructions
instruction:- Question should be prefixed with [QUESTION]: tag and answer with [ANSWER]:. Answer don't need any justification
context:{i}"""
    try:
      response = model.generate_content(query_format)
      for res in response.text.replace('•', '  *').split('\n\n'):
        result = res.replace('\n',' ')
        if 'True' or "False" in result:
          with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=DeprecationWarning)
            df = df.append(pd.Series([i+'\n Generate True or False',result],index=columns),ignore_index=True)
    except:
      pass
    if count%200==0:
      df.to_csv(f'QuizBotAI_dataset-{count}.csv',ignore_index = True)
    count+=1

In [None]:
df.head()

Unnamed: 0,input_ids,labels
0,"In the United States, scholars argue that ther...",[QUESTION]: What was the primary concern of th...
1,"In the United States, scholars argue that ther...",[QUESTION]: What argument do scholars in the U...
2,"In the United States, scholars argue that ther...",[QUESTION]: What were the consequences of prod...
3,"In the United States, scholars argue that ther...",[QUESTION]: What was the primary concern of th...
4,"In the United States, scholars argue that ther...",[QUESTION]: What factors contributed to Kissin...


# Generating Essay Answers

In [None]:
#Essay answers
count = 0
for i in tqdm(data[255:500]):
    query_format = f"""For the below context generate question and answers with the following instructions
instruction:- Question should be prefixed with [QUESTION]: tag  answer with [ANSWER]: tag and answer being less than 2 sentences.
context:{i}"""
    try:
      response = model.generate_content(query_format)
      for res in response.text.replace('•', '  *').split('\n\n'):
        result = res.replace('\n',' ')
        with warnings.catch_warnings():
          warnings.simplefilter("ignore", category=DeprecationWarning)
          df = df.append(pd.Series([i+'\n Generate Essay answers',result],index=columns),ignore_index=True)
    except:
      pass
    if count%50==3:
      df.to_csv(f'QuizBotAI_dataset_essay.csv',index = False)
    count+=1

In [22]:
df.to_csv(f'QuizBotAI_dataset-mcqs.csv',index = False)

In [None]:
from google.colab import files
files.download('QuizBotAI_dataset-mcqs.csv')

# Fill in the Blanks

In [None]:
#FIBS
count = 0
for i in tqdm(data):
    query_format = """generate fill in the blanks questions and answers for the below context and follow these instructions.
    instructions: Ensure question is prefixed with [QUESTION]: tag and answer with [ANSWER]: tag and the blank represented by '_______'.
    context: {}""".format(i)
    try:
      response = model.generate_content(query_format)
      for res in response.text.replace('•', '  *').split('\n\n'):
        result = res.replace('\n',' ')
        result = '[QUESTION]:'  + result
        with warnings.catch_warnings():
          warnings.simplefilter("ignore", category=DeprecationWarning)
          df = df.append(pd.Series([i+'\n Generate FIBS',result],index=columns),ignore_index=True)
    except Exception as e:
      print(e)
    if count%50==3:
      df.to_csv(f'QuizBotAI_dataset-FIBS.csv',index = False)
    count+=1

In [None]:
df.to_csv(f'QuizBotAI_dataset-FIBS.csv',index = False)