In [33]:
import os
import traceback
import pandas as pd
import json

In [34]:
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI, OpenAI

In [35]:
# OpenAI credential taken from the process environment; the value is None
# when the variable is not set.
KEY = os.environ.get("OPENAI_API_KEY")

In [36]:
# "gpt-3.5-turbo" is a chat model, so it has to be driven through the
# chat-completions wrapper (ChatOpenAI). The completion-style OpenAI wrapper
# sends requests to the legacy completions endpoint, which does not serve
# chat-only models.
llm = ChatOpenAI(openai_api_key=KEY, model_name="gpt-3.5-turbo", temperature=0.7)

In [37]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

In [38]:
# Example layout handed to the model as a formatting guide: string question
# numbers map to the question text, four lettered options, and the answer.
# The three entries are identical placeholders, so they are generated rather
# than spelled out one by one.
RESPONSE_JSON = {
    str(question_no): {
        "mcq": "multiple choice question",
        "options": {letter: "choice here" for letter in "abcd"},
        "correct": "correct answer",
    }
    for question_no in range(1, 4)
}

In [39]:
# Prompt for the quiz-generation step. Placeholders: {text}, {number}
# (used twice), {subject}, {tone} and {response_json}.
# A trailing backslash inside the literal joins two source lines without
# putting a newline into the prompt.
TEMPLATE='''
Text:{text}
You are an expert MCQ maker. Given the above text it is your job to \
create a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the questions to be conforming to the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}
'''

In [40]:
# Wrap the quiz-generation template together with the five placeholder names
# it expects to be filled in.
quiz_generation_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=['text', 'number', 'subject', 'tone', 'response_json'],
)

In [41]:
# First step: run the quiz-generation prompt through the model and expose the
# reply under the 'quiz' output key.
quiz_chain = LLMChain(
    llm=llm,
    prompt=quiz_generation_prompt,
    output_key='quiz',
    verbose=True,
)

In [42]:
# Prompt for the review step. Placeholders: {subject} and {quiz} (the text
# produced by the quiz-generation step).
# A trailing backslash inside the literal joins two source lines without
# putting a newline into the prompt.
TEMPLATE2 = '''
You are an expert english grammarian and writer. Given a multiple choice quiz for {subject} students, \
you need to evaluate the complexity of the question and give a complete analysis of the quiz.
Only use at max 50 words for complexity.
If the quiz is not on par with the cognitive and analytical abilities of the students, \
update the quiz questions which need to be changed and change the tone so that it perfectly fits the student abilities.
Quiz_MCQs:
{quiz}

Check from an expert english writer of the above quiz.
'''

In [43]:
# Wrap the review template together with the two placeholder names it expects.
quiz_evaluation_prompt = PromptTemplate(
    template=TEMPLATE2,
    input_variables=['subject', 'quiz'],
)

In [44]:
# Second step: run the review prompt through the same model and expose the
# reply under the 'review' output key.
review_chain = LLMChain(
    llm=llm,
    prompt=quiz_evaluation_prompt,
    output_key='review',
    verbose=True,
)

In [45]:
# Pipeline that runs quiz generation and then the review, taking the five
# prompt inputs and returning both the 'quiz' and 'review' outputs.
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=['text', 'number', 'subject', 'tone', 'response_json'],
    output_variables=['quiz', 'review'],
    verbose=True,
)

In [46]:
# Location of the source passage. The original machine-specific path remains
# the default; set the MCQ_DATA_PATH environment variable to point the
# notebook at a copy elsewhere without editing the code.
filepath = os.getenv("MCQ_DATA_PATH", r'F:\MCQ-Generator\experiment\data.txt')

In [47]:
# Read the whole passage into memory. The encoding is pinned to UTF-8 because
# open() otherwise uses the platform's locale encoding, which differs between
# machines and can garble or reject non-ASCII characters.
with open(filepath, 'r', encoding='utf-8') as file_obj:
    TEXT = file_obj.read()

In [48]:
# Echo the loaded passage so the reader can confirm what was read from disk.
print(TEXT)

Data communication, including data transmission and data reception, is the transfer of data, transmitted and received over a point-to-point or point-to-multipoint communication channel. Examples of such channels are copper wires, optical fibers, wireless communication using radio spectrum, storage media and computer buses. The data are represented as an electromagnetic signal, such as an electrical voltage, radiowave, microwave, or infrared signal.

Analog transmission is a method of conveying voice, data, image, signal or video information using a continuous signal which varies in amplitude, phase, or some other property in proportion to that of a variable. The messages are either represented by a sequence of pulses by means of a line code (baseband transmission), or by a limited set of continuously varying waveforms (passband transmission), using a digital modulation method. The passband modulation and corresponding demodulation is carried out by modem equipment.

Digital communicati

In [49]:
# Serialize the Python dictionary into a JSON string. The result is only
# displayed as this cell's output, not stored in a variable.
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [50]:
# Run parameters: how many questions to request, the subject label, and the tone.
# NOTE(review): SUBJECT ends with a trailing space; kept as-is here.
NUMBER, SUBJECT, TONE = 5, 'Data Communication ', 'simple'

In [None]:
# Run both chain steps inside get_openai_callback() so that `cb` can be
# inspected afterwards for token usage and cost.
# .invoke() is used instead of calling the chain object directly: the direct
# call style is deprecated in current LangChain releases, while .invoke()
# accepts the same input mapping.
with get_openai_callback() as cb:
    response = generate_evaluate_chain.invoke(
        {
            'text': TEXT,
            'number': NUMBER,
            'subject': SUBJECT,
            'tone': TONE,
            'response_json': json.dumps(RESPONSE_JSON),
        }
    )

In [None]:
# Report the usage figures collected by the callback, one per line.
for label, field in (
    ("total tokens", "total_tokens"),
    ("prompt tokens", "prompt_tokens"),
    ("completion tokens", "completion_tokens"),
    ("total cost", "total_cost"),
):
    print(f"{label} : {getattr(cb, field)}")

In [None]:
# Pull the quiz-generation step's output out of the chain result; .get()
# yields None if the 'quiz' key is absent.
quiz=response.get('quiz')

In [None]:
# Parse the model's reply from JSON text into Python objects.
# The text is read from `response` rather than from `quiz` itself, so
# re-running this cell does not hand an already-parsed object to json.loads
# (which raises TypeError for anything that is not str/bytes/bytearray).
quiz = json.loads(response.get('quiz'))

In [None]:
# Flatten every question into one table row: the question text, its options
# joined as "key : value | key : value ...", and the correct answer.
quiz_table_data = [
    {
        "MCQ": entry['mcq'],
        "CHOICES": " | ".join(
            f"{letter} : {choice}" for letter, choice in entry['options'].items()
        ),
        "CORRECT": entry['correct'],
    }
    for entry in quiz.values()
]

In [None]:
# Turn the list of row dicts into a DataFrame (one row per question).
# NOTE(review): this rebinds `quiz` to a DataFrame; a distinct name would make
# the cell safer to re-run out of order.
quiz = pd.DataFrame(data=quiz_table_data)

In [None]:
# Write the quiz table to a CSV file in the working directory, leaving out
# the DataFrame index column.
quiz.to_csv(path_or_buf="data_communication.csv", index=False)