In [12]:
import json
import pandas as pd
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
import getpass
import time
import traceback

In [None]:
# System prompt template for the rephrasing model. The only named placeholder is
# `{output_format}`, filled in with str.format() by the caller.
# NOTE(review): the prompt body looks truncated (it stops right after "answer" and
# has no slots for style/instructions) — confirm the intended full text.
rephrasing_system = """
You're an Educational AI Model who excels in Bachelors of Technology course. You have all the knowledge about every subject and it's whole content.
Your task is to rephrase the **summarized** answer 

## Ouput Format : 

{output_format}

"""

In [13]:
def parse_Response(response):
    """Extract a JSON object from a model response.

    Args:
        response: Either a dict (returned unchanged) or a string that is
            expected to contain a JSON object somewhere between its first
            "{" and its last "}".

    Returns:
        The decoded JSON value (or the dict that was passed in), or None when
        the input has an unsupported type, contains no braces, or the text
        between the braces is not valid JSON.
    """
    if isinstance(response, dict):
        return response

    if not isinstance(response, str):
        print("No response message found", type(response))
        return None

    first_brace = response.find("{")
    last_brace = response.rfind("}")
    if first_brace == -1 or last_brace == -1:
        print("No valid JSON content found in the response.")
        return None

    valid_json_content = response[first_brace : last_brace + 1]
    try:
        # Raw newlines are stripped before decoding, so line breaks inside
        # the payload do not trip the JSON parser.
        return json.loads(valid_json_content.replace("\n", ""))
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON response: {e.__class__.__name__} - {e}\n\n Still trying to work on particular exceptions ...")
        print("Actual Content: ", valid_json_content)
    return None


In [14]:
def ask_want_to_save_csv(final_df):
    """Interactively offer to write ``final_df`` to a CSV file.

    The user is asked whether to save. On "y"/"yes" (case-insensitive,
    surrounding whitespace ignored) a file name is requested, sanitised and the
    frame is written to ``<name>.csv`` without the index. Any other answer
    prints the head of the frame instead.

    Args:
        final_df: Object exposing ``to_csv(path, index=...)`` and ``head()``
            (a pandas DataFrame in this notebook).

    Returns:
        None.
    """
    import re

    choice = input("Do you want to save the output csv (y/n) : ")
    if choice.strip().lower() not in ('y', 'yes'):
        print("Ok no problem")
        print(final_df.head())
        return

    fileName = input("Enter csv file name : ").strip()
    # Drop an extension the user typed so we never write "name.csv.csv".
    if fileName.lower().endswith('.csv'):
        fileName = fileName[:-4].rstrip()
    # Replace characters that are illegal in file names on common platforms.
    valid_fileName = re.sub(r'[<>:"/\\|?*]', '_', fileName)
    # A blank name would otherwise produce a hidden file called ".csv".
    if not valid_fileName:
        valid_fileName = 'output'

    final_df.to_csv(f'{valid_fileName}.csv', index=False)
    print("CSV SAVED")

In [15]:
def summarizedAnsFunc():
    """
    Loads summarized answers from the 'genSum.csv' file and allows the user
    to select a valid range of questions for rephrasing.

    The 'summary' column of the CSV is parsed with ``ast.literal_eval`` and
    the 'summarizedAnswer' entry of each parsed value is collected. The user
    is then prompted (1-based, inclusive) for a start and end index until a
    valid range is entered.

    Returns:
        list : A list of summarized answers within the specified range.
               An empty list is returned when the dataset has no rows or when
               loading fails (the error is printed, not raised).
    """
    import ast

    try:
        # Load the data
        df = pd.read_csv('genSum.csv')
        parsed_summaries = df['summary'].apply(ast.literal_eval)
        summarizedAnswerList = [item['summarizedAnswer'] for item in parsed_summaries]

        # Validating dataset size
        total_questions = len(summarizedAnswerList)
        if total_questions == 0:
            # No index can ever be valid, so prompting would loop forever.
            print("The dataset contains no summarized answers to rephrase.")
            return []

        print(f"Enter the range of questions (1-{total_questions}) from the dataset you want to REPHRASE")

        # getting the user input with validation
        while True:
            try:
                # Users enter 1-based positions; convert to 0-based indices.
                starting_ind = int(input(f"Starting question index (1-{total_questions}): ")) - 1
                ending_ind = int(input(f"Ending question index (1-{total_questions}): ")) - 1

                if not (0 <= starting_ind < total_questions and 0 <= ending_ind < total_questions):
                    print(f"Indices must be between 1 and {total_questions}. Please try again.")
                elif starting_ind > ending_ind:
                    print("Starting index cannot be greater than the ending index. Please try again.")
                else:
                    break
            except ValueError:
                print("Invalid input. Please enter numeric value only.")

        # loop ended successfully, meaning the range entered is correct
        return summarizedAnswerList[starting_ind: ending_ind + 1]

    except FileNotFoundError:
        print("The file was not found. Please ensure it exists in the current directory.")
    except KeyError as e:
        print(f"Error : Missing column {e} in the dataset. Please check the file format.")
    except Exception as e:
        print(f"An unexpected error occurred in the summarizedAnsFunc() : {e}")

    # Every handled failure ends here: honour the documented list return type.
    return []


In [16]:
def output_df_long_ans(parsedOutputList):
    """Build the result DataFrame from the per-question model outputs.

    Args:
        parsedOutputList: Iterable of per-question results. A usable result
            is a dict containing both 'summary' and 'rephrased_text'. Anything
            else (e.g. ``None`` or a dict carrying only an 'error' key) is
            treated as a failed call and left out of the frame.

    Returns:
        pandas.DataFrame with the columns 'summary' and 'rephrased_text', one
        row per usable result, in input order.
    """
    summAnsList = []
    rephrasedAnsList = []
    skipped = 0

    for entry in parsedOutputList:
        usable = (
            isinstance(entry, dict)
            and 'summary' in entry
            and 'rephrased_text' in entry
        )
        if not usable:
            # Failed calls would otherwise raise TypeError/KeyError here and
            # discard every successful result collected so far.
            skipped += 1
            continue
        summAnsList.append(entry['summary'])
        rephrasedAnsList.append(entry['rephrased_text'])

    if skipped:
        print(f"Skipped {skipped} response(s) without a usable rephrasing.")

    final_df = pd.DataFrame({
        'summary': summAnsList,
        'rephrased_text': rephrasedAnsList,
    })

    return final_df

In [17]:
def rephrasing_testing_long_ans(long_answer, summary, style, instructions, output_format, template_string,):
    """Ask the chat model to rephrase one summarized answer.

    Args:
        long_answer: Full answer text, substituted into the user prompt.
        summary: Summarized answer, substituted into the user prompt and
            stored on the returned dict under 'summary'.
        style: Passed positionally to the system prompt template.
        instructions: Passed positionally to the system prompt template.
        output_format: Fills the ``{output_format}`` placeholder of the
            system prompt template.
        template_string: Currently unused by this function.

    Returns:
        dict: The parsed model output with an added 'summary' key on success.
        None: When the model replied but no JSON could be extracted.
        dict: ``{"error": <message>}`` when the model call itself failed.
    """
    model = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)

    # The keyword must match the template's `{output_format}` placeholder.
    # Positional arguments the template does not reference are ignored by str.format.
    system = rephrasing_system.format(style, instructions, output_format=output_format)
    # NOTE(review): `rephrasing_user` is not defined in the visible part of this
    # notebook — confirm it exists (or whether `template_string` was meant here).
    user = rephrasing_user.format(long_answer=long_answer, summary=summary)

    try:
        start_time = time.time()
        response = model.invoke(
            [
                ("system", system),
                ("human", user),
            ]
        )
        end_time = time.time()
        print("Latency of Response = ", end_time - start_time)
        rephrasedAns = response.content
    except Exception as e:
        print("Error with invocation: ", type(e).__name__, "–", e, "\n", traceback.format_exc())
        return {"error": str(e)}

    try:
        parsed_output = parse_Response(rephrasedAns)
        if not parsed_output:
            raise Exception("could not rephrase :( ")
        # Keep the source summary next to the rephrased text for the output table.
        parsed_output['summary'] = summary
        return parsed_output
    except Exception as e:
        print("Error with extraction: ", type(e).__name__, "–", e, "\n", traceback.format_exc())
        return None



In [11]:
def longAnsFunc():
    """Read 'longAns.csv' from the working directory and return its 'Answer' column."""
    return pd.read_csv('longAns.csv')['Answer']

In [6]:
def main():
    """Run the rephrasing pipeline end to end.

    Loads environment variables, makes sure an OpenAI API key is available
    (prompting for one if needed), builds the prompt pieces, loads the
    user-selected summaries and the long answers, calls the model once per
    summary, and finally offers to save the collected results as CSV.
    """
    load_dotenv()

    if not os.environ.get("OPENAI_API_KEY"):
        os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

    # defining the prompt in this function
    style, instructions, output_format, template_string = definingPrompt_with_long_answer()

    summarizedAnsList = summarizedAnsFunc()  # function to load the summarized answer
    if not summarizedAnsList:
        # Loading failed (error already printed) or nothing was selected;
        # calling len() on a missing result would raise TypeError.
        print("No summarized answers were loaded; nothing to rephrase.")
        return

    longAnsList = longAnsFunc()  # function to load answers
    parsedOutputList = []
    # getting rephrasedResponse
    # NOTE(review): summarizedAnsList is the user-selected slice re-indexed from 0,
    # while longAnsList is indexed from the start of longAns.csv — the pairs only
    # line up when the selected range starts at question 1. Confirm intended pairing.
    for ind in range(len(summarizedAnsList)):
        parsed_output = rephrasing_testing_long_ans(
            longAnsList[ind],
            summarizedAnsList[ind],
            style,
            instructions,
            output_format,
            template_string,
        )
        parsedOutputList.append(parsed_output)

    # make an output csv file
    final_df = output_df_long_ans(parsedOutputList)
    ask_want_to_save_csv(final_df)

# Entry point: runs the pipeline when this code is executed directly
# (in a notebook kernel the cell's __name__ is also '__main__').
if __name__ == '__main__':
    main()

NameError: name 'load_dotenv' is not defined