In [1]:
def parse_Response(response):
    """Extract a JSON object from a model response.

    Args:
        response: Either a ``str`` containing a JSON object somewhere in its
            text (anything before the first ``{`` and after the last ``}`` is
            ignored), or an already-parsed ``dict``.

    Returns:
        The decoded JSON value for a string input, the input itself for a
        ``dict`` input, or ``None`` when nothing could be extracted. A
        diagnostic message is printed whenever ``None`` is returned.
    """
    if isinstance(response, dict):
        return response

    if not isinstance(response, str):
        print("No response message found", type(response))
        return None

    start_index = response.find("{")
    end_index = response.rfind("}")
    # The closing brace has to come after the opening one; otherwise there is
    # no object to slice out (this also covers end_index == -1).
    if start_index == -1 or end_index < start_index:
        print("No valid JSON content found in the response.")
        return None

    valid_json_content = response[start_index : end_index + 1]
    try:
        # strict=False accepts raw control characters (e.g. literal newlines)
        # inside string values, so the text is kept intact instead of having
        # its line breaks deleted and neighbouring words glued together.
        return json.loads(valid_json_content, strict=False)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON response: {e.__class__.__name__} - {e}\n\n Still trying to work on particular exceptions ...")
        print("Actual Content: ", valid_json_content)
        return None


In [50]:
def definingPrompt_with_longAns():
    """Build the prompt pieces used to enrich a summary from its long answer.

    Returns:
        tuple: ``(style, instruction, output_format, template_string)`` where
        ``template_string`` contains the placeholders ``{style}``,
        ``{instructions}``, ``{summary}``, ``{long_answer}`` and
        ``{output_format}``.
    """
    styles_dict = {
        "style_3": "Identify additional key points from the long answer that are not already covered in the provided **summary**."
    }

    instructions_dict = {
        "instruction_3": (
            "Ensure the extracted points provide unique insights, supporting details, or essential context to enrich the summary further. \n"
            "Append these additional points to the given summary. \n"
            "The generated rephrased answer word length should be around that of the given content. \n"
        ),
    }

    style = styles_dict["style_3"]
    instruction = instructions_dict["instruction_3"]

    # Adjacent string literals are concatenated verbatim, so every piece must
    # end with its own separator (newline or space).
    output_format = (
        "Provide the output in JSON format with the following key:\n"
        "- `rephrased_text`: The rephrased version of the input text.\n"
        "Ensure there is no additional text, commentary, or formatting outside the JSON structure."
    )

    template_string = (
        "You're an educational AI model specializing in text rephrasing, which has all the knowledge of undergraduate Bachelors of Technology. "
        "Your task is to rephrase the **summary** based on the provided style, and guidelines. \n\n"
        # The style text already ends with a period, so none is added here.
        "Your writing style : {style} \n\n"
        "Guidelines to follow : {instructions} \n\n"
        "Summary for rephrasing : {summary} \n\n"
        "Long Answer : {long_answer}\n\n"

        "{output_format} \n\n"
        # Split into two literals instead of a backslash continuation, which
        # would embed the source indentation into the prompt.
        "Ensure that you follow all the above guidelines and rules to generate the rephrased content. Any deviation from these guidelines "
        "will result in the rephrased content not meeting the educational standards required for the exercise."
    )

    return style, instruction, output_format, template_string

In [51]:
def output_df_long_ans(parsedOutputList, longAnsList):
    """Assemble the final results table.

    Args:
        parsedOutputList: One entry per long answer. Each entry is expected to
            be a dict with ``'summary'`` and ``'rephrased_text'`` keys, but
            failed generations (``None`` or a dict missing those keys) are
            tolerated and produce empty cells.
        longAnsList: The long answers, in the same order and of the same
            length as ``parsedOutputList``.

    Returns:
        pd.DataFrame: Columns ``longAns``, ``summary`` and ``rephrased_text``.
    """
    summAnsList = []
    rephrasedAnsList = []
    for item in parsedOutputList:
        # A failed generation must not abort the whole export: keep the row
        # and leave the missing fields empty.
        record = item if isinstance(item, dict) else {}
        summAnsList.append(record.get('summary'))
        rephrasedAnsList.append(record.get('rephrased_text'))

    final_df = pd.DataFrame({
        'longAns': longAnsList,
        'summary': summAnsList,
        'rephrased_text': rephrasedAnsList,
    })

    return final_df

In [43]:
def longAnsFunc(csv_path='longAns.csv', column='Answer'):
    """Load the long answers, discarding rows with no answer.

    Args:
        csv_path: CSV file to read. Defaults to ``'longAns.csv'``.
        column: Name of the column holding the answers. Defaults to
            ``'Answer'``.

    Returns:
        pd.Series: The non-null values of ``column``. The original row labels
        are kept, so the index may have gaps where rows were dropped.
    """
    # Non-mutating dropna: avoids modifying a column pulled out of the
    # DataFrame in place.
    return pd.read_csv(csv_path)[column].dropna()

In [44]:
# Load the non-null long answers for a quick inspection.
l = longAnsFunc()

In [47]:
# Number of long answers left after rows without an answer were dropped.
l.shape

(18,)

In [10]:
def ask_want_to_save_csv(final_df, output_dir='STYLE_3_FINAL_TESTING'):
    """Interactively offer to write ``final_df`` to a CSV file.

    The user is asked whether to save and, if so, for a file name. Characters
    that are not allowed in file names are replaced by ``_``; a typed
    ``.csv`` suffix is not duplicated; an empty name falls back to
    ``output``. When the user declines, the first rows are printed instead.

    Args:
        final_df: DataFrame to save.
        output_dir: Directory the CSV is written to (created when missing).
            Defaults to ``'STYLE_3_FINAL_TESTING'``.

    Returns:
        None
    """
    import re

    choice = input("Do you want to save the output csv (y/n) : ")
    if choice.strip().lower() != 'y':
        print("Ok no problem")
        print(final_df.head())
        return

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    fileName = input("Enter csv file name : ").strip()
    # The extension is appended below; drop it here so "x.csv" does not
    # become "x.csv.csv".
    if fileName.lower().endswith('.csv'):
        fileName = fileName[:-4]
    # An empty name would otherwise produce a hidden file called ".csv".
    valid_fileName = re.sub(r'[<>:"/\\|?*]', '_', fileName) or 'output'

    save_path = os.path.join(output_dir, f'{valid_fileName}.csv')
    final_df.to_csv(save_path, index=False)
    print("CSV SAVED")

In [31]:
def summarizedAnsFunc():
    """
    Loads summarized answers from 'genSum.csv' and long answers from
    'longAns.csv', then lets the user pick a valid 1-based, inclusive range
    of questions for rephrasing.

    Returns:
        tuple | None: ``(summaries, long_answers)`` for the chosen range,
        where ``summaries`` is a list and ``long_answers`` is a pandas Series
        of the same length (selected by position after rows without an
        answer were dropped). ``None`` is returned, after printing a message,
        when the data cannot be loaded or is empty.
    """
    import ast

    try:
        # Load the summaries; each cell holds a Python-literal dict.
        df = pd.read_csv('genSum.csv')
        df['summarized_ans'] = df['summary'].apply(ast.literal_eval)
        summarizedAnswerList = [i['summarizedAnswer'] for i in df.summarized_ans]

        # Load the long answers (non-mutating dropna on the extracted column).
        longAnsDf = pd.read_csv('longAns.csv')
        longAnsList = longAnsDf['Answer'].dropna()

        # Both sequences are sliced with the same positions, so the usable
        # range is bounded by the shorter of the two.
        total_questions = min(len(summarizedAnswerList), len(longAnsList))
        if len(summarizedAnswerList) != len(longAnsList):
            print(
                f"Warning : {len(summarizedAnswerList)} summaries but {len(longAnsList)} long answers were loaded. "
                f"Only the first {total_questions} can be paired."
            )
        if total_questions == 0:
            # Without this guard the prompt loop below could never succeed.
            print("No questions available to rephrase.")
            return None

        print(f"Enter the range of questions (1-{total_questions}) from the dataset you want to REPHRASE")

        # getting the user input with validation
        while True:
            try:
                starting_ind = int(input(f"Starting question index (1-{total_questions}): ")) - 1
                ending_ind = int(input(f"Ending question index (1-{total_questions}): ")) - 1

                if not (0 <= starting_ind < total_questions and 0 <= ending_ind < total_questions):
                    print(f"Indices must be between 1 and {total_questions}. Please try again.")
                elif starting_ind > ending_ind:
                    print("Starting index cannot be greater than the ending index. Please try again.")
                else:
                    break
            except ValueError:
                print("Invalid input. Please enter numeric value only.")

        # Loop ended successfully, meaning the range entered is correct.
        # .iloc makes the positional selection explicit: the Series index has
        # gaps after dropna, so labels and positions differ.
        return (
            summarizedAnswerList[starting_ind : ending_ind + 1],
            longAnsList.iloc[starting_ind : ending_ind + 1],
        )

    except FileNotFoundError:
        print("The file was not found. Please ensure it exists in the current directory.")
    except KeyError as e:
        print(f"Error : Missing column {e} in the dataset. Please check the file format.")
    except Exception as e:
        print(f"An unexpected error occurred in the summarizedAnsFunc() : {e}")

    return None


In [12]:
def rephrasing_testing_long_ans(long_answer, summary, style, instructions, output_format, template_string):
    """Ask the chat model to enrich ``summary`` using ``long_answer``.

    The prompt is built from ``template_string`` and its five placeholders,
    sent to the model, and the reply text is handed to ``parse_Response``.

    Returns:
        dict | None: The parsed reply with the input ``summary`` stored under
        the ``'summary'`` key on success; ``None`` when the reply could not
        be parsed; ``{"error": <message>}`` when the model call itself fails.
    """
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)
    chat_prompt = ChatPromptTemplate.from_template(template_string)
    messages = chat_prompt.format_messages(
        long_answer=long_answer,
        summary=summary,
        style=style,
        instructions=instructions,
        output_format=output_format,
    )

    # Stage 1: call the model and report how long it took.
    try:
        t0 = time.time()
        reply = llm.invoke(messages)
        t1 = time.time()
        print("Latency of Response = ", t1 - t0)
        raw_text = reply.content
    except Exception as e:
        print("Error with invocation: ", type(e).__name__, "–", e, "\n", traceback.format_exc())
        return {"error": str(e)}

    # Stage 2: pull the JSON payload out of the reply text.
    try:
        result = parse_Response(raw_text)
        if not result:
            raise Exception ("could not rephrase :( ")
        result['summary'] = summary
        return result
    except Exception as e:
        print("Error with extraction: ", type(e).__name__, "–", e, "\n", traceback.format_exc())
        return None



In [13]:
import json
import pandas as pd
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
import getpass
import time
import traceback

In [52]:
def main():
    """Run the full pipeline: build the prompt, pick a question range,
    rephrase each summary with its long answer, and offer to save the result.

    Returns:
        None
    """
    load_dotenv()

    if not os.environ.get("OPENAI_API_KEY"):
        os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI : ")

    style, instructions, output_format, template_string = definingPrompt_with_longAns()  # defining the prompt

    # summarizedAnsFunc returns None (after printing the reason) when the
    # input files cannot be loaded; unpacking None would raise a TypeError.
    selection = summarizedAnsFunc()
    if selection is None:
        print("Could not load the summaries / long answers. Nothing to rephrase.")
        return
    summarizedAnsList, longAnsList = selection

    parsedOutputList = []

    # getting rephrasedResponse
    for summary, longAns in zip(summarizedAnsList, longAnsList):
        parsed_output = rephrasing_testing_long_ans(longAns, summary, style, instructions, output_format, template_string)
        # A failed call yields None or an error dict; keep the row aligned
        # with its long answer by storing a placeholder with the expected keys.
        if not isinstance(parsed_output, dict) or 'rephrased_text' not in parsed_output:
            print("Rephrasing failed for one summary; leaving its rephrased_text empty.")
            parsed_output = {'summary': summary, 'rephrased_text': None}
        parsedOutputList.append(parsed_output)

    # make an output csv file
    final_df = output_df_long_ans(parsedOutputList, longAnsList)
    ask_want_to_save_csv(final_df)


# Entry point: run the pipeline when this code is executed directly
# (the condition is false when the module is imported).
if __name__ == '__main__':
    main()

Enter the range of questions (1-18) from the dataset you want to REPHRASE


Starting question index (1-18):  14
Ending question index (1-18):  16


Latency of Response =  21.987158060073853
Latency of Response =  13.467739820480347
Latency of Response =  11.776642799377441


Do you want to save the output csv (y/n) :  y
Enter csv file name :  test-1


CSV SAVED


In [18]:
# Reload the non-null long answers to inspect them.
longAnsList = longAnsFunc()

In [19]:
# Preview the first rows; gaps in the index mark rows removed by dropna.
longAnsList.head()

0    An entrepreneur is an owner or founder of a bu...
1    It is the process of identifying opportunities...
3    Demand for Entrepreneurship\nEmployment Genera...
4    Entrepreneurial competencies and traits are at...
5    Opportunity Recognition\n\nIt includes the abi...
Name: Answer, dtype: object

In [53]:
# Prompts for a question range and returns the matching summaries and long answers.
# NOTE: summarizedAnsFunc returns None when loading fails, in which case this unpacking raises TypeError.
summarizedAnsList, longAnsList = summarizedAnsFunc()

Enter the range of questions (1-18) from the dataset you want to REPHRASE


Starting question index (1-18):  1
Ending question index (1-18):  4


In [58]:
# Inspect the first summary of the selected range.
print(summarizedAnsList[0])

{
    "summarized_answer": "An entrepreneur is a business founder who identifies opportunities, assumes risks, and efficiently uses resources to generate profit and contribute to economic growth. Key characteristics include: \n\n- **Innovation**: Introducing new or improved products/services.\n- **Risk-taking**: Willingness to accept financial, professional, and personal risks.\n- **Vision and Goal Orientation**: Clear long-term goals drive decision-making.\n- **Resilience**: Overcoming hurdles and staying motivated.\n- **Self-Confidence**: Belief in achieving goals and mobilizing resources.\n- **Flexibility**: Adapting strategies to changing market conditions.\n- **Leadership Skills**: Inspiring teams and creating a constructive work environment.\n- **Persistence**: Continually striving to achieve goals despite obstacles.\n- **Opportunity Identification**: Seeing prospects where others see problems.\n- **Financial Acumen**: Understanding budgeting, forecasting, and resource management