### Styles layout

1. "Simple/less technical terms",
2. "More Concise Summary",
3. "Include Analogy", 
4. "Include Examples"

# CODE STARTS HERE

### importing modules

In [31]:
import json
import pandas as pd
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
import getpass
import time
import traceback

### style_selection

In [44]:
def style_selection():
    """Ask the user to pick one of the four rephrasing styles.

    The menu is re-printed and the question repeated until the user types
    a number that matches one of the menu entries.

    Returns:
        tuple[int, str]: The chosen style number and its description.
    """
    menu = {
        1: "Simple/less technical terms",
        2: "More Concise Summary",
        3: "Include Analogy", 
        4: "Include Examples"
    }

    while True:
        print("Choose Your Style:")
        for number, label in menu.items():
            print(f"{number}. {label}")

        # Non-numeric input: report it and show the menu again.
        try:
            chosen = int(input("Enter your style number (1-4): "))
        except ValueError:
            print("Invalid input. Please enter a numeric value. ")
            continue

        # Numeric, but not one of the menu keys.
        if chosen not in menu:
            print("Invalid choice. Please enter a number between 1 and 4.")
            continue

        description = menu[chosen]
        print(f"You selected : {description}")
        return chosen, description

### loading summarized and long answers

#### long ans function

In [33]:
def longAnsFunc():
    """Load the long-form answers from ``longAns.csv``.

    Reads the ``Answer`` column of ``longAns.csv`` (relative to the current
    working directory) and removes missing values.

    Returns:
        pandas.Series: The non-null entries of the ``Answer`` column. Row
        labels are kept as read, so the index has gaps wherever a missing
        answer was dropped.

    Raises:
        FileNotFoundError: If ``longAns.csv`` does not exist.
        KeyError: If the file has no ``Answer`` column.
    """
    longAnsDf = pd.read_csv('longAns.csv')
    # Use the non-mutating dropna(): the in-place variant modifies a column
    # object extracted from the DataFrame, which pandas may flag as a
    # modification of a copy/view.
    return longAnsDf['Answer'].dropna()

In [34]:
def summarized_and_long_AnsFunc():
    """Load summaries and long answers, then return a user-selected range.

    Long answers come from ``longAnsFunc()``. Summaries come from the
    ``summary`` column of ``genSum.csv``: each cell is evaluated with
    ``ast.literal_eval`` and its ``'summarizedAnswer'`` entry is kept.
    The user is then asked for a 1-based, inclusive start/end question
    index, validated against the number of summaries.

    Returns:
        tuple | None: ``(summaries, long_answers)`` sliced to the chosen
        range, or ``None`` when the data could not be loaded (a message
        describing the problem is printed in that case).
    """
    import ast

    try:
        # long answer
        longAnsList = longAnsFunc()

        # Load the data summary
        df = pd.read_csv('genSum.csv')
        parsed_rows = df['summary'].apply(ast.literal_eval)
        summarizedAnswerList = [row['summarizedAnswer'] for row in parsed_rows]

        # Validating dataset size
        total_questions = len(summarizedAnswerList)
        if total_questions == 0:
            # Without this guard no index could ever pass validation and
            # the prompt loop below would never terminate.
            print("The summary dataset is empty. Nothing to rephrase.")
            return None

        print(f"Enter the range of questions (1-{total_questions}) from the dataset you want to REPHRASE")

        # getting the user input with validation
        while True:
            try:
                # Convert the user's 1-based indices to 0-based positions.
                starting_ind = int(input(f"Starting question index (1-{total_questions}): ")) - 1
                ending_ind = int(input(f"Ending question index (1-{total_questions}): ")) - 1
            except ValueError:
                print("Invalid input. Please enter numeric value only.")
                continue

            if not (0 <= starting_ind < total_questions and 0 <= ending_ind < total_questions):
                print(f"Indices must be between 1 and {total_questions}. Please try again.")
            elif starting_ind > ending_ind:
                print("Starting index cannot be greater than the ending index. Please try again.")
            else:
                break

        # Loop ended successfully, meaning the entered range is valid.
        selected = slice(starting_ind, ending_ind + 1)
        return summarizedAnswerList[selected], longAnsList[selected]

    except FileNotFoundError:
        print("The file was not found. Please ensure it exists in the current directory.")
    except KeyError as e:
        print(f"Error : Missing column {e} in the dataset. Please check the file format.")
    except Exception as e:
        print(f"An unexpected error occurred in the summarized_and_long_AnsFunc() : {e}")

    # Every handled failure ends up here; callers must check for None.
    return None


### getting parsed response

In [35]:
def parse_Response(response):
    """Turn a model response into a dictionary.

    A ``dict`` is handed back untouched. For a string, the text from the
    first ``{`` to the last ``}`` is taken, newline characters are removed
    from it, and the remainder is decoded with ``json.loads``. In every
    other situation (unsupported type, no braces, undecodable JSON) a
    diagnostic is printed and ``None`` is returned.
    """
    if isinstance(response, dict):
        return response

    if not isinstance(response, str):
        print("No response message found", type(response))
        return None

    first_brace = response.find("{")
    last_brace = response.rfind("}")
    if first_brace == -1 or last_brace == -1:
        print("No valid JSON content found in the response.")
        return None

    candidate = response[first_brace : last_brace + 1]
    try:
        return json.loads(candidate.replace("\n", ""))
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON response: {e.__class__.__name__} - {e}\n\n Still trying to work on particular exceptions ...")
        print("Actual Content: ", candidate)
        return None


### defining prompt

#### prompt_style - 1, 2

In [36]:
def definingPrompt_one_two(style_no, summary):
    """Build the chat prompt used for rephrasing styles 1 and 2.

    Args:
        style_no: ``1`` (simpler, less technical wording at about the same
            length) or ``2`` (more concise, about half the length).
        summary: The text to rephrase; substituted into the template's
            ``{text}`` placeholder.

    Returns:
        The messages produced by ``ChatPromptTemplate.format_messages``.

    Raises:
        KeyError: If ``style_no`` is neither 1 nor 2.
    """
    # styles
    styles_dict = {
        1: "should be simpler and use fewer technical terms compared to the original input",
        2: "more concise and should be half word length than the given text."
    }

    # instructions dictionary
    instructions_dict = {
        # Style 1 asks for simpler wording; it previously repeated the
        # style-2 "Concise" instruction verbatim.
        1: (
            "Rephrase the provided text in simpler, less technical language while retaining its original meaning. \n"
            "The generated rephrased answer word length should be around that of the given content. \n"
            "Ensure the rephrased response remains in the context of the provided content without introducing unrelated information. \n"
        ),
        2: (
            "Rephrase the provided text such that it is Concise while retaining its original meaning. \n"
            "The word count of the generated rephrased answer should be half that of the given content. \n"
            "Ensure the rephrased response remains in the context of the provided content without introducing unrelated information. \n"
        ),
    }

    # output format
    # Adjacent string literals are concatenated as-is, so every sentence
    # boundary needs its own explicit separator.
    output_format = (
        "Provide the output in JSON format with the following key:\n"
        "- `rephrased_text`: The rephrased version of the input text.\n"
        "Ensure there is no additional text, commentary, or formatting outside the JSON structure."
    )

    style = styles_dict[style_no]
    instructions = instructions_dict[style_no]

    template_string = (
        "You're an Educational AI model designed to Rephrase text, which has all the knowledge of the undergraduate Bachelors of Technology Course. "
        "Your task is to rephrase the given text based on your style, the context, and the given guidelines. \n\n"
        "Your writing style : {style}. \n\n"
        "Follow these instructions to generate a good quality rephrased answer: {instructions}\n"
        "Given content to be used for rephrasing: ```{text}```\n\n"
        "{output_format} \n\n"
        # Separate literals instead of a backslash continuation, which would
        # embed the source indentation in the prompt text.
        "Ensure that you follow all the above guidelines and rules to generate the rephrased content. "
        "Any deviation from these guidelines will result in rephrased content not meeting the education standards required for this exercise."
    )

    prompt_template = ChatPromptTemplate.from_template(template_string)
    message = prompt_template.format_messages(
        text=summary, style=style, instructions=instructions, output_format=output_format
    )

    return message

### rephrasing style  1 2

In [37]:
def rephrasing_style_one_two(prompt, summary):
    """Send a style-1/2 prompt to the model and return the parsed reply.

    Args:
        prompt: The messages passed to ``ChatOpenAI.invoke``.
        summary: The original text; stored under ``'summary'`` in the
            returned dictionary.

    Returns:
        dict | None:
            * the parsed reply, containing ``'rephrased_text'`` plus the
              added ``'summary'`` key, on success;
            * ``None`` when the reply could not be parsed into a dict that
              contains ``'rephrased_text'``;
            * ``{"error": <message>}`` when invoking the model raised.
    """
    model = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)

    # Step 1: call the model. Only invocation problems are handled here.
    try:
        # perf_counter() is a monotonic clock intended for measuring intervals.
        start_time = time.perf_counter()
        response = model.invoke(prompt)
        print("Latency of Response = ", time.perf_counter() - start_time)
        rephrasedAns = response.content
    except Exception as e:
        print("Error with invocation: ", type(e).__name__, "–", e, "\n", traceback.format_exc())
        return {"error": str(e)}

    # Step 2: extract the JSON payload from the reply.
    try:
        parsed_output = parse_Response(rephrasedAns)
    except Exception as e:
        print("Error with extraction: ", type(e).__name__, "–", e, "\n", traceback.format_exc())
        return None

    # A reply without the expected key is treated as a failed extraction so
    # that downstream code never receives a dict it cannot read.
    if not isinstance(parsed_output, dict) or 'rephrased_text' not in parsed_output:
        print("Error with extraction: could not rephrase :( ")
        return None

    parsed_output['summary'] = summary
    return parsed_output




### rephrasing style 3 4

In [38]:
def rephrasing_style_three_four(answer: str, style_no: int) -> str:
    """Generate an analogy (style 3) or examples (style 4) for an answer.

    Args:
        answer: The long-form answer the model should work from.
        style_no: ``3`` for a short analogy, ``4`` for three brief examples.

    Returns:
        str: The content of the model's reply. If invoking the model
        raises, the string ``"An error occurred: <message>"`` is returned
        instead of propagating the exception.

    Raises:
        KeyError: If ``style_no`` is neither 3 nor 4.
    """
    # The answer is interpolated here by the f-strings. The prompt must NOT
    # be passed through str.format() afterwards: any "{" or "}" inside the
    # answer (common in code snippets) would be read as a replacement field
    # and raise.
    prompts_dict = {
        3: (
            "You are an AI specializing in creating relatable analogies. "
            "Given the following explanation, provide a short, real-world analogy that is "
            "easy to understand and relatable for most people.\n\n"
            f"Answer: {answer}\n\n"
            "Analogy:\n"
            "Ensure that the Analogy should not exceed more than 50 words."
        ),
        4: (
            "You are an Educational AI assistant who excels in Bachelors of Technology course. You have all the knowledge about every subject and it's whole content. "
            "Given the following answer, generate three concise examples that relate to the content. Each example must be brief and to the point.\n\n"
            f"Answer: {answer} \n\n"
            "Ensure that the word length of each example should not exceed more than 15 words.\n"
        ),
    }

    # Per-style model settings; only the client for the requested style is
    # constructed.
    model_options = {
        3: {"temperature": 0.7, "n": 1, "stop": ['\n']},
        # NOTE(review): n=3 requests three completions, but only
        # response.content is returned below — confirm whether the extra
        # completions are actually needed.
        4: {"temperature": 0.7, "n": 3},
    }

    prompt = prompts_dict[style_no]
    model = ChatOpenAI(model="gpt-4o-mini", **model_options[style_no])

    try:
        # perf_counter() is a monotonic clock intended for measuring intervals.
        start_time = time.perf_counter()
        response = model.invoke(prompt)
        print(time.perf_counter() - start_time)
        return response.content
    except Exception as e:
        return f"An error occurred: {e}"

### output dataframe 

In [39]:
def output_df(parsedOutputList):
    """Build the result DataFrame from the parsed model outputs.

    Args:
        parsedOutputList: Iterable of result dictionaries. Each usable
            entry has a ``'summary'`` and a ``'rephrased_text'`` key.
            Entries that are not dictionaries or lack one of those keys
            (for example ``None`` or ``{"error": ...}`` from a failed
            request) are skipped.

    Returns:
        pandas.DataFrame: Columns ``'summary'`` and ``'Rephrased Ans'``,
        one row per usable entry, in input order.
    """
    required_keys = ('summary', 'rephrased_text')
    usable = [
        entry
        for entry in parsedOutputList
        if isinstance(entry, dict) and all(key in entry for key in required_keys)
    ]

    skipped = len(parsedOutputList) - len(usable)
    if skipped:
        # Failed rephrasings must not abort the export of the good ones.
        print(f"Skipped {skipped} entr{'y' if skipped == 1 else 'ies'} without a rephrased answer.")

    final_df = pd.DataFrame({
        'summary': [entry['summary'] for entry in usable],
        'Rephrased Ans': [entry['rephrased_text'] for entry in usable],
    })

    return final_df

### ask_want_to_save_csv()

In [40]:
def ask_want_to_save_csv(final_df):
    """Ask whether to save ``final_df`` as a CSV and act on the answer.

    On ``y`` (case-insensitive, surrounding whitespace ignored) the user is
    asked for a file name and the frame is written, without the index, to
    ``END-FINAL-TESTING/<name>.csv``. Characters ``< > : " / \\ | ? *`` in
    the name are replaced with ``_``. Any other answer prints the first
    rows of the frame instead.

    Args:
        final_df: The DataFrame to save or preview.
    """
    import re

    choice = input("Do you want to save the output csv (y/n) : ")
    if choice.strip().lower() != 'y':
        print("Ok no problem")
        print(final_df.head())
        return

    output_dir = 'END-FINAL-TESTING'
    # exist_ok avoids the separate existence check before creating.
    os.makedirs(output_dir, exist_ok=True)

    # An empty name would produce a file called just ".csv"; ask again.
    while True:
        fileName = input("Enter csv file name : ").strip()
        if fileName:
            break
        print("File name cannot be empty. Please try again.")

    # The extension is appended below; drop one the user already typed so
    # the result is not "<name>.csv.csv".
    if fileName.lower().endswith('.csv') and len(fileName) > 4:
        fileName = fileName[:-4]

    valid_fileName = re.sub(r'[<>:"/\\|?*]', '_', fileName)
    save_path = os.path.join(output_dir, f'{valid_fileName}.csv')
    final_df.to_csv(save_path, index=False)
    print("CSV SAVED")

## main

In [46]:
def main():
    """Run the interactive rephrasing workflow from start to finish.

    Steps: load environment variables (asking for the OpenAI key if it is
    not set), let the user pick a style and a question range, rephrase
    each selected item, then build the result table and offer to save it.
    Items that fail to rephrase or parse are reported and skipped rather
    than aborting the run.
    """
    load_dotenv()

    if not os.environ.get("OPENAI_API_KEY"):
        os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI : ")

    style_no, style_description = style_selection()

    loaded = summarized_and_long_AnsFunc()
    if loaded is None:
        # The loader returns None (after printing the reason) when the data
        # files are missing or malformed; there is nothing to process.
        return
    summarizedAnsList, longAnsList = loaded

    parsedOutputList = []
    if style_no in (1, 2):
        for summary in summarizedAnsList:
            prompt = definingPrompt_one_two(style_no, summary)
            parsed_output = rephrasing_style_one_two(prompt, summary)
            # Failed calls come back as None or {"error": ...}; neither can
            # be turned into an output row.
            if not isinstance(parsed_output, dict) or 'rephrased_text' not in parsed_output:
                print("Skipping an entry that could not be rephrased.")
                continue
            parsedOutputList.append(parsed_output)
    else:
        for longAns, summary in zip(longAnsList, summarizedAnsList):
            # Parse first so that no model call is spent on an unusable summary.
            parsed_summary = parse_Response(summary)
            if not isinstance(parsed_summary, dict) or 'summarized_answer' not in parsed_summary:
                print("Skipping an entry whose summary could not be parsed.")
                continue

            response = rephrasing_style_three_four(longAns, style_no)
            base_summary = parsed_summary['summarized_answer']
            entry = dict(parsed_summary)
            entry['rephrased_text'] = base_summary + '\n\n' + style_description + '\n\n' + response
            entry['summary'] = base_summary
            parsedOutputList.append(entry)

    if not parsedOutputList:
        print("No rephrased output was produced.")
        return

    # make an output csv file
    final_df = output_df(parsedOutputList)
    ask_want_to_save_csv(final_df)
        

# Start the interactive workflow only when this module is the entry point
# (__name__ is '__main__'), not when it is imported.
if __name__ == '__main__':
    main()

Choose Your Style:
1. Simple/less technical terms
2. More Concise Summary
3. Include Analogy
4. Include Examples


Enter your style number (1-4):  4


You selected : Include Examples
Enter the range of questions (1-18) from the dataset you want to REPHRASE


Starting question index (1-18):  15
Ending question index (1-18):  18


1.5513906478881836
1.2102408409118652
1.471825361251831
1.6738786697387695


Do you want to save the output csv (y/n) :  y
Enter csv file name :  style-4-test


CSV SAVED
