# README
- Download Ollama
- Run the following commands in command prompt in order:
-   ollama pull llama3
-   ollama serve

In [4]:
%pip install langchain pandas beeprint

Collecting langchain
  Using cached langchain-0.3.3-py3-none-any.whl.metadata (7.1 kB)
Collecting pandas
  Using cached pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting PyYAML>=5.3 (from langchain)
  Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl.metadata (2.1 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Using cached SQLAlchemy-2.0.35-cp312-cp312-win_amd64.whl.metadata (9.9 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.10.10-cp312-cp312-win_amd64.whl.metadata (7.8 kB)
Collecting langchain-core<0.4.0,>=0.3.10 (from langchain)
  Using cached langchain_core-0.3.10-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Using cached langchain_text_splitters-0.3.0-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.134-py3-none-any.whl.metadata (13 kB)
Collecting numpy<2.0.0,>=1.26.0 (from langchain)
  Using cached num

In [2]:
%pip install beeprint

Note: you may need to restart the kernel to use updated packages.


In [5]:
from langchain import PromptTemplate
from langchain_core.prompts.few_shot import FewShotPromptTemplate
import pandas as pd
import numpy as np
import os
from beeprint import pp
import re
import requests

In [22]:
# The dictionary including every grading criteria for each section in the input 
criteria_dict = {
    "Description": [
        "1. Does the name of the \"Project Name:\" attribute specified in the file?",
        "2. Does the name of the \"Clinic:\" attribute specified in the file?",
        "3. Does the name of the \"Process:\" attribute specified in the file?",
        "4. Does the name of the \"TIP 2.0 Process Milestone:\" attribute specified in the file?",
        "5. Does the name of the \"TIP 2.0 Process Milestone:\" attribute provide specific process milestone details?",
        "6. Does the value of the \"Project Description / Purpose\" attribute include a brief description of the project?",
        "7. Does the value of the \"Project Description / Purpose\" attribute include a sentence that explicitly state the purpose of the project?",
        "8. Does the value of the \"Project Description / Purpose\" attribute include a detailed expected results from the project?"
    ],
    "Overview": [
        "1. Does the value of the \"Problem Summary:\" attribute include a detailed (at least 1 sentence) and logical problem statement (relating to the project description/purpose)?",
        "2. Does the value of the \"Desired Outcome(s):\" attribute include a at least 1 sentence explicitly talking about the expected results/desired outcomes?",
        "3. Does the value of the \"Benefits:\" attribute include a least 1 sentence explicitly talking about the benefits of this project?"
    ],
    "Timeline": [
        "1. Does the value of the each task list key tasks?",
        "2. Does the value of the each task contain a specific planned completion due dates?"
    ],
    "Scope": [
        "3. Does the value of the \"In Scope Project Objectives\" attribute include an aligned objectives with the project purpose?",
        "4. Does the value of the \"Out of Scope Project Objectives or Activities\" attribute include an aligned objectives with the project purpose?",
        "5. Does the value of the In Scope Project Objectives align with project milestone?"
    ],
    "Team": [
        "1. Does the value of the \"Team Lead:\" attribute contain an individual's name?",
        "2. Does the value of the \"Project Champion:\" attribute contain an individual's name?",
        "3. Does the value of the \"Process Owner:\" attribute contain an individual's name?",
        "4. Does the value of the \"Process Manager:\" attribute contain an individual's name?",
        "5. Does the value of the \"Stakeholders\" \"stakeholder\" attribute contain an individual's name?",
        "6. Do the \"Stakeholders\" contain values for the attributes: title, department, and organization?",
        "7. Does the value of the \"Project Team Members\" \"Name\" attribute contain an individual's name?",
        "8. Do the \"Project Team Members\" contain role values for the \"Team role\" attribute?"
    ]

}

In [19]:
# The prompts including the examples of good and bad submission for each section
grading_prompts = PromptTemplate.from_template("""
You are grading a project {section}. This is the criteria: {criteria}. Give the grade in range 0 to 5. If the information specified in
                                               criteria was not provided in the submission, even if the criteria is partially met, grade it 0. If the submission include the information needed in criteria,
                                                grade it 3. If the information is very detailed, more than 2 sentences, grade it 5.
                                               Output with format: Grade: [grade]. Comment: [comment]


Grade this submission:
{input}
""")

In [13]:
# Grading prompts without example
grading_prompts_v2 = {
    "Milestone": (PromptTemplate.from_template("""
You are grading a project milestone. It should  {criteria}. Compare the submission’s milestone with the example with a grade of 100 and the example with a grade of 50. The highest grade possible is 100 and the lowest grade possible is 0. Output with format: Grade: [grade]. Comment: [comment]


Grade this submission:
{milestone}
""")),

   "Description":  (PromptTemplate.from_template("""
You are grading a project description. It should  {criteria}. Compare the submission’s Project Description / Purpose with the example with grade of 100 and the example with a grade of 50. The highest grade possible is 100 and the lowest grade possible is 0. Output with format: Grade: [grade]. Comment: [comment]

Grade this submission:
{description}
""")),

"Overview": (PromptTemplate.from_template("""
You are grading a project overview. This project description is also provided. It should {criteria}.Compare the submission with the example with a grade of 100 and the example with a grade of 50. The highest grade possible is 100 and the lowest grade possible is 0. Output with format: Grade: [grade]. Comment: [comment]

Grade this submission:
{overview}
""")),

"Timeline": (PromptTemplate.from_template("""
You are grading a project timeline. It should  {criteria}. Compare the submission with the example with a grade of 100 and the example with a grade of 50. The highest grade possible is 100 and the lowest grade possible is 0. Output with format: Grade: [grade]. Comment: [comment]

Grade this submission:
{timeline}
""")),

"Scope": (PromptTemplate.from_template("""
You are grading a project scope. It should {criteria}. Compare the submission with the example with a grade of 100 and the example with a grade of 50. The highest grade possible is 100 and the lowest grade possible is 0. Output with format: Grade: [grade]. Comment: [comment]
Grade this project scope given its project description:
Project description: {description}
Project scope: {scope}
""")),

"Team": (PromptTemplate.from_template("""
You are grading the project team member part. It should {criteria}. Compare the submission with the example with a grade of 100 and the example with a grade of 0. The highest grade possible is 100 and the lowest grade possible is 0. Output with format: Grade: [grade]. Comment: [comment]

Grade this submission:
{team}
"""))
}


In [6]:
def extract_dictionary_from_text_file(file_path):
    section_dict = {}
    text = open(file_path, 'r').read()
    target_sections = ["Description", "Overview", "Timeline","Scope", "Team"]
    pattern = r'(' + '|'.join(re.escape(name) for name in target_sections) + '):'

    # Find all the section headers and split the text accordingly
    sections = re.split(pattern, text)

    # The first element in sections is either empty or non-useful text before the first header
    sections = sections[1:]  # Skip the first element as it would be empty or non-section text

    # Creating a dictionary to store the section names and their corresponding content

    # Iterate over the list in pairs: section name and its corresponding text
    for i in range(0, len(sections), 2):
        section_name = sections[i].strip()  # Section name (e.g., 'Description')
        section_content = sections[i+1].strip()  # Corresponding content

        if section_name in target_sections:  # Only add the section if it's in the predefined list
            section_dict[section_name] = section_content
    section_dict['Milestone'] = section_dict['Description']
    
    return section_dict

# Auto generate rubric Class

In [33]:
real_comment_df = pd.read_csv("auto_grading_trainingset\Simulated_submissions_for_training_LLM.csv")
real_comment_df

  real_comment_df = pd.read_csv("auto_grading_trainingset\Simulated_submissions_for_training_LLM.csv")


Unnamed: 0,Filename,Author,Section,Criteria,Total points,Reson deduct points
0,input.txt,example2,,,6.0,
1,input2.txt,Angela Abad,,,5.0,Team missing Project champion and Process manager
2,input3.txt,Kim Briggs,,,5.0,"I do not see enough detail on problem summary,..."
3,input4.txt,example1,,,6.0,
4,input5.txt,Yesenia Bravo,,,6.0,
5,input6.txt,Malca Cortez,,,5.0,No project description
6,input7.txt,crystal,,,5.0,the project milestone doesn't look right
7,input8.txt,Sarah Davis,,,5.0,the project milestone lack detail
8,input9.txt,Jennifer Diaz-Montano,,,3.0,I do not see enough detail about the project d...
9,input10.txt,Karen Eynon,,,5.0,I do not see enough detail about the project d...


In [34]:
real_comment_df['Reson deduct points'].tolist()

[nan,
 'Team missing Project champion and Process manager',
 'I do not see enough detail on problem summary, desired outcomes, and benefits.',
 nan,
 nan,
 'No project description',
 "the project milestone doesn't look right",
 'the project milestone lack detail',
 'I do not see enough detail about the project description and purpose statement; I see project tasks but no due dates; The scope objectives do not match the previously stated project purpose. The project scope objectives could simply be "To better address the HRSN needs of patients". The out of scope objective could be "We are not implementing any other platforms".',
 'I do not see enough detail about the project description and purpose statement. I recommend a clear statement like "The purpose of this project is to implement a ...".',
 'Fail in Description (lack purpose/expected result), Overview(lack outcomes and benefits), Timeline (lack due date), Team members (lack every fields in team section)',
 "Fail in Description (

In [43]:
dict(zip(real_comment_df['Filename'].tolist(),real_comment_df['Reson deduct points'].tolist()))

{'input.txt': nan,
 'input2.txt': 'Team missing Project champion and Process manager',
 'input3.txt': 'I do not see enough detail on problem summary, desired outcomes, and benefits.',
 'input4.txt': nan,
 'input5.txt': nan,
 'input6.txt': 'No project description',
 'input7.txt': "the project milestone doesn't look right",
 'input8.txt': 'the project milestone lack detail',
 'input9.txt': 'I do not see enough detail about the project description and purpose statement; I see project tasks but no due dates; The scope objectives do not match the previously stated project purpose. The project scope objectives could simply be "To better address the HRSN needs of patients". The out of scope objective could be "We are not implementing any other platforms".',
 'input10.txt': 'I do not see enough detail about the project description and purpose statement. I recommend a clear statement like "The purpose of this project is to implement a ...".',
 'input11.txt': 'Fail in Description (lack purpose/e

In [38]:
# The prompts including the examples of good and bad submission for each section
rubric_prompts = PromptTemplate.from_template("""
Based on a comment from a real grader, you are generating the grades for section {section} with criteria {criteria} . 
                                               Give the grade in range 0 to 5. If the comment is None, give this criteria
                                               a grade of 5. If the comment mention this section or this criteria, give this criteria a grade of 0.
                                               Output with format: Grade: [grade]. Comment: [comment]


This is the comment:
{input}
""")

In [76]:
class Rubric():
    def __init__(self,inputfile_folder_path,real_comment_file_path,rubric_prompts,rubric_criteria):
        self.root_dir = inputfile_folder_path
        self.file_list = os.listdir(inputfile_folder_path),
        self.rubric_prompts = rubric_prompts,
        self.rubric_criteria = rubric_criteria,
        self.sections = ["Description", "Overview", "Timeline","Scope","Team"]
        self.rubric = pd.DataFrame({
            "filename": [],
            "Section": [],
            "Criteria": [],
            "Real_Grade": []
        }),
        self.real_comment_df = pd.read_csv(real_comment_file_path).fillna('None')
        self.comment_dict = dict(zip(self.real_comment_df['Filename'].tolist(),self.real_comment_df['Reson deduct points'].tolist()))

    def get_score(self,prompt):
        url = "http://localhost:11434/api/chat"
        # The JSON data that would be sent in the POST request
        data = {
            "model": "llama3",
            "messages": [
                { "role": "user", "content": prompt }
            ],
            "stream": False
        }
        # Send the POST request
        response = requests.post(url, json=data)

        # Check if the request was successful
        if response.status_code == 200:
            # Return the JSON response
            print(response.json()["message"]["content"])
            pattern = r"Grade:\s*([A-Za-z0-9\+]+)\s*(.*)"
            match = re.search(pattern, response.json()["message"]["content"])
            return [match.group(1),match.group(2)]
            
        else:
            print(f"Error: {response.status_code}, {response.text}")
            return "-1"
        
    def generatePrompt(self, section, criteria, comment):
        prompt = grading_prompts.format(
            criteria=criteria,
            section=section,
            input=comment
            )
        # print(prompt)
        return prompt

    def get_y_test(self):
        for filename in self.file_list[0]:
            print(f"File name: {filename}")
            comment = self.comment_dict[filename]
            for section in self.sections:
                for criteria in self.rubric_criteria[0][section]:
                    current_prompt = self.generatePrompt(section,criteria,comment)
                    [score,comment] = self.get_score(current_prompt)
                    self.rubric[0].loc[len(self.rubric[0].index)] = [filename,section,criteria,score]
                    print(f"Finished {len(self.rubric[0].index)}/{len(self.file_list[0])}")
        print("Finished generating real grade.")
        return self.rubric[0]

In [77]:
rubric = Rubric(inputfile_folder_path="auto_grading_trainingset/input_files/",
                real_comment_file_path="auto_grading_trainingset\Simulated_submissions_for_training_LLM.csv",
                rubric_prompts=rubric_prompts,
                rubric_criteria=criteria_dict)
real_grade_df = rubric.get_y_test()
real_grade_df

  real_comment_file_path="auto_grading_trainingset\Simulated_submissions_for_training_LLM.csv",


File name: input.txt
Grade: 5. Comment: None
Finished 1/15
Grade: 5. Comment: .
Finished 2/15
Grade: 5. Comment: .
Finished 3/15
Grade: 5. Comment: .
Finished 4/15
Grade: 5. Comment: .
Finished 5/15
Grade: 5. Comment: .
Finished 6/15
Grade: 5. Comment: .
Finished 7/15
Grade: 5. Comment: .
Finished 8/15
Grade: 5. Comment: .
Finished 9/15
Grade: 5. Comment: .
Finished 10/15
Grade: 5
Comment: .
Finished 11/15
Since the comment is blank (i.e., None), I will assign a grade of 5 to this criterion.

Output:
Grade: 5
Comment: .
Finished 12/15
Grade: 5. Comment: .
Finished 13/15
Grade: 5. Comment: None
Finished 14/15
Here is the output:

Grade: 5
Comment: . Comment: None
Finished 15/15
Grade: 5. Comment: None
Finished 16/15
Grade: 5. Comment: . None
Finished 17/15
Grade: 5
Comment: None
Finished 18/15
Grade: 5
Comment: None
Finished 19/15
Grade: 5
Comment: None
Finished 20/15
Grade: 5
Comment: None
Finished 21/15
Grade: 5
Comment: None
Finished 22/15
Grade: 5
Comment: None
Finished 23/15
Grade:

Unnamed: 0,filename,Section,Criteria,Real_Grade
0,input.txt,Description,"1. Does the name of the ""Project Name:"" attrib...",5
1,input.txt,Description,"2. Does the name of the ""Clinic:"" attribute sp...",5
2,input.txt,Description,"3. Does the name of the ""Process:"" attribute s...",5
3,input.txt,Description,"4. Does the name of the ""TIP 2.0 Process Miles...",5
4,input.txt,Description,"5. Does the name of the ""TIP 2.0 Process Miles...",5
...,...,...,...,...
355,input9.txt,Team,"4. Does the value of the ""Process Manager:"" at...",5
356,input9.txt,Team,"5. Does the value of the ""Stakeholders"" ""stake...",5
357,input9.txt,Team,"6. Do the ""Stakeholders"" contain values for th...",5
358,input9.txt,Team,"7. Does the value of the ""Project Team Members...",5


In [78]:
real_grade_df.to_csv("real_result_on_14_submissions_v4.csv")

# Submission Class

In [7]:
class Submission():
    def __init__(self,file_path,grading_prompts,grading_criterias):
        self.file_path = file_path
        self.sections = ["Description", "Overview", "Timeline","Scope","Team"]
        self.grading_prompts = grading_prompts
        self.grading_criterias = grading_criterias
        self.content_dict = {}
        self.extract_dictionary_from_text_file(self.file_path)

    def generatePrompt(self, section, criteria):
        prompt = grading_prompts.format(
            criteria=criteria,
            section=section,
            input=self.content_dict[section]
            )
        # print(prompt)
        return prompt

    def extract_dictionary_from_text_file(self, file_path):
        section_dict = {}
        text = open(file_path, 'r').read()
        target_sections = ["Description", "Overview", "Timeline", "Scope", "Team"]
        pattern = r'(' + '|'.join(re.escape(name) for name in target_sections) + '):'

        # Find all the section headers and split the text accordingly
        sections = re.split(pattern, text)

        # The first element in sections is either empty or non-useful text before the first header
        sections = sections[1:]  # Skip the first element as it would be empty or non-section text

        # Creating a dictionary to store the section names and their corresponding content

        # Iterate over the list in pairs: section name and its corresponding text
        for i in range(0, len(sections), 2):
            section_name = sections[i].strip()  # Section name (e.g., 'Description')
            section_content = sections[i+1].strip()  # Corresponding content

            if section_name in target_sections:  # Only add the section if it's in the predefined list
                section_dict[section_name] = section_content
        self.content_dict = section_dict
        return section_dict
    
    def get_score(self,prompt):
        url = "http://localhost:11434/api/chat"
        # The JSON data that would be sent in the POST request
        data = {
            "model": "llama3",
            "messages": [
                { "role": "user", "content": prompt }
            ],
            "stream": False
        }
        # Send the POST request
        response = requests.post(url, json=data)

        # Check if the request was successful
        if response.status_code == 200:
            # Return the JSON response
            print(response.json()["message"]["content"])
            pattern = r"Grade:\s*([A-Za-z0-9\+]+)\s*(.*)"
            match = re.search(pattern, response.json()["message"]["content"])
            return [match.group(1),match.group(2)]
            
        else:
            print(f"Error: {response.status_code}, {response.text}")
            return "-1"
        
    def get_scores_for_all_section(self):
        print(f"\nStart grading for {self.file_path}...")
        # self.extract_dictionary_from_text_file(self.file_path)
        result_df = {
            "filename": [],
            "AI_Grade": [],
            "Comment": [],
            "Section": [],
            "Criteria": []
        }
        result_df = pd.DataFrame(result_df)
        for section in self.sections:
            for criteria in self.grading_criterias[section]:
                prompt = self.generatePrompt(section,criteria)
                [score,comment] = self.get_score(prompt)
                result_df.loc[len(result_df.index)] = [self.file_path,score,comment,section,criteria]
        print(f"Finished grading {self.file_path}.")
        return result_df 


In [8]:
def list_files_in_directory(directory):
    files_list = []
    for filename in os.listdir(directory):
        # Join the directory path with the filename to get the full file path
        file_path = os.path.join(directory, filename)
        files_list.append(file_path)
    return files_list

In [9]:
files_list = list_files_in_directory("auto_grading_trainingset\input_files")


  files_list = list_files_in_directory("auto_grading_trainingset\input_files")


In [20]:
# test with one text file first to see how prompts work
submission = Submission(file_path="auto_grading_trainingset\input_files\input11.txt",
                                grading_prompts=grading_prompts,
                                grading_criterias=criteria_dict)
submission_res = submission.generatePrompt("Timeline",criteria_dict["Timeline"][0])
print(submission_res)


You are grading a project Timeline. This is the criteria: 1. Does the value of the each task list key tasks?. Give the grade in range 0 to 5. If the submission include the information needed in criteria,
                                                grade it 3. If the information is very detailed, more than 2 sentences, grade it 5. If the information specified in
                                               criteria was not provided in the submission, even if the criteria is partially met, grade it 0.
                                               Output with format: Grade: [grade]. Comment: [comment]


Grade this submission:
Description of Task and Completion Dates
Task 1 Conduct a training for all staff on HRSN and PCAM, and include expectations for the use of the PCAM,
Task 2 Work with internal EHR team to streamline process for adding Z codes
Task 3 Run reports on PCAM completion and establish baseline
Task 4 Run and track PCAM completion and if completion is not improving com

  submission = Submission(file_path="auto_grading_trainingset\input_files\input11.txt",


In [13]:
print(submission.get_score(submission_res))

Grade: 3. Comment: The submission includes the key tasks listed along with their corresponding completion dates, which meets the criteria. However, the description of each task is brief and does not provide detailed information about what needs to be done or how it will be accomplished.
['3', '. Comment: The submission includes the key tasks listed along with their corresponding completion dates, which meets the criteria. However, the description of each task is brief and does not provide detailed information about what needs to be done or how it will be accomplished.']


In [28]:
submission_res_list = []
error_file_list = []
for file_path in files_list:
    if file_path not in submission_results['filename'].unique().tolist():
        try:
            submission = Submission(file_path=file_path,
                                    grading_prompts=grading_prompts,
                                    grading_criterias=criteria_dict)
            submission_res = submission.get_scores_for_all_section()
            submission_res_list.append(submission_res)
            print(f"Finished {len(submission_res_list)} / {len(files_list)}")
        except Exception as e: 
            print(e)
            error_file_list.append(file_path)
print("Error files: ")
print(error_file_list)


Start grading for auto_grading_trainingset\input_files\input15.txt...
Grade: 5. Comment: The submission includes a detailed description of the project name, which meets the criteria. Additionally, it provides more than 2 sentences explaining the purpose and importance of the screening process, demonstrating a thorough understanding of the project's goals and objectives.
Grade: 5
Comment: The submission meets all the criteria, providing a clear description of the clinic's name ("Mercy Grace Private Practice") and including more than two sentences of detailed information. The language is also coherent and easy to understand, making it a strong submission that deserves a high grade.
Grade: 3
Comment: The submission includes the name of the "Process:" attribute, which is "Setting up policies and procedures to introduce screening to our pediatric population at their wellness visits." This meets the criteria, but it's not very detailed. Therefore, I'm grading it a 3.
Grade: 3. Comment: The 

In [30]:
submission_results = pd.concat(submission_res_list, axis=0)
submission_results

Unnamed: 0,filename,AI_Grade,Comment,Section,Criteria
0,auto_grading_trainingset\input_files\input.txt,5,Comment: The submission meets all the criteria...,Description,"1. Does the name of the ""Project Name:"" attrib..."
1,auto_grading_trainingset\input_files\input.txt,5,. Comment: The submission provides a detailed ...,Description,"2. Does the name of the ""Clinic:"" attribute sp..."
2,auto_grading_trainingset\input_files\input.txt,3,Comment: The submission does include the name ...,Description,"3. Does the name of the ""Process:"" attribute s..."
3,auto_grading_trainingset\input_files\input.txt,5,. Comment: The submission clearly specifies th...,Description,"4. Does the name of the ""TIP 2.0 Process Miles..."
4,auto_grading_trainingset\input_files\input.txt,5,Comment: The submission provides specific deta...,Description,"5. Does the name of the ""TIP 2.0 Process Miles..."
...,...,...,...,...,...
19,auto_grading_trainingset\input_files\input9.txt,3,". Comment: The submission does contain the ""Pr...",Team,"4. Does the value of the ""Process Manager:"" at..."
20,auto_grading_trainingset\input_files\input9.txt,5,". Comment: The value of the ""Stakeholders"" att...",Team,"5. Does the value of the ""Stakeholders"" ""stake..."
21,auto_grading_trainingset\input_files\input9.txt,3,. Comment: The submission includes the necessa...,Team,"6. Do the ""Stakeholders"" contain values for th..."
22,auto_grading_trainingset\input_files\input9.txt,3,". Comment: The submission includes the ""Projec...",Team,"7. Does the value of the ""Project Team Members..."


In [31]:
submission_results.to_csv("test_result_on_14_submissions_prompts_v4.csv")

In [28]:
section_options = criteria_dict.keys()

In [29]:
from tkinter import *
import tkinter as tk
from tkinter import filedialog
from tkinter import ttk
import re
import requests
import threading
import os
import pandas as pd

class Application(Frame):

    def __init__(self, master=None, Frame=None, criteria="", criteria_dict={},grading_prompts={}):
        self.criteria = criteria
        self.criteria_dict = criteria_dict
        self.grading_prompts = grading_prompts
        self.submission = ""
        self.submissions = []
        self.grade_csv = []
        self.prompt = ""
        Frame.__init__(self, master)
        super(Application,self).__init__()
        self.grid(column = 5,row = 20,padx = 50,pady = 50)
        self.createWidgets()

    def getUpdateData(self,  event):
        self.CriteriaCombo['values'] = self.criteria_dict[self.SectionCombo.get()]
        self.CriteriaCombo.current()

    def parseTextFile(self, text):
        pattern = r'(' + '|'.join(re.escape(name) for name in self.criteria_dict.keys()) + '):'

        # Find all the section headers and split the text accordingly
        sections = re.split(pattern, text)

        # The first element in sections is either empty or non-useful text before the first header
        sections = sections[1:]  # Skip the first element as it would be empty or non-section text

        # Creating a dictionary to store the section names and their corresponding content
        section_dict = {}

        # Iterate over the list in pairs: section name and its corresponding text
        for i in range(0, len(sections), 2):
            section_name = sections[i].strip()  # Section name (e.g., 'Description')
            section_content = sections[i+1].strip()  # Corresponding content

            if section_name in self.criteria_dict.keys():  # Only add the section if it's in the predefined list
                section_dict[section_name] = section_content
        
        return section_dict

    def getFileContent(self):
        file_path = filedialog.askopenfilename(title="Select a file", filetypes=[("Text files", "*.txt"), ("All files", "*.*")])
        if file_path:
            # Process the selected file (you can replace this with your own logic)
            # print("Selected file:", file_path)
            s = open(file_path, 'r').read()
            content_dict = self.parseTextFile(s)
            # print("Content: ", content_dict[self.SectionCombo.get()])
            self.submission = content_dict[self.SectionCombo.get()]
            self.status.config(text="imported " + file_path)

    def getFolderContent(self):
        folder_path = filedialog.askdirectory()
        if folder_path:
            # Process the selected file (you can replace this with your own logic)
            # List all items in the folder
            all_items = os.listdir(folder_path)
            self.status.config(text="imported " + folder_path)
            # Filter and return only files (not directories)
            files = [os.path.join(folder_path, f) for f in all_items if os.path.isfile(os.path.join(folder_path, f))]
            print("All files: ", all_items)
            self.submissions = files
            self.status.config(text="imported " + folder_path)
            return files
    def setCriteria(self):
        self.criteria = self.CriteriaCombo.get()

    def generatePrompt(self, prompt_submission):
        print("Section: ", self.SectionCombo.get())
        print("criteria: ", self.CriteriaCombo.get())
        prompt = grading_prompts[self.SectionCombo.get()].format(criteria=self.CriteriaCombo.get()) + prompt_submission
        # print(prompt)
        self.prompt = prompt

    def call_api(self):
        print("calling api")
        url = "http://localhost:11434/api/chat"
        # The JSON data that would be sent in the POST request
        data = {
            "model": "llama3",
            "messages": [
                { "role": "user", "content": self.prompt }
            ],
            "stream": False
        }
        # Send the POST request
        response = requests.post(url, json=data)

        # Check if the request was successful
        if response.status_code == 200:
            # Return the JSON response
            print(response.json()["message"]["content"])
            pattern = r"Grade:\s*([\d\.]+)"
            match = re.search(pattern, response.json()["message"]["content"])
            return match.group(1)
            
        else:
            print(f"Error: {response.status_code}, {response.text}")

    def getComment(self):
        self.generatePrompt(self.submission)
        self.status.config(text="Running...")
        
        def api_thread():
            response = self.call_api()
            self.status.config(text="Grade: " + response)
            self.submission = ""
        thread = threading.Thread(target=api_thread)
        thread.start()
        

    def getCommentForFolder(self, path, content,completed):
        print("\nGetting comment for file: ",path)
        self.generatePrompt(content)
        self.status.config(text="Running...")
        
        def api_thread():
            response = self.call_api()
            self.grade_csv.append([path,response,self.SectionCombo.get(),self.CriteriaCombo.get()])
            print("current grade_csv: ",self.grade_csv)
            self.status.config(text="Running...Completed" + str(completed) + "/" + str(len(self.submissions)))
            if len(self.grade_csv) == len(self.submissions):
                print("\nSelf.grade_csv: ",len(self.grade_csv))
                print("self.submissions: ",len(self.submissions))
                filename = "grade_result_" + self.SectionCombo.get() + "_" + str(self.criteria_dict[self.SectionCombo.get()].index(self.CriteriaCombo.get()))
                pd.DataFrame(self.grade_csv,columns=["File","Grade","Section","Criteria"]).to_csv(filename+".csv")
                self.status.config(text="Exported csv file to " + filename+".csv")
                self.submissions = []
                self.grade_csv = []
        thread = threading.Thread(target=api_thread)
        thread.start()
        
    def getGradeResults(self):
        print("Number of files: ",len(self.submissions))
        for i in range(len(self.submissions)):
            file_path = self.submissions[i]
            print("file path: ", file_path)
            s = open(file_path, 'r').read()
            file_content_dict = self.parseTextFile(s)
            print("File content: ",file_content_dict)
            file_content = file_content_dict[self.SectionCombo.get()]
            self.getCommentForFolder(file_path,file_content,i)

    def getAIfeedback(self):
        print("self.submissions:", self.submissions)
        if len(self.submissions):
            print("Grade whole folder")
            self.getGradeResults()
        else:
            self.getComment()

    def createWidgets(self):
        Label(text = 'Section:').grid(row = 1,column = 1,padx = 10)
        Label(text = 'Criteria:').grid(row = 2,column = 1,padx = 10)
        Label(text = 'Import File:').grid(row = 3,column = 1,padx = 10)
        # Criteria list
        self.CriteriaCombo = ttk.Combobox( width = 15)
        self.CriteriaCombo.grid(row = 2,column = 2,pady = 25,padx = 10)
        self.CriteriaCombo.bind('<<ComboboxSelected>>', self.setCriteria())

        # Section list
        self.SectionCombo = ttk.Combobox(width = 15,  values = list(criteria_dict.keys()))
        self.SectionCombo.bind('<<ComboboxSelected>>', self.getUpdateData)
        self.SectionCombo.grid(row = 1,column = 2,padx = 10,pady = 25)

        # Import File button
        self.importButton = ttk.Button(text="Import 1 File", command=self.getFileContent)
        self.importButton.grid(column=2,row=3)

        # Import Folder buttonA
        self.importButton = ttk.Button(text="Import whole Folder", command=self.getFolderContent)
        self.importButton.grid(column=3,row=3)

        # # Generate Prompt button
        # self.generatePromptButton = ttk.Button(text="Generate Prompt", command=self.generatePrompt)
        # self.generatePromptButton.grid(column=2,row=4)

        # Get feedback from AI model
        self.getCommentButton = ttk.Button(text="Get feedback from AI model", command=self.getAIfeedback)
        self.getCommentButton.grid(column=2,row=5)

        # Status
        self.status = Label(text="")
        self.status.grid(column=2, row=6)
app = Application()
app.master.title('Generate Prompts for Llama3')
app.criteria = ""
app.criteria_dict = criteria_dict
app.grading_prompts = grading_prompts
app.mainloop()