In [None]:
import os
import ollama
from datetime import datetime
import csv
import re

def split_and_clean_code_file(file_path, output_folder_path):
    """Split one source file into per-assessment segment files.

    The file is cut at every ``/** ASSESSMENT ... */`` comment and each
    non-empty piece is written to
    ``<output_folder_path>/segment_<k>/<basename>_segment_<k>.txt``.

    Behaviour:
      * Files whose name contains ``refcode`` (case-insensitive) keep the
        assessment comment at the top of each segment; segments are numbered
        from 1 in order of appearance.
      * All other files have the assessment comments removed; the text that
        follows the k-th removed comment becomes segment k.
      * Text that precedes the first assessment marker is prepended to
        segment 1 for both kinds of file.
      * Pieces that are empty after stripping whitespace are not written.
      * A file without any assessment marker produces no output.

    Args:
        file_path: Path of the file to split.
        output_folder_path: Folder under which the ``segment_<k>`` folders
            are created.
    """
    with open(file_path, 'r') as file:
        content = file.read()

    first_assessment_pos = content.find('/** ASSESSMENT')
    if first_assessment_pos == -1:
        # str.find returns -1 when the marker is missing; slicing with -1
        # would keep only the last character of the file, so stop here.
        return

    # Everything in front of the first marker (empty when the file starts
    # with the marker) is carried over into segment 1 below.
    before_assessment = content[:first_assessment_pos].strip()
    content = content[first_assessment_pos:]

    # Determine whether to remove scores and feedback based on the filename
    remove_comments = 'refcode' not in os.path.basename(file_path).lower()

    if remove_comments:
        # The content starts with the marker, so element 0 of the split is
        # the (normally empty) text before it and element k follows the
        # k-th comment: numbering therefore starts at 0.
        sections = re.split(r'/\*\* ASSESSMENT.*?\*/', content, flags=re.DOTALL)
        first_index = 0
    else:
        # Group 1 is the assessment comment, group 2 the code up to the next
        # marker (or the end of the text); numbering starts at 1.
        pattern = r'(/\*\* ASSESSMENT.*?\*/)(.*?)(?=\s/\*\* ASSESSMENT|$)'
        sections = [
            f'{match.group(1)}\n{match.group(2).strip()}'
            for match in re.finditer(pattern, content, flags=re.DOTALL)
        ]
        first_index = 1

    base_filename = os.path.splitext(os.path.basename(file_path))[0]

    # Process each section
    for inx, section in enumerate(sections, start=first_index):
        # Key the preamble on the segment number rather than the list index
        # so reference and student files both receive it in segment 1.
        if inx == 1 and before_assessment:
            section = f"{before_assessment}\n{section}"

        cleaned_section = section.strip()
        if not cleaned_section:
            continue

        segment_folder_path = os.path.join(output_folder_path, f'segment_{inx}')
        os.makedirs(segment_folder_path, exist_ok=True)

        output_file_path = os.path.join(
            segment_folder_path, f'{base_filename}_segment_{inx}.txt'
        )
        with open(output_file_path, 'w') as output_file:
            output_file.write(cleaned_section)
            
def process_code_files(base_folder_path):
    """Split every code file found directly inside ``base_folder_path``.

    The folder is created if it does not exist yet.  Each entry whose name
    ends in ``.txt``, ``.java`` or ``.cpp`` is handed to
    ``split_and_clean_code_file``, with the same folder used as the output
    location for the generated segment folders.
    """
    source_suffixes = ('.txt', '.java', '.cpp')

    os.makedirs(base_folder_path, exist_ok=True)

    for entry in os.listdir(base_folder_path):
        if not entry.endswith(source_suffixes):
            continue
        split_and_clean_code_file(
            os.path.join(base_folder_path, entry),
            base_folder_path,
        )

# Function to read code files from the folder
def read_code_files(folder_path):
    """Return ``(filename, text)`` pairs for the code files in a folder.

    Only entries whose name ends in ``.txt``, ``.java`` or ``.cpp`` are read.
    The pairs come back in the order ``os.listdir`` reports the entries.
    """
    wanted_suffixes = (".txt", ".java", ".cpp")
    collected = []
    for name in os.listdir(folder_path):
        if not name.endswith(wanted_suffixes):
            continue
        full_path = os.path.join(folder_path, name)
        with open(full_path, 'r') as handle:
            text = handle.read()
        collected.append((name, text))
    return collected

# Function to create the assessment prompt
def create_assessment_prompt(student_code, ref_codes):
    """Build the grading prompt sent to the language model.

    Args:
        student_code: Text of the student submission to be graded.
        ref_codes: Iterable of reference-code texts; they are numbered from 1
            in the prompt and separated by blank lines.

    Returns:
        A single string made of the role description, the five instruction
        steps, the numbered reference codes and the student code.
    """
    numbered_refs = [
        f"Reference Code ({number}):\n{ref}"
        for number, ref in enumerate(ref_codes, start=1)
    ]
    refcode_details = "\n\n".join(numbered_refs)

    role_description = (
        "You are an AI designed to assess entry-level programming exams at an academic level. "
        "Your expertise lies in segmenting the answer codes based on the most similar reference code, assessing each segment of code under the \"ASSESSMENT\" comments "
        "and grading it based on the grade in the most similar reference code. "
        "You can use both dynamic and static code assessment. "
        "You can also use abstract syntax trees, control flow graphs, and data flow graphs of each segment to make assessments properly. "
        "Your main goal is to assess the code like an instructor and grade it even if it is not correct totally. "
        "Follow the steps below.\n\n"
    )

    steps = (
        "Step 1 - Perform an assessment for the entire code at once and create only one grade by comparing the answer code to the reference code.\n"
        "Step 2 - Assess and grade the codes by using dynamic and static code assessment methods.\n"
        "Step 3 - Support your assessment by comparing codes using their graph models; abstract syntax trees, control flow graphs, and data flow graphs.\n"
        "Step 4 - Grades should be integer and not bigger than the reference code's segment grade.\n\n"
        "Step 5 - Do not show titles and any extra details in your answer. Provide only the final grade.\n\n"
    )

    material = (
        f"Reference Codes:\n{refcode_details}\n\n"
        f"Student Code:\n{student_code}\n\n"
    )

    return role_description + steps + material

# Function to create and use Ollama Client with persistent session
def assess_code_with_client(client, student_code, ref_codes):
    """Ask the model behind ``client`` to grade one student submission.

    The prompt is built with ``create_assessment_prompt`` and sent as a single
    user message to the ``llama3`` model with streaming enabled; the streamed
    message fragments are concatenated and whitespace-stripped.

    Args:
        client: Object exposing a ``chat`` method (an ``ollama.Client`` in
            this file).
        student_code: Text of the student submission.
        ref_codes: Reference-code texts included in the prompt.

    Returns:
        The model's reply as a stripped string, or the fixed text
        ``"Error occurred during assessment."`` if any exception is raised
        while requesting or reading the reply.
    """
    prompt = create_assessment_prompt(student_code, ref_codes)
    user_message = {'role': 'user', 'content': prompt}
    sampling_options = {'temperature': 0.1, 'top_p': 0.1}

    try:
        stream = client.chat(
            model='llama3',
            messages=[user_message],
            options=sampling_options,
            stream=True,
        )
        # Consuming the stream stays inside the try block so that a failure
        # part-way through the reply is reported like any other error.
        fragments = [part['message']['content'] for part in stream]
        return "".join(fragments).strip()
    except Exception as e:
        print(f"An error occurred: {e}")
        return "Error occurred during assessment."

# Assess all code files and store the results
def assess_all_code_files(folder_path):
    """Grade every student file in ``folder_path`` against the reference files.

    Files are loaded with ``read_code_files``.  Those whose name starts with
    ``refcode`` (case-insensitive) are used as reference codes; every other
    file is treated as a student submission and assessed with
    ``assess_code_with_client`` using one shared ``ollama.Client``.  Each
    result is also printed as ``"<name> <assessment>"``.

    Args:
        folder_path: Folder containing the reference and student files.

    Returns:
        A list of ``(name, assessment)`` tuples, one per student file, where
        ``name`` is the file name without its extension.
    """
    ref_codes = []
    student_codes = []
    for filename, code in read_code_files(folder_path):
        if filename.lower().startswith("refcode"):
            ref_codes.append(code)
        else:
            student_codes.append((filename, code))

    # Create a persistent Ollama client
    client = ollama.Client()

    results = []
    for filename, code in student_codes:
        assessment = assess_code_with_client(client, code, ref_codes)
        # splitext removes the real extension; slicing off four characters
        # would turn "Name.java" into "Name.j".
        name = os.path.splitext(filename)[0]
        print(name + " " + assessment)
        results.append((name, assessment))

    return results

if __name__ == "__main__":
    # Folder names are resolved against the current working directory.  The
    # previous expression, os.path.dirname(os.path.abspath("__file__")), used
    # the string literal "__file__" and therefore also produced this directory.
    current_dir = os.getcwd()
    folder_name = input("Enter the folder name: ")
    folder_path = os.path.join(current_dir, folder_name)

    if not os.path.exists(folder_path):
        print("Folder does not exist!")
    else:
        # Split every code file in the folder into segment_<k> sub-folders,
        # e.g. for data\Midterm2-Annotated (1)\Q1
        process_code_files(folder_path)

        for root, dirs, files in os.walk(folder_path):
            for dir_name in dirs:
                # Only folders whose name includes "segment" are assessed.
                if "segment" not in dir_name.lower():
                    continue

                # Timestamps before and after show how long the folder took.
                print(datetime.now().strftime("%Y%m%d_%H%M%S"))
                segment_folder_path = os.path.join(root, dir_name)
                print(f"Assessing folder: {segment_folder_path}")
                results = assess_all_code_files(segment_folder_path)
                print(datetime.now().strftime("%Y%m%d_%H%M%S"))
                print(results)

                # One CSV row per submission: the submission name first, then
                # the whitespace-separated tokens of the model's reply.
                # Without the name a row cannot be tied back to its file.
                processed_data = [
                    [name, *result.split()] for name, result in results
                ]
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = os.path.join(segment_folder_path, f"assessment_{timestamp}.csv")

                # newline='' lets the csv module control line endings itself.
                with open(filename, mode='w', newline='') as file:
                    writer = csv.writer(file)
                    writer.writerows(processed_data)

                print(f"Data has been written to {filename}")


Enter the folder name: data\Midterm2-Annotated (1)\Q1
20240901_001010
Assessing folder: C:\Users\umitk\OneDrive\Documents\Python Scripts\data\Midterm2-Annotated (1)\Q1\segment_1
S018062_segment_1 3
S020362_segment_1 4
S020489_segment_1 4
S020916_segment_1 2
S021448_segment_1 3
S021760_segment_1 3
S023287_segment_1 3
S023370_segment_1 3
S023756_segment_1 4
S024015_segment_1 4
S024073_segment_1 3
S024077_segment_1 4
S024170_segment_1 4
S024206_segment_1 4
S024401_segment_1 4
S024417_segment_1 4
S024661_segment_1 3
S024667_segment_1 4
S024712_segment_1 4
S024716_segment_1 4


In [None]:
Enter the folder name: data\Midterm2-Annotated (1)\Q1
20240808_092301
S018062_segment_4 File name: Assignment1
5 5
S020489_segment_4 file1 4
S020916_segment_4 Reference Code (1): 5

File name and grade: Reference Code (1) 3
S021448_segment_4 Reference Code (1): 5 

File name: Reference Code (1) Student Code 
Grade: 4
S021760_segment_4 File name and grade: CodeAssessment 4 3
S023287_segment_4 File1 3
S023370_segment_4 file1 3
S023756_segment_4 File1 4
S024015_segment_4 file1 4
S024073_segment_4 Reference Code (1) 5.0 

Note: The grade is based on the similarity of the student code to the reference code, considering both dynamic and static aspects of the code.
S024077_segment_4 Reference Code (1): 5
File name: Reference Code (1).java 3
S024712_segment_4 Reference Code (1): 5
Student Code: 4
S024716_segment_4 File1 3
S024880_segment_4 Reference Code (1): 5
File name: Reference Code (1) Student Code 5
S024984_segment_4 File1 3
S025015_segment_4 File name: ReferenceCode1.java 5
S025176_segment_4 File name: Reference Code (1).txt 5
S025273_segment_4 File name: Reference Code (1).txt 4
S025397_segment_4 File name: ReferenceCode1.java 5
S026659_segment_4 File1 4
S026665_segment_4 File1 4
S028260_segment_4 File name: ReferenceCode1.java 5
S028272_segment_4 Reference Code (1) 3
S028318_segment_4 File1 4
S028412_segment_4 Reference Code (1): 5 

File name: Reference Code (1) Student Code 
Grade: 4
S028443_segment_4 file1 3
S028586_segment_4 File name: 1
5
S028693_segment_4 File name and grade: 5
S028744_segment_4 Reference Code (1): 5 

Note: The grade is based on the similarity of the student code to the reference code.
S028866_segment_4 Reference Code (1) 5 

Note: The grade is based on the entire code, not just one segment.
S028892_segment_4 Reference Code (1) 5
S028953_segment_4 Reference Code (1) 5
S029107_segment_4 File1 3
S029223_segment_4 File name and grade: code1 4
S029394_segment_4 Reference Code (1): 5 

File name: Reference Code (1) Student Code 
Grade: 4
S030890_segment_4 File1 4
S031912_segment_4 File1 4
S032929_segment_4 File1 4
S033428_segment_4 File 1: 4
S033747_segment_4 file1 2
S033855_segment_4 File name: ReferenceCode1.java 5
S033863_segment_4 File name and grade: CodeAssessment 4/5
S033888_segment_4 Reference Code (1): 5 

Note: The grade is based on the similarity of the student code to the reference code.
S033937_segment_4 File 1 4
S034088_segment_4 Reference Code (1): 5 

File name: Reference Code (1) Student Code 
Grade: 4
S034149_segment_4 Reference Code (1) 2
S034167_segment_4 File 1 4
S034244_segment_4 Reference Code (1): 5
File name: Reference Code (1)
S034350_segment_4 File1 4
S034387_segment_4 CodeAssessment.txt
Reference Code 1 2
S034413_segment_4 Reference Code (1) 5
S034436_segment_4 Reference Code (1): 5
S034505_segment_4 Reference Code (1): 5
Student Code: 4
S034506_segment_4 Reference Code (1): 5 

File name: Reference Code (1) Student Code 
Grade: 4
S034549_segment_4 Reference Code (1): 5
File name: Reference Code (1).cpp 5
S034556_segment_4 file1 3
S039529_segment_4 File1 3
20240808_094025
[('S018062_segment_4', 'File name: Assignment1\n5 5'), ('S020489_segment_4', 'file1 4'), ('S020916_segment_4', 'Reference Code (1): 5\n\nFile name and grade: Reference Code (1) 3'), ('S021448_segment_4', 'Reference Code (1): 5 \n\nFile name: Reference Code (1) Student Code \nGrade: 4'), ('S021760_segment_4', 'File name and grade: CodeAssessment 4 3'), ('S023287_segment_4', 'File1 3'), ('S023370_segment_4', 'file1 3'), ('S023756_segment_4', 'File1 4'), ('S024015_segment_4', 'file1 4'), ('S024073_segment_4', 'Reference Code (1) 5.0 \n\nNote: The grade is based on the similarity of the student code to the reference code, considering both dynamic and static aspects of the code.'), ('S024077_segment_4', 'Reference Code (1): 5\nFile name: Reference Code (1).java 3'), ('S024712_segment_4', 'Reference Code (1): 5\nStudent Code: 4'), ('S024716_segment_4', 'File1 3'), ('S024880_segment_4', 'Reference Code (1): 5\nFile name: Reference Code (1) Student Code 5'), ('S024984_segment_4', 'File1 3'), ('S025015_segment_4', 'File name: ReferenceCode1.java 5'), ('S025176_segment_4', 'File name: Reference Code (1).txt 5'), ('S025273_segment_4', 'File name: Reference Code (1).txt 4'), ('S025397_segment_4', 'File name: ReferenceCode1.java 5'), ('S026659_segment_4', 'File1 4'), ('S026665_segment_4', 'File1 4'), ('S028260_segment_4', 'File name: ReferenceCode1.java 5'), ('S028272_segment_4', 'Reference Code (1) 3'), ('S028318_segment_4', 'File1 4'), ('S028412_segment_4', 'Reference Code (1): 5 \n\nFile name: Reference Code (1) Student Code \nGrade: 4'), ('S028443_segment_4', 'file1 3'), ('S028586_segment_4', 'File name: 1\n5'), ('S028693_segment_4', 'File name and grade: 5'), ('S028744_segment_4', 'Reference Code (1): 5 \n\nNote: The grade is based on the similarity of the student code to the reference code.'), ('S028866_segment_4', 'Reference Code (1) 5 \n\nNote: The grade is based on the entire code, not just one segment.'), ('S028892_segment_4', 'Reference Code (1) 5'), 
('S028953_segment_4', 'Reference Code (1) 5'), ('S029107_segment_4', 'File1 3'), ('S029223_segment_4', 'File name and grade: code1 4'), ('S029394_segment_4', 'Reference Code (1): 5 \n\nFile name: Reference Code (1) Student Code \nGrade: 4'), ('S030890_segment_4', 'File1 4'), ('S031912_segment_4', 'File1 4'), ('S032929_segment_4', 'File1 4'), ('S033428_segment_4', 'File 1: 4'), ('S033747_segment_4', 'file1 2'), ('S033855_segment_4', 'File name: ReferenceCode1.java 5'), ('S033863_segment_4', 'File name and grade: CodeAssessment 4/5'), ('S033888_segment_4', 'Reference Code (1): 5 \n\nNote: The grade is based on the similarity of the student code to the reference code.'), ('S033937_segment_4', 'File 1 4'), ('S034088_segment_4', 'Reference Code (1): 5 \n\nFile name: Reference Code (1) Student Code \nGrade: 4'), ('S034149_segment_4', 'Reference Code (1) 2'), ('S034167_segment_4', 'File 1 4'), ('S034244_segment_4', 'Reference Code (1): 5\nFile name: Reference Code (1)'), ('S034350_segment_4', 'File1 4'), ('S034387_segment_4', 'CodeAssessment.txt\nReference Code 1 2'), ('S034413_segment_4', 'Reference Code (1) 5'), ('S034436_segment_4', 'Reference Code (1): 5'), ('S034505_segment_4', 'Reference Code (1): 5\nStudent Code: 4'), ('S034506_segment_4', 'Reference Code (1): 5 \n\nFile name: Reference Code (1) Student Code \nGrade: 4'), ('S034549_segment_4', 'Reference Code (1): 5\nFile name: Reference Code (1).cpp 5'), ('S034556_segment_4', 'file1 3'), ('S039529_segment_4', 'File1 3')]
Data has been written to C:\Users\umitk\OneDrive\Documents\Python Scripts\data\Midterm2-Annotated (1)\Q1\segment_4\assessment_20240808_094025.csv

57 files
17 min

1 segment: 30 s

In [2]:
import re

# Sample text containing two assessment comments, each followed by one line
# of ordinary content.
content = """
/** ASSESSMENT
This is a test section
of the content.
*/
Some other content here
/** ASSESSMENT
Another test section
with more details.
*/
End of content
"""

# Group 1 captures an assessment comment block; group 2 captures the text
# after it, up to the whitespace in front of the next block or the end.
pattern = r'(/\*\* ASSESSMENT.*?\*/)(.*?)(?=\s/\*\* ASSESSMENT|$)'

matches = re.finditer(pattern, content, flags=re.DOTALL)

# Each section is the comment block immediately followed by the stripped text
# (no separator is inserted between the two).
sections = [found.group(1) + found.group(2).strip() for found in matches]

# Print the sections, numbered from 1.
for number, body in enumerate(sections, start=1):
    print(f"Section {number}:\n{body}\n")


Section 1:
/** ASSESSMENT
This is a test section
of the content.
*/Some other content here

Section 2:
/** ASSESSMENT
Another test section
with more details.
*/End of content

