# Rename directory

In [1]:
import os

# Folder that contains one sub-folder per student submission.
# All paths below are built relative to it instead of calling os.chdir(), so a
# failure part-way through cannot leave the kernel stranded in another working
# directory (which would break every later cell that uses relative paths).
transcripts_dir = os.path.join('.', 'Transcripts', '091624')

# Get a list of all entries in the transcripts directory
entries = os.listdir(transcripts_dir)

for entry in entries:
    folder_path = os.path.join(transcripts_dir, entry)
    if not os.path.isdir(folder_path):
        continue

    # Folder names are split on ' - '; the second part is used as the student
    # name. Folders with fewer than three parts are left untouched.
    parts = entry.split(' - ')
    if len(parts) < 3:
        continue

    # Remove spaces from the student name to build the new folder name
    new_folder_name = parts[1].replace(' ', '')
    new_folder_path = os.path.join(transcripts_dir, new_folder_name)

    # Never rename onto an empty name or an existing entry: os.rename would
    # either raise or (for an empty target directory on POSIX) replace it.
    if not new_folder_name or os.path.exists(new_folder_path):
        print(f"Skipping '{entry}': cannot rename to '{new_folder_name}' (empty name or target already exists).")
        continue

    # First, rename the CSV file inside the folder to "ai_transcripts.csv"
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            old_file_path = os.path.join(folder_path, filename)
            new_file_path = os.path.join(folder_path, 'ai_transcripts.csv')
            os.rename(old_file_path, new_file_path)
            break  # Assuming there's only one CSV file per folder

    # Then rename the folder itself
    os.rename(folder_path, new_folder_path)


# Check file validity; contact flagged students to re-upload their CSV file

In [3]:
import os
import pandas as pd

# Specify the directory containing the student folders
base_directory = './Transcripts/091624'

# List to keep track of students whose files couldn't be read
failed_students = []

# List of encodings to try, in order; the first one that parses wins
encodings_to_try = ['utf-8', 'ISO-8859-1', 'windows-1252', 'latin1', 'utf-16', 'ascii']

# Get a list of all entries in the base directory
entries = os.listdir(base_directory)

for entry in entries:
    student_folder = os.path.join(base_directory, entry)
    if not os.path.isdir(student_folder):
        print(f"Skipping non-directory entry: '{student_folder}'")
        continue

    csv_file_path = os.path.join(student_folder, 'ai_transcripts.csv')
    if not os.path.exists(csv_file_path):
        print(f"No 'ai_transcripts.csv' found in '{student_folder}'.")
        continue

    # Try reading the CSV file with each encoding. The last exception is kept
    # (instead of being silently discarded) so a flagged file comes with a
    # reason that can be passed on to the student.
    last_error = None
    for encoding in encodings_to_try:
        try:
            df = pd.read_csv(csv_file_path, encoding=encoding, on_bad_lines='skip')
            break  # Exit the loop after successful read
        except Exception as e:
            last_error = e
    else:
        # None of the encodings worked: flag the student and show why
        print(f"Could not read '{csv_file_path}' with any of the tried encodings.")
        print(f"  Last error ({type(last_error).__name__}): {last_error}")
        failed_students.append(entry)

# After processing all files, print a summary of failures
if failed_students:
    print("\nThe following students' files could not be read with any of the tried encodings:")
    for student in failed_students:
        print(f"- {student}")
else:
    print("\nAll files were successfully read with one of the tried encodings.")


Skipping non-directory entry: './Transcripts/091624/index.html'
Skipping non-directory entry: './Transcripts/091624/.DS_Store'
Could not read './Transcripts/091624/AlaraKaymak/ai_transcripts.csv' with any of the tried encodings.

The following students' files could not be read with any of the tried encodings:
- AlaraKaymak


# Rubrics

In [None]:
'''refer to rubric generation code'''

# Grading

In [2]:
import os
import pandas as pd
from openai import OpenAI
import PyPDF2

# Read the OpenAI API key from the OPENAI_API_KEY environment variable rather
# than hard-coding it: notebooks are routinely shared and committed, and a key
# pasted into a cell leaks with them. A missing variable raises KeyError here,
# before any grading work starts.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def generate_feedback(conversation_df, rubric=""):
    """Ask the chat model to grade one student's chatbot conversation.

    Uses the module-level ``client`` to send a single, non-streaming chat
    completion request to the "gpt-4-turbo" model.

    Args:
        conversation_df: DataFrame whose first column holds the student's
            prompts and whose second column holds the AI's responses. The
            columns are addressed by position, so their labels do not matter;
            any further columns are ignored.
        rubric: Rubric text interpolated into the system prompt.

    Returns:
        The content of the model's first choice as a string, or None (after
        printing a message) when the DataFrame has fewer than two columns.
    """
    if conversation_df.shape[1] < 2:
        print("Conversation DataFrame does not have at least 2 columns.")
        return None

    # Combine user prompts (first column) and AI responses (second column).
    # .iloc is used because row[0] / row[1] are label lookups: they only work
    # when the columns happen to be labelled 0 and 1 (header=None) and fall
    # back to a deprecated positional lookup for any other labels.
    conversation_text = "\n".join(
        f"Student: {row.iloc[0]}\nAI: {row.iloc[1]}"
        for _, row in conversation_df.iterrows()
    )

    # Create a message for the API
    messages = [
        {"role": "system", "content": f"You are an AI grader to evaluate student asslignments for a class called Data Science Applications in Business. In this assignment, students are asked to chat with AI chatbot to learn about relevant topics. You will be reading each student's conversation with Ai loaded from a csv file. Students will be graded on a scale of 0-10, with each grade initializing at 10 with a simple 'well done' feedback unless clearly not meeting requirements within the following rubric: {rubric}, in which case you should provide clear and detailed feedback on why the student received point(s) deduction. Feedback format: 'Grade: x/10. Feedback: (either 'well done' or specific feedback based on reasons for points deduction)'. "},
        
        {"role": "user", "content": f"""
         Grade the student's following conversation and provide feedback based on my provided instructions. 
        Here is the conversation:
        {conversation_text}
        """}
    ]

    # Call OpenAI API for feedback (non-streaming)
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=messages,
    )

    return response.choices[0].message.content

# Location of the rubric PDF; its text is extracted further down and passed to
# generate_feedback as the grading rubric
rubric_path = "./Rubrics/Survey_Rubric_Strategy_Prep.pdf"

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Pages are joined with no separator between them.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The concatenated page text, or None if anything goes wrong while
        opening or parsing the file (the error is printed, not raised).
    """
    try:
        with open(pdf_path, 'rb') as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            collected = ''
            for page in reader.pages:
                collected += page.extract_text()
            return collected
    except Exception as err:
        print(f"Failed to read PDF file: {err}")
        return None

rubric = extract_text_from_pdf(rubric_path)
if rubric is None:
    # extract_text_from_pdf returns None on failure. Grading would otherwise
    # continue with the literal text "None" interpolated as the rubric.
    raise RuntimeError(f"Could not load the rubric from '{rubric_path}'; aborting grading.")

# Specify the directory containing the student folders
base_directory = './Transcripts/091624'

# List of encodings to try, in order; the first one that parses wins
encodings_to_try = ['utf-8', 'ISO-8859-1', 'windows-1252', 'latin1', 'utf-16', 'ascii']

# Get a list of all entries in the base directory
entries = os.listdir(base_directory)

feedback_list = []  # one {'name', 'feedback'} record per graded student
fail_list = []      # students for whom no feedback could be produced
for entry in entries:
    student_folder = os.path.join(base_directory, entry)
    if not os.path.isdir(student_folder):
        print(f"Skipping non-directory entry: '{student_folder}'")
        continue

    csv_file_path = os.path.join(student_folder, 'ai_transcripts.csv')
    if not os.path.exists(csv_file_path):
        print(f"No 'ai_transcripts.csv' found in '{student_folder}'.")
        continue

    # Try reading the CSV file with different encodings, remembering the last
    # error so an unreadable file is reported with a reason.
    last_error = None
    for encoding in encodings_to_try:
        try:
            df = pd.read_csv(csv_file_path, encoding=encoding, on_bad_lines='skip', header=None)
            break  # Exit the loop after successful read
        except Exception as e:
            last_error = e
    else:
        # If none of the encodings worked, record the failure and skip this student
        print(f"Could not read '{csv_file_path}' with any of the tried encodings.")
        print(f"  Last error ({type(last_error).__name__}): {last_error}")
        fail_list.append(entry)
        continue  # Skip to next student

    # Generate feedback. An exception from the API call must not abort the
    # whole run: the feedback collected so far is only written to disk after
    # the loop, so it would be lost.
    try:
        feedback = generate_feedback(df, rubric=rubric)
    except Exception as e:
        print(f"Error while generating feedback for student {entry}: {e}")
        feedback = None

    if feedback is not None:
        # Print feedback
        print(f"\nFeedback for student: {entry}")
        print(feedback)

        # Add to feedback list
        feedback_list.append({'name': entry, 'feedback': feedback})
    else:
        print("\n********************************************************")
        print(f"Failed to generate feedback for student {entry}.")
        fail_list.append(entry)
        print("********************************************************\n")

# Save all feedback to a CSV file in the repository root folder
feedback_df = pd.DataFrame(feedback_list)
output_csv_path = './all_feedback.csv'
feedback_df.to_csv(output_csv_path, index=False)
print(f"\nAll feedback saved to '{output_csv_path}'.")
print(f"fail list: {fail_list}")



Feedback for student: MichaelHaidar
Grade: 10/10. Feedback: Well done. You have demonstrated a comprehensive understanding of data governance and related concepts through your responses. You correctly covered aspects such as the importance of data quality management, the role of data stewards, and the significance of compliance with data protection regulations in a data governance framework. Additionally, you recognized the importance of data lifecycle management. Excellent work in articulating these complex concepts clearly and accurately. Keep up the great work!

Feedback for student: JoshGiobbi
Grade: 10/10. Feedback: Well done.

Feedback for student: NatashaMessier
Grade: 10/10. Feedback: well done

Feedback for student: MaggieTu
Grade: 10/10. Feedback: Well done.

Feedback for student: HarshitaAhuja
Grade: 10/10. Feedback: well done. 

The student demonstrated a comprehensive understanding of SWOT analysis, Porter’s Five Forces, KPIs, and their applications to real-world scenario