In [4]:
import os
import nbformat
import google.generativeai as genai
from nbformat import read
from dotenv import load_dotenv

# Configuration
# Folder that is scanned for student notebooks; the graded reports are
# written into per-class subfolders created beneath it.
FOLDER_PATH = r"path\to\your\code folder"

# Load environment variables from the .env file (the grader later reads
# GEMINI_API_KEY from the environment).
load_dotenv(dotenv_path=r'path\to\your\.env')
# Grading prompt sent to the model for every notebook. It holds the assignment
# text (parts a-e with their marks), the grading checklist, deduction notes and
# the required markdown report layout. The student's code is appended after it
# at call time. The literal is passed verbatim, so its whitespace (tabs,
# trailing spaces) is part of the prompt.
PROMPT = """
**Context:**  
As a data analyst, you have been given a Student Performance dataset that contains the factors influencing academic student performance. The dataset consists of 10,000 student records, with each record containing information about various factors and performance index as follows:
•	Hours Studied (X1): The total number of hours spent studying by each student.
•	Previous Scores (X2): The scores obtained by students in previous tests.
•	Extracurricular Activities (X3): Whether the student participates in extracurricular activities (Yes: 1 or No: 0).
•	Sleep Hours (X4): The average number of hours of sleep the student had per day.
•	Sample Question Papers Practiced (X5): The number of sample question papers the student had practiced.
•	Performance Index (X6): A measure of the overall performance of each student.
 
Using the dataset provided, write a python code (Including the use of Pandas, Numpy, Matplotlib, and Statsmodels libraries) in Jupyter Notebook and perform the following:
a)	Load the CSV file for the dataset.	(5 marks)
b)	Apply linear regression model based on the number of independent variables available in the dataset.	(30 marks)
c)	Present the correlation between the independent variable and the dependent variable by plotting a scatter plot. These independent variables include X1, X2, and X4. Add the regression line (colored in red) in each plot.	(30 marks)
d)	Apply multiple linear regression model which including all the independent variable available. Using your model, predict the Performance Index of new student who has the following factors:
•	X1: 7
•	X2: 90
•	X3: Yes
•	X4: 4
•	X5: 1	(25 marks)
e)	The Actual Value of Performance Index for new student is 76. Present whether the Predicted Value you obtained from d) is the same as Actual Value. Illustrate your reasoning.	(10 marks)



**Grading Instructions:**  
- [ ] Loads CSV (5 marks)  
- [ ] Creates multiple regression with all 5 variables (30 marks)  
- [ ] Generates correct plots (30 marks)  
- [ ] Makes prediction (25 marks)  
- [ ] Validates prediction (10 marks) 
**Notes:**  
- Deduct 5 marks if using scikit-learn instead of statsmodels  
- Deduct 10 marks if plots lack red regression lines  
- Ensure X3 is encoded numerically (Yes=1/No=0)  

**Output Format:**  
Create a markdown report with:  
1. Sections for each part (a-e)  
2. Feedback per checklist item  
3. Marks awarded per section  
4. Total marks at the end  
"""

def _split_notebook_name(filename):
    """Split ``<class>_<student_id>.ipynb`` into ``(class_name, student_id)``.

    Args:
        filename: Bare notebook file name (no directory part).

    Returns:
        A ``(class_name, student_id)`` tuple; the split happens at the first
        underscore, so the student id may itself contain underscores.

    Raises:
        ValueError: If the name has no underscore or either part is empty.
    """
    # Strip only the trailing extension; str.replace would also remove any
    # ".ipynb" substring occurring earlier in the name.
    stem, _ext = os.path.splitext(filename)
    class_name, sep, student_id = stem.partition('_')
    if not (sep and class_name and student_id):
        raise ValueError(
            "expected a file name of the form <class>_<student_id>.ipynb"
        )
    return class_name, student_id


def _read_notebook_code(nb_path):
    """Return the source of all code cells in the notebook, newline-joined."""
    with open(nb_path, 'r', encoding='utf-8') as f:
        nb = read(f, as_version=4)
    return '\n'.join(
        cell.source for cell in nb.cells if cell.cell_type == 'code'
    )


def process_notebooks():
    """Grade every ``.ipynb`` file in FOLDER_PATH and save a markdown report.

    For each notebook named ``<class>_<student_id>.ipynb`` the code cells are
    concatenated, appended to PROMPT and sent to the Gemini model. The reply
    text is written to ``FOLDER_PATH/<class>/<student_id>.md``.

    A failure on one notebook is reported on stdout and does not stop the
    remaining notebooks from being processed.
    """
    # Initialize Gemini
    genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
    model = genai.GenerativeModel('gemini-2.0-pro-exp')

    # Sorted so that runs are reproducible; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(FOLDER_PATH)):
        if not filename.endswith(".ipynb"):
            continue

        try:
            # Parse class and student ID
            class_name, student_id = _split_notebook_name(filename)

            # Read notebook and extract code cells
            code = _read_notebook_code(os.path.join(FOLDER_PATH, filename))

            # Generate assessment
            response = model.generate_content(
                PROMPT + "\n\nStudent Code:\n" + code
            )
            # Resolve the text BEFORE touching the filesystem: opening the
            # report with 'w' truncates it, so a failure while reading the
            # response would otherwise leave an empty report (or wipe a
            # previous good one).
            report = response.text

            # Create the class directory only once there is something to save
            class_dir = os.path.join(FOLDER_PATH, class_name)
            os.makedirs(class_dir, exist_ok=True)

            # Save results
            output_path = os.path.join(class_dir, f"{student_id}.md")
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)
            print(f"Graded {filename} -> {output_path}")

        except Exception as e:
            # Per-notebook boundary: report and continue with the next file.
            print(f"Error processing {filename}: {e}")

# Entry point: run the grader when this module is executed directly
# (not when it is imported).
if __name__ == "__main__":
    process_notebooks()


Graded class1_1132.ipynb -> C:\Users\lewka\Downloads\answer\class1\1132.md
Graded class1_1ab12.ipynb -> C:\Users\lewka\Downloads\answer\class1\1ab12.md
Graded class2_1121.ipynb -> C:\Users\lewka\Downloads\answer\class2\1121.md
Graded class2_1132 - Copy.ipynb -> C:\Users\lewka\Downloads\answer\class2\1132 - Copy.md
