In [1]:
from langgraph.graph import StateGraph, START,END
from langchain_ollama import ChatOllama
from typing import TypedDict, Annotated
from pydantic import BaseModel, Field
import operator

In [2]:
# Ollama-backed chat model ("gemma3:4b") shared by all evaluation nodes below.
model =  ChatOllama(model="gemma3:4b")

In [3]:
class evaluation_schema(BaseModel):
    # Structured result expected from the model for one evaluation pass:
    # a free-text critique plus an integer mark.
    feedback: str = Field(description="Detailed feedback for the essay")
    # Valid range is 0-10 inclusive, matching the scoring guides used in the
    # evaluator prompts. A lower bound of 10 would reject every score except
    # 10 and make the grading meaningless.
    score: int = Field(description='Score out of 10', ge=0, le=10)

In [4]:
# Variant of the chat model bound to evaluation_schema; the evaluator nodes
# read `.feedback` and `.score` from whatever it returns.
structured_model = model.with_structured_output(evaluation_schema)

In [5]:
# Sample essay fed to the workflow as input. The text is intentionally kept
# verbatim (including its language errors) because it is the data being graded.
essay = """My Day in City

Yesterday I going to city with my friend for buy some thing. We wake up very late so no time for eat breakfast, only drink tea. After we catch bus but bus is very much crowded and noisy and smell not good. My friend he say we should go walking but I tell no because my leg already pain from yesterday football play.

In city we see many people is selling all kind thing, like fruit, shoe, phone and many animal also. I want to buy mango but seller tell me price too much high so I angry and walk away. Then we going to shop for buy bag, but bag is big and heavy and color not so nice.

We eat lunch in small hotel, food is not so taste good and plate also not clean but we still eat because we hungry too much. Then raining come and we no have umbrella so we become all wet. My shoe making sound chap-chap when walk.

When going back home, bus not coming for long time so we sit on bench and wait. Finally bus come but driver very fast drive and many time we almost fall down. At home I feeling very much tired and my clothes full dirty. It was good and bad day both.

"""

In [6]:
class UPSCState(TypedDict):
    """State dictionary shared by the nodes of the essay-evaluation graph."""

    # Essay text under evaluation.
    essay: str

    # One critique per evaluation dimension, plus the combined summary.
    language_feedback: str
    analysis_feedback: str
    clarity_feedback: str
    overall_feedback: str

    # operator.add is attached as the reducer, so the single-element score
    # lists returned by the evaluator nodes are concatenated, not overwritten.
    individual_scores: Annotated[list[int], operator.add]

    # Arithmetic mean of individual_scores, filled in by the final node.
    avg_score: float


In [7]:
def evaluate_language(state: UPSCState):
    """Grade the language quality of the essay.

    Embeds ``state['essay']`` in the language-evaluation prompt, sends it to
    ``structured_model`` and returns a partial state update holding
    ``language_feedback`` and a one-element ``individual_scores`` list.
    """
    essay_text = state['essay']
    prompt = f"""You are a strict and unbiased UPSC evaluator.
Your task is to evaluate the language quality of the candidate’s essay.

Evaluation Criteria (deduct marks for any weakness):

NOTE IMPORTANT : Observe the grammatical errors deduct scores out of 10 accordingly
Score should be justified.

Grammar & Syntax – Correct use of grammar, punctuation, and sentence structure.

Clarity & Precision – Clear, concise, and unambiguous expression of ideas.

Vocabulary & Word Choice – Appropriate, varied, and contextually accurate vocabulary.

Flow & Coherence – Logical connection between sentences and paragraphs.

Formality & Tone – Suitable for a UPSC-level academic/analytical essay.

Scoring Guide (out of 10):

9–10: Outstanding — flawless grammar, excellent vocabulary, and perfect coherence.

7–8: Good — minor issues but overall clear and well-written.

5–6: Average — noticeable grammar or flow issues; some awkward phrasing.

3–4: Poor — frequent errors, weak vocabulary, and low readability.

0–2: Very poor — incoherent, numerous mistakes, and unclear meaning.

Task:

Provide critical feedback with examples of strengths and weaknesses in language.

Justify the score with specific references to the essay’s text.

Assign one strict final score out of 10 according to the above scale.

Essay to evaluate:
{essay_text}
"""

    verdict = structured_model.invoke(prompt)

    # The score is wrapped in a list because individual_scores is a list field
    # to which every evaluator contributes one entry.
    return {
        'language_feedback': verdict.feedback,
        'individual_scores': [verdict.score],
    }

In [11]:
def evaluate_analysis(state: UPSCState):
    """Grade the depth and quality of analysis in the essay.

    Embeds ``state['essay']`` in the analysis-evaluation prompt, sends it to
    ``structured_model`` and returns a partial state update holding
    ``analysis_feedback`` and a one-element ``individual_scores`` list.
    """
    essay_text = state['essay']
    prompt = f"""You are a strict and unbiased UPSC evaluator.
Your task is to evaluate the depth and quality of analysis in the candidate’s essay.

Evaluation Criteria (deduct marks for any weakness):

Depth of Analysis – Does the essay go beyond surface-level points, showing nuanced understanding?

Logical Structure – Are ideas well-organized, with clear progression and no contradictions?

Relevance & Focus – Does the analysis stay on-topic without unnecessary content?

Evidence & Examples – Are claims supported with relevant data, facts, or illustrations?

Critical Thinking – Does the candidate present multiple perspectives and weigh them logically?

Scoring Guide (out of 10):

9–10: Exceptional — highly analytical, coherent, insightful, and well-supported arguments.

7–8: Good — clear and logical, but lacks some depth or evidence in places.

5–6: Average — basic points covered, but superficial analysis or limited coherence.

3–4: Weak — poor structure, limited depth, and few or no examples.

0–2: Very poor — incoherent, no real analysis, irrelevant content.

Task:

Provide critical feedback pointing out strengths and weaknesses.

Use specific examples from the essay to justify your evaluation.

Assign one final score out of 10 strictly based on the above scale.

Essay to evaluate:
{essay_text}
"""

    verdict = structured_model.invoke(prompt)

    # The score is wrapped in a list because individual_scores is a list field
    # to which every evaluator contributes one entry.
    return {
        'analysis_feedback': verdict.feedback,
        'individual_scores': [verdict.score],
    }

In [12]:
def evaluate_thought(state: UPSCState):
    """Grade the clarity of thought in the essay.

    Embeds ``state['essay']`` in the clarity-evaluation prompt, sends it to
    ``structured_model`` and returns a partial state update holding
    ``clarity_feedback`` and a one-element ``individual_scores`` list.
    """
    essay_text = state['essay']
    prompt = f"""You are a strict and unbiased UPSC auditor evaluating a candidate’s essay.
Assess only the clarity of thought and quality of language — not factual correctness.

Evaluation Criteria (deduct marks for any weakness):

Clarity & Coherence: Is the argument logically structured and easy to follow?

Language Quality: Grammar, spelling, and sentence structure.

Relevance: Does every part contribute to the main idea?

Conciseness: Avoids redundancy and filler words.

Scoring Guide (out of 10):

9–10: Exceptional clarity, flawless grammar, strong logical flow.

7–8: Good but with minor issues in structure or wording.

5–6: Average — noticeable grammar or coherence issues.

3–4: Poor — frequent errors, unclear argument, weak structure.

0–2: Very poor — incoherent, major grammar issues, no logical flow.

Task:

Provide critical, UPSC-level feedback highlighting weaknesses and strengths.

Justify your scoring with specific examples from the essay.

Assign one final score strictly according to the above scale.

Essay to evaluate:
{essay_text}
"""

    verdict = structured_model.invoke(prompt)

    # The score is wrapped in a list because individual_scores is a list field
    # to which every evaluator contributes one entry.
    return {
        'clarity_feedback': verdict.feedback,
        'individual_scores': [verdict.score],
    }

In [13]:
def final_evaluation(state : UPSCState):
    """Summarize the three critiques and average the individual scores.

    Args:
        state: Graph state; reads ``language_feedback``, ``analysis_feedback``,
            ``clarity_feedback`` and ``individual_scores``.

    Returns:
        Partial state update with ``overall_feedback`` (summary text) and
        ``avg_score`` (arithmetic mean of ``individual_scores``).
    """
    # summary feedback
    prompt = f"""Based on the following feedback create a summarized feedback \n language feedaback  - {state['language_feedback']} \n depth of analysis feedback -{state['analysis_feedback']} \n clarity of thought feedback - {state['clarity_feedback']} """

    # model.invoke returns a chat message object rather than a plain string;
    # keep only its text so overall_feedback matches the `str` type declared
    # for it in UPSCState.
    final_feedback = model.invoke(prompt).content

    # avg score
    scores = state['individual_scores']
    avg_score = sum(scores) / len(scores)

    return {'overall_feedback': final_feedback, 'avg_score': avg_score}



In [14]:
def _build_graph():
    """Assemble the evaluation graph: three parallel evaluators feeding one summary node."""
    builder = StateGraph(UPSCState)

    evaluators = (
        ('evaluate_language', evaluate_language),
        ('evaluate_analysis', evaluate_analysis),
        ('evaluate_thought', evaluate_thought),
    )

    # Register the evaluator nodes, then the aggregating node.
    for node_name, node_fn in evaluators:
        builder.add_node(node_name, node_fn)
    builder.add_node('final_evaluation', final_evaluation)

    # Fan out: every evaluator starts directly from START.
    for node_name, _ in evaluators:
        builder.add_edge(START, node_name)

    # Fan in: every evaluator feeds the final evaluation.
    for node_name, _ in evaluators:
        builder.add_edge(node_name, 'final_evaluation')

    builder.add_edge('final_evaluation', END)
    return builder


graph = _build_graph()

workflow = graph.compile()

In [15]:
# Only the essay is supplied up front; the remaining state fields are
# produced by the graph nodes.
initial_state = dict(essay=essay)

In [16]:
# Run the compiled workflow on the initial state and keep the resulting state.
final_state = workflow.invoke(initial_state)

In [17]:
# Bare expression: shows the resulting state as the notebook cell's output.
final_state

{'essay': 'My Day in City\n\nYesterday I going to city with my friend for buy some thing. We wake up very late so no time for eat breakfast, only drink tea. After we catch bus but bus is very much crowded and noisy and smell not good. My friend he say we should go walking but I tell no because my leg already pain from yesterday football play.\n\nIn city we see many people is selling all kind thing, like fruit, shoe, phone and many animal also. I want to buy mango but seller tell me price too much high so I angry and walk away. Then we going to shop for buy bag, but bag is big and heavy and color not so nice.\n\nWe eat lunch in small hotel, food is not so taste good and plate also not clean but we still eat because we hungry too much. Then raining come and we no have umbrella so we become all wet. My shoe making sound chap-chap when walk.\n\nWhen going back home, bus not coming for long time so we sit on bench and wait. Finally bus come but driver very fast drive and many time we almost