In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import json
from datetime import datetime
from typing import Dict, List, Tuple
import statistics
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os

class StudentAnalyzer:
    def __init__(self):
        # Load data
        with open('/kaggle/input/dataset/api endpoints.json', 'r') as f:
            self.historical_data = json.load(f)
        with open('/kaggle/input/dataset/quiz submission data.json', 'r') as f:
            self.current_quiz = json.load(f)
        with open('/kaggle/input/dataset/quiz endpoint.json', 'r') as f:
            self.quiz_questions = json.load(f)
            
        # Initialize ML components for score prediction
        self.score_predictor = LinearRegression()
        self.scaler = StandardScaler()
        
        # Check for GPU availability
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        
        try:
            # Initialize LLM pipeline
            print("Loading LLM model...")
            token = "use your own hugging case read token here"
            
            self.pipe = pipeline(
                "text-generation",
                model="meta-llama/Llama-3.2-3B-Instruct",
                token=token,
                torch_dtype=torch.float16,
                device_map="auto",
            )
            print("LLM model loaded successfully!")
        except Exception as e:
            print(f"Error loading LLM model: {str(e)}")
            raise

    def _analyze_topic_performance(self) -> Dict[str, float]:
        """Analyze performance by topic"""
        topic_scores = {}
        
        for quiz in self.historical_data[:5]:
            topic = quiz['quiz']['topic']
            accuracy = float(quiz['accuracy'].replace(' %', ''))
            
            if topic not in topic_scores:
                topic_scores[topic] = []
            topic_scores[topic].append(accuracy)
            
        # Calculate average accuracy per topic
        return {topic: statistics.mean(scores) for topic, scores in topic_scores.items()}

    def _identify_strengths_weaknesses(self, topic_performance: Dict[str, float]) -> Tuple[List[str], List[str]]:
        """Identify strong and weak topics"""
        if not topic_performance:
            return [], []
            
        avg = statistics.mean(topic_performance.values())
        strengths = [topic for topic, score in topic_performance.items() if score >= avg + 5]
        weaknesses = [topic for topic, score in topic_performance.items() if score <= avg - 5]
        
        return strengths, weaknesses

    def _prepare_ml_data(self) -> Tuple[np.ndarray, np.ndarray]:
        """Prepare data for ML analysis"""
        features = []
        targets = []
        
        for quiz in self.historical_data:
            # Extract features
            feature_vector = [
                float(quiz['accuracy'].replace(' %', '')),
                float(quiz['speed']),
                quiz['correct_answers'],
                quiz['incorrect_answers'],
                len(quiz['response_map']),  # questions attempted
                quiz['mistakes_corrected']
            ]
            features.append(feature_vector)
            targets.append(quiz['score'])
            
        return np.array(features), np.array(targets)

    def predict_potential_score(self) -> float:
        """Predict potential score based on current performance patterns"""
        X, y = self._prepare_ml_data()
        
        if len(X) < 2:  # Need at least 2 data points for meaningful prediction
            return None
            
        # Scale features
        X_scaled = self.scaler.fit_transform(X)
        
        # Train model
        self.score_predictor.fit(X_scaled, y)
        
        # Prepare current quiz features
        current_features = np.array([[
            float(self.current_quiz['accuracy'].replace(' %', '')),
            float(self.current_quiz['speed']),
            self.current_quiz['correct_answers'],
            self.current_quiz['incorrect_answers'],
            len(self.current_quiz['response_map']),
            self.current_quiz['mistakes_corrected']
        ]])
        
        # Scale and predict
        current_scaled = self.scaler.transform(current_features)
        potential_score = self.score_predictor.predict(current_scaled)[0]
        
        return potential_score

    def analyze_question_patterns(self) -> Dict:
        """Analyze patterns in question responses"""
        patterns = {
            'time_based_errors': 0,
            'conceptual_gaps': defaultdict(int),
            'careless_mistakes': 0,
            'topic_wise_accuracy': defaultdict(lambda: {'correct': 0, 'total': 0})
        }
        
        # Analyze current quiz responses
        for q_id, opt_id in self.current_quiz['response_map'].items():
            # Find question in quiz_questions
            question = next((q for q in self.quiz_questions['quiz']['questions'] 
                           if str(q['id']) == str(q_id)), None)
            
            if question:
                topic = question['topic']
                correct_option = next((opt for opt in question['options'] 
                                    if opt['is_correct']), None)
                
                # Update topic-wise accuracy
                patterns['topic_wise_accuracy'][topic]['total'] += 1
                if correct_option and str(correct_option['id']) == str(opt_id):
                    patterns['topic_wise_accuracy'][topic]['correct'] += 1
                else:
                    # Analyze error patterns
                    if float(self.current_quiz['speed']) > 95:
                        patterns['time_based_errors'] += 1
                    patterns['conceptual_gaps'][topic] += 1
                    
                    # Check for careless mistakes
                    if self.current_quiz['mistakes_corrected'] > 0:
                        patterns['careless_mistakes'] += 1
        
        return patterns

    def calculate_learning_curve(self) -> Dict:
        """Calculate learning curve metrics"""
        scores = [quiz['score'] for quiz in self.historical_data]
        accuracies = [float(quiz['accuracy'].replace(' %', '')) 
                     for quiz in self.historical_data]
        
        if len(scores) < 2:
            return None
            
        # Calculate improvement rate
        score_improvement = (scores[0] - scores[-1]) / len(scores)
        accuracy_improvement = (accuracies[0] - accuracies[-1]) / len(accuracies)
        
        return {
            'score_trend': score_improvement,
            'accuracy_trend': accuracy_improvement,
            'consistency_score': np.std(scores),  # Lower is better
            'learning_rate': abs(score_improvement / np.mean(scores)) * 100
        }

    def analyze_performance(self) -> Dict:
        """Enhanced performance analysis"""
        # Calculate key metrics
        recent_scores = [quiz['score'] for quiz in self.historical_data[:5]]
        avg_score = statistics.mean(recent_scores) if recent_scores else 0
        
        accuracies = [float(quiz['accuracy'].replace(' %', '')) for quiz in self.historical_data[:5]]
        avg_accuracy = statistics.mean(accuracies) if accuracies else 0
        
        speeds = [float(quiz['speed']) for quiz in self.historical_data[:5]]
        avg_speed = statistics.mean(speeds) if speeds else 0
        
        # Get topic performance
        topic_performance = self._analyze_topic_performance()
        strengths, weaknesses = self._identify_strengths_weaknesses(topic_performance)
        
        analysis = {
            'average_score': avg_score,
            'average_accuracy': avg_accuracy,
            'average_speed': avg_speed,
            'topic_performance': topic_performance,
            'strengths': strengths,
            'weaknesses': weaknesses
        }
        
        # Add ML predictions
        potential_score = self.predict_potential_score()
        if potential_score is not None:
            analysis['potential_score'] = potential_score
            analysis['score_gap'] = potential_score - analysis['average_score']
        
        # Add question patterns
        analysis['question_patterns'] = self.analyze_question_patterns()
        
        # Add learning curve metrics
        learning_metrics = self.calculate_learning_curve()
        if learning_metrics:
            analysis['learning_metrics'] = learning_metrics
        
        return analysis

    def get_student_persona(self) -> Dict:
        """Enhanced student persona with ML insights"""
        analysis = self.analyze_performance()
        
        # Determine base persona
        if analysis['average_accuracy'] >= 85 and analysis['average_speed'] >= 95:
            base_persona = "Advanced Achiever"
        elif analysis['average_accuracy'] >= 75:
            base_persona = "Steady Performer"
        elif analysis['average_speed'] >= 90:
            base_persona = "Quick Learner"
        else:
            base_persona = "Building Foundations"
        
        # Enhanced characteristics
        characteristics = {
            'learning_style': 'Consistent' if analysis.get('learning_metrics', {}).get('consistency_score', 100) < 15 
                            else 'Variable',
            'performance_potential': 'High' if analysis.get('score_gap', 0) > 15 
                                   else 'Moderate' if analysis.get('score_gap', 0) > 5 
                                   else 'Optimized',
            'error_pattern': 'Time-pressured' if analysis['question_patterns']['time_based_errors'] > 3
                           else 'Conceptual' if sum(analysis['question_patterns']['conceptual_gaps'].values()) > 5
                           else 'Balanced'
        }
        
        return {
            'base_persona': base_persona,
            'characteristics': characteristics
        }

    def generate_recommendations(self) -> Dict:
        """Enhanced recommendations with ML insights"""
        analysis = self.analyze_performance()
        recommendations = {
            'focus_areas': [],
            'study_tips': [],
            'practice_suggestions': []
        }
        
        # Add topic-specific recommendations
        for topic in analysis['weaknesses']:
            recommendations['focus_areas'].append(f"Review core concepts in {topic}")
            recommendations['practice_suggestions'].append(f"Take more practice quizzes on {topic}")
        
        # Add ML-based recommendations
        if 'potential_score' in analysis:
            score_gap = analysis['score_gap']
            if score_gap > 10:
                recommendations['potential_improvement'] = [
                    f"You have the potential to improve your score by {score_gap:.1f} points",
                    "Focus on reducing careless mistakes",
                    "Review questions where you changed your answer"
                ]
        
        # Add pattern-based recommendations
        patterns = analysis['question_patterns']
        if patterns['time_based_errors'] > 0:
            recommendations['study_tips'].append(
                "Consider spending more time on complex questions"
            )
        
        # Add topic-specific recommendations based on conceptual gaps
        for topic, count in patterns['conceptual_gaps'].items():
            if count > 2:
                recommendations['focus_areas'].append(
                    f"Deep dive into fundamental concepts of {topic}"
                )
        
        # Add learning curve based recommendations
        if 'learning_metrics' in analysis:
            metrics = analysis['learning_metrics']
            if metrics['consistency_score'] > 20:
                recommendations['study_tips'].append(
                    "Work on maintaining consistent performance across quizzes"
                )
            if metrics['learning_rate'] < 5:
                recommendations['study_tips'].append(
                    "Consider revising your study strategy to improve learning rate"
                )
        
        return recommendations

    def analyze_historical_trends(self) -> Dict:
        """Analyze trends from historical quiz data"""
        last_5_quizzes = self.historical_data[:5]
        
        trends = {
            'score_progression': [],
            'accuracy_progression': [],
            'speed_progression': [],
            'topic_mastery': defaultdict(list),
            'question_patterns': {
                'response_time_trends': [],
                'mistake_patterns': defaultdict(int),
                'improvement_areas': set()
            }
        }
        
        for quiz in last_5_quizzes:
            # Track basic metrics progression
            trends['score_progression'].append(quiz['score'])
            trends['accuracy_progression'].append(float(quiz['accuracy'].replace(' %', '')))
            trends['speed_progression'].append(float(quiz['speed']))
            
            # Track topic mastery
            topic = quiz['quiz']['topic']
            accuracy = float(quiz['accuracy'].replace(' %', ''))
            trends['topic_mastery'][topic].append(accuracy)
            
            # Analyze response patterns
            duration = self._parse_duration(quiz['duration'])
            questions_attempted = len(quiz['response_map'])
            avg_time_per_question = duration / questions_attempted if questions_attempted > 0 else 0
            trends['question_patterns']['response_time_trends'].append(avg_time_per_question)
            
            # Track improvement areas
            if quiz['incorrect_answers'] > 0:
                trends['question_patterns']['mistake_patterns'][topic] += quiz['incorrect_answers']
                if quiz['incorrect_answers'] > 2:  # Threshold for identifying improvement areas
                    trends['question_patterns']['improvement_areas'].add(topic)
        
        return trends

    def _parse_duration(self, duration_str: str) -> float:
        """Convert duration string to minutes"""
        try:
            minutes, seconds = map(int, duration_str.split(':'))
            return minutes + seconds/60
        except:
            return 0

    def calculate_performance_metrics(self) -> Dict:
        """Calculate detailed performance metrics from historical data"""
        trends = self.analyze_historical_trends()
        
        metrics = {
            'score': {
                'trend': self._calculate_trend(trends['score_progression']),
                'volatility': np.std(trends['score_progression']),
                'improvement_rate': self._calculate_improvement_rate(trends['score_progression'])
            },
            'accuracy': {
                'trend': self._calculate_trend(trends['accuracy_progression']),
                'consistency': np.std(trends['accuracy_progression']),
                'peak_performance': max(trends['accuracy_progression'])
            },
            'topic_proficiency': {},
            'time_management': {
                'avg_time_per_question': np.mean(trends['question_patterns']['response_time_trends']),
                'time_trend': self._calculate_trend(trends['question_patterns']['response_time_trends'])
            }
        }
        
        # Calculate topic proficiency
        for topic, accuracies in trends['topic_mastery'].items():
            metrics['topic_proficiency'][topic] = {
                'average': np.mean(accuracies),
                'trend': self._calculate_trend(accuracies),
                'mastery_level': self._determine_mastery_level(accuracies)
            }
        
        return metrics

    def _calculate_trend(self, values: List[float]) -> str:
        """Calculate trend direction and magnitude"""
        if len(values) < 2:
            return "insufficient data"
        
        slope = (values[0] - values[-1]) / len(values)
        
        if abs(slope) < 0.5:
            return "stable"
        elif slope > 0:
            return "improving" if slope > 2 else "slightly improving"
        else:
            return "declining" if slope < -2 else "slightly declining"

    def _calculate_improvement_rate(self, values: List[float]) -> float:
        """Calculate rate of improvement"""
        if len(values) < 2:
            return 0
        return ((values[0] - values[-1]) / values[-1]) * 100

    def _determine_mastery_level(self, accuracies: List[float]) -> str:
        """Determine mastery level based on accuracy trends"""
        avg_accuracy = np.mean(accuracies)
        consistency = np.std(accuracies)
        
        if avg_accuracy >= 90 and consistency < 5:
            return "mastered"
        elif avg_accuracy >= 80:
            return "proficient"
        elif avg_accuracy >= 70:
            return "developing"
        else:
            return "needs improvement"

    def generate_detailed_report(self) -> Dict:
        """Generate a detailed performance report"""
        metrics = self.calculate_performance_metrics()
        trends = self.analyze_historical_trends()
        
        report = {
            'performance_summary': {
                'overall_trend': metrics['score']['trend'],
                'consistency_level': 'high' if metrics['score']['volatility'] < 10 else 'moderate' if metrics['score']['volatility'] < 20 else 'low',
                'improvement_rate': f"{metrics['score']['improvement_rate']:.1f}%"
            },
            'topic_analysis': {
                topic: {
                    'mastery_level': data['mastery_level'],
                    'trend': data['trend'],
                    'recommendation': self._generate_topic_recommendation(data)
                }
                for topic, data in metrics['topic_proficiency'].items()
            },
            'time_management': {
                'efficiency': metrics['time_management']['time_trend'],
                'avg_time_per_question': f"{metrics['time_management']['avg_time_per_question']:.1f} minutes",
                'recommendation': self._generate_time_recommendation(metrics['time_management'])
            },
            'improvement_areas': list(trends['question_patterns']['improvement_areas']),
            'strengths': [
                topic for topic, data in metrics['topic_proficiency'].items()
                if data['mastery_level'] in ['mastered', 'proficient']
            ]
        }
        
        return report

    def _generate_topic_recommendation(self, topic_data: Dict) -> str:
        """Generate specific recommendations based on topic performance"""
        if topic_data['mastery_level'] == 'needs improvement':
            return "Focus on fundamental concepts and increase practice frequency"
        elif topic_data['mastery_level'] == 'developing':
            return "Continue regular practice and focus on weak areas"
        elif topic_data['mastery_level'] == 'proficient':
            return "Maintain current performance and work on advanced concepts"
        else:
            return "Focus on maintaining mastery and helping others"

    def _generate_time_recommendation(self, time_data: Dict) -> str:
        """Generate time management recommendations"""
        avg_time = time_data['avg_time_per_question']
        if avg_time > 2:
            return "Work on improving question solving speed"
        elif time_data['time_trend'] == 'declining':
            return "Focus on maintaining accuracy while increasing speed"
        else:
            return "Current time management is effective"

    def generate_performance_visualizations(self):
        """Generate comprehensive performance visualizations"""
        plt.style.use('seaborn')
        
        # Create a figure with subplots
        fig = plt.figure(figsize=(20, 25))
        gs = GridSpec(4, 2, figure=fig)
        
        # Add a main title
        fig.suptitle('Student Performance Analysis Dashboard', fontsize=16, y=0.95)
        
        # 1. Topic Performance Bar Chart
        ax1 = fig.add_subplot(gs[0, :])
        self._plot_topic_performance(ax1)
        
        # 2. Score Progression Line Chart
        ax2 = fig.add_subplot(gs[1, 0])
        self._plot_score_progression(ax2)
        
        # 3. Accuracy vs Speed Scatter Plot
        ax3 = fig.add_subplot(gs[1, 1])
        self._plot_accuracy_speed_relationship(ax3)
        
        # 4. Error Types Distribution
        ax4 = fig.add_subplot(gs[2, 0])
        self._plot_error_distribution(ax4)
        
        # 5. Question Response Pattern
        ax5 = fig.add_subplot(gs[2, 1])
        self._plot_response_pattern(ax5)
        
        # 6. Performance Metrics Summary
        ax6 = fig.add_subplot(gs[3, :])
        self._plot_performance_metrics(ax6)
        
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.savefig('performance_analysis.png', dpi=300, bbox_inches='tight')
        plt.close()

    def _plot_topic_performance(self, ax):
        """Plot topic-wise performance bar chart"""
        topic_perf = self.analyze_performance()['topic_performance']
        topics = list(topic_perf.keys())
        scores = list(topic_perf.values())
        
        # Create bars
        bars = ax.bar(topics, scores)
        
        # Customize bars
        for i, bar in enumerate(bars):
            if scores[i] >= 80:
                bar.set_color('#2ecc71')  # Green for good performance
            elif scores[i] >= 60:
                bar.set_color('#f1c40f')  # Yellow for average
            else:
                bar.set_color('#e74c3c')  # Red for needs improvement
            
            # Add value labels on top of bars
            ax.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
                    f'{scores[i]:.1f}%',
                    ha='center', va='bottom')
        
        ax.set_title('Topic-wise Performance Analysis', fontsize=12, pad=20)
        ax.set_xlabel('Topics', fontsize=10)
        ax.set_ylabel('Accuracy (%)', fontsize=10)
        ax.tick_params(axis='x', rotation=45)
        ax.grid(True, alpha=0.3)
        ax.set_ylim(0, 100)

    def _plot_score_progression(self, ax):
        """Plot score progression over time"""
        scores = [quiz['score'] for quiz in self.historical_data[:5]]
        dates = [quiz['submitted_at'].split('T')[0] for quiz in self.historical_data[:5]]
        
        # Plot line with markers
        ax.plot(dates, scores, 'o-', color='#3498db', linewidth=2, markersize=8)
        
        # Add value labels
        for i, score in enumerate(scores):
            ax.text(i, score, f'{score:.1f}', ha='center', va='bottom')
        
        ax.set_title('Score Progression Over Time', fontsize=12, pad=20)
        ax.set_xlabel('Date', fontsize=10)
        ax.set_ylabel('Score', fontsize=10)
        ax.tick_params(axis='x', rotation=45)
        ax.grid(True, alpha=0.3)

    def _plot_accuracy_speed_relationship(self, ax):
        """Plot accuracy vs speed scatter plot"""
        accuracies = [float(quiz['accuracy'].replace(' %', '')) 
                     for quiz in self.historical_data[:5]]
        speeds = [float(quiz['speed']) for quiz in self.historical_data[:5]]
        
        # Create scatter plot
        scatter = ax.scatter(speeds, accuracies, c=range(len(speeds)), 
                            cmap='viridis', s=100)
        
        # Add trend line
        z = np.polyfit(speeds, accuracies, 1)
        p = np.poly1d(z)
        ax.plot(speeds, p(speeds), "r--", alpha=0.8, label='Trend')
        
        ax.set_title('Speed vs Accuracy Relationship', fontsize=12, pad=20)
        ax.set_xlabel('Speed', fontsize=10)
        ax.set_ylabel('Accuracy (%)', fontsize=10)
        ax.grid(True, alpha=0.3)
        ax.legend()

    def _plot_error_distribution(self, ax):
        """Plot error types distribution"""
        mistakes = self.analyze_question_mistakes()
        
        error_types = ['Conceptual', 'Calculation', 'Comprehension']
        error_counts = [
            len(mistakes['conceptual_errors']),
            len(mistakes['calculation_errors']),
            len(mistakes['comprehension_errors'])
        ]
        
        colors = ['#e74c3c', '#f39c12', '#3498db']
        wedges, texts, autotexts = ax.pie(error_counts, labels=error_types, colors=colors,
                                         autopct='%1.1f%%', startangle=90)
        
        # Enhance the appearance
        plt.setp(autotexts, size=8, weight="bold")
        plt.setp(texts, size=10)
        
        ax.set_title('Distribution of Error Types', fontsize=12, pad=20)

    def _plot_response_pattern(self, ax):
        """Plot question response pattern analysis"""
        patterns = self.analyze_question_patterns()
        
        # Prepare data
        categories = ['Correct', 'Time-based Errors', 'Careless Mistakes']
        values = [
            self.current_quiz['correct_answers'],
            patterns['time_based_errors'],
            patterns['careless_mistakes']
        ]
        
        # Create horizontal bars
        bars = ax.barh(categories, values, color=['#2ecc71', '#e74c3c', '#f1c40f'])
        
        # Add value labels
        for bar in bars:
            width = bar.get_width()
            ax.text(width, bar.get_y() + bar.get_height()/2.,
                    f'{int(width)}', 
                    ha='left', va='center', fontsize=10)
        
        ax.set_title('Question Response Pattern', fontsize=12, pad=20)
        ax.set_xlabel('Number of Questions', fontsize=10)
        ax.grid(True, alpha=0.3)

    def _plot_performance_metrics(self, ax):
        """Plot performance metrics summary"""
        metrics = self.calculate_performance_metrics()
        
        # Prepare data
        categories = ['Score Trend', 'Accuracy', 'Time Management', 'Consistency']
        values = [
            float(metrics['score']['improvement_rate']),
            metrics['accuracy']['peak_performance'],
            100 - (metrics['time_management']['avg_time_per_question'] * 10),  # Convert to percentage
            100 - (metrics['score']['volatility'] * 2)  # Convert to percentage
        ]
        
        # Create radar chart
        angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False)
        values = np.concatenate((values, [values[0]]))  # complete the loop
        angles = np.concatenate((angles, [angles[0]]))  # complete the loop
        
        ax.plot(angles, values, 'o-', linewidth=2)
        ax.fill(angles, values, alpha=0.25)
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(categories)
        
        ax.set_title('Performance Metrics Summary', fontsize=12, pad=20)
        ax.grid(True)

    def analyze_question_mistakes(self) -> Dict:
        """Analyze incorrect answers in detail"""
        mistakes_analysis = {
            'conceptual_errors': [],
            'calculation_errors': [],
            'comprehension_errors': [],
            'topic_wise_mistakes': defaultdict(list),
            'pattern_summary': ''
        }
        
        # Analyze current quiz responses
        for q_id, opt_id in self.current_quiz['response_map'].items():
            question = next((q for q in self.quiz_questions['quiz']['questions'] 
                           if str(q['id']) == str(q_id)), None)
            
            if question:
                correct_option = next((opt for opt in question['options'] 
                                    if opt['is_correct']), None)
                selected_option = next((opt for opt in question['options'] 
                                     if str(opt['id']) == str(opt_id)), None)
                
                if correct_option and selected_option and str(correct_option['id']) != str(opt_id):
                    mistake = {
                        'question': question['description'],
                        'correct_answer': correct_option['description'],
                        'student_answer': selected_option['description'],
                        'topic': question['topic'],
                        'detailed_solution': question.get('detailed_solution', '')  # Use get() with default value
                    }
                    
                    # Analyze mistake type using solution if available
                    solution_text = mistake['detailed_solution'].lower() if mistake['detailed_solution'] else ''
                    
                    if 'calculation' in solution_text or any(word in question['description'].lower() 
                       for word in ['calculate', 'compute', 'find', 'solve']):
                        mistakes_analysis['calculation_errors'].append(mistake)
                    elif solution_text and any(word in solution_text 
                         for word in ['concept', 'principle', 'theory']):
                        mistakes_analysis['conceptual_errors'].append(mistake)
                    else:
                        mistakes_analysis['comprehension_errors'].append(mistake)
                    
                    mistakes_analysis['topic_wise_mistakes'][question['topic']].append(mistake)
        
        return mistakes_analysis

    def _format_mistakes_for_prompt(self, mistakes_analysis: Dict) -> str:
        """Format mistakes analysis into a structured string for the prompt"""
        formatted_mistakes = "Mistakes Analysis:\n"
        
        # Add topic-wise mistakes
        for topic, mistakes in mistakes_analysis['topic_wise_mistakes'].items():
            formatted_mistakes += f"\nTopic: {topic}\n"
            for idx, mistake in enumerate(mistakes, 1):
                formatted_mistakes += f"{idx}. Question: {mistake['question']}\n"
                formatted_mistakes += f"   - Student Answer: {mistake['student_answer']}\n"
                formatted_mistakes += f"   - Correct Answer: {mistake['correct_answer']}\n"
        
        return formatted_mistakes

    def _generate_llm_analysis(self, prompt: str) -> str:
        """Generate analysis using LLM"""
        try:
            # Format prompt
            formatted_prompt = f"""<s>[INST] <<SYS>>
You are an expert educational advisor specializing in NEET exam preparation.
Your task is to analyze student performance and provide detailed, actionable recommendations.
<</SYS>>

{prompt} [/INST]"""
            
            # Generate response
            outputs = self.pipe(
                formatted_prompt,
                max_new_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.1,
                do_sample=True,
                return_full_text=False
            )
            
            if outputs and len(outputs) > 0:
                response = outputs[0]['generated_text']
                # Clean up response
                response = response.replace("[/INST]", "").strip()
                return response
            
            return "Error: Unable to generate analysis"
            
        except Exception as e:
            print(f"Error in LLM analysis: {str(e)}")
            return f"Error generating analysis: {str(e)}"

    def _generate_llm_prompt(self, analysis: Dict, mistakes: Dict) -> str:
        """Generate detailed prompt for Llama"""
        prompt = f"""Task: Analyze student performance in NEET preparation and create a personalized study plan.

PERFORMANCE ANALYSIS:
Score: {analysis['average_score']:.1f}/100
Accuracy: {analysis['average_accuracy']:.1f}%
Speed: {analysis['average_speed']:.1f}%

STRENGTHS:
{', '.join(analysis['strengths']) if analysis['strengths'] else 'No clear strengths identified yet'}

WEAK AREAS:
{', '.join(analysis['weaknesses']) if analysis['weaknesses'] else 'No significant weaknesses identified'}

ERROR ANALYSIS:
• Conceptual Errors: {len(mistakes['conceptual_errors'])}
• Calculation Errors: {len(mistakes['calculation_errors'])}
• Understanding Errors: {len(mistakes['comprehension_errors'])}

SPECIFIC MISTAKES:
"""
        # Add detailed mistake examples
        for topic, topic_mistakes in mistakes['topic_wise_mistakes'].items():
            if topic_mistakes:
                prompt += f"\n{topic}:\n"
                for mistake in topic_mistakes[:2]:
                    prompt += f"""Question: {mistake['question']}
- Student's Answer: {mistake['student_answer']}
- Correct Answer: {mistake['correct_answer']}
- Concept: {mistake['detailed_solution'][:150]}...\n"""

        prompt += """
Based on this analysis, provide:

1. IMMEDIATE ACTIONS (3-4 specific steps):
- What should the student focus on right now?
- Which concepts need urgent revision?

2. WEEKLY STUDY PLAN:
- Day-by-day schedule
- Time allocation for each topic
- Specific resources to use

3. TOPIC-WISE STRATEGY:
- For each weak area, provide:
  * Key concepts to master
  * Recommended practice approach
  * Common pitfalls to avoid

4. PRACTICE RECOMMENDATIONS:
- Types of questions to focus on
- Time management tips
- Error prevention strategies

Format your response with clear headings and bullet points. Be specific and actionable."""

        return prompt

    def analyze_mistakes_with_llm(self) -> str:
        """Generate detailed mistake analysis using Llama"""
        mistakes = self.analyze_question_mistakes()
        
        prompt = f"""Task: Analyze student's mistakes in NEET preparation and provide detailed feedback.

MISTAKE PATTERNS:
• Conceptual Errors: {len(mistakes['conceptual_errors'])}
• Calculation Errors: {len(mistakes['calculation_errors'])}
• Understanding Errors: {len(mistakes['comprehension_errors'])}

DETAILED EXAMPLES:"""
        
        for category, errors in [
            ("Conceptual", mistakes['conceptual_errors']),
            ("Calculation", mistakes['calculation_errors']),
            ("Understanding", mistakes['comprehension_errors'])
        ]:
            if errors:
                prompt += f"\n\n{category} Mistakes:\n"
                for error in errors[:2]:
                    prompt += f"""
Question: {error['question']}
Student's Answer: {error['student_answer']}
Correct Answer: {error['correct_answer']}
Topic: {error['topic']}
Explanation: {error['detailed_solution'][:200]}...\n"""

        prompt += """
Provide a detailed analysis including:

1. PATTERN ANALYSIS:
- What are the common themes in these mistakes?
- Are there specific topics or concepts that need attention?

2. CONCEPTUAL GAPS:
- Identify fundamental concepts that need strengthening
- Explain how these gaps affect performance

3. IMPROVEMENT STRATEGY:
- Specific steps to address each type of error
- Study techniques to prevent similar mistakes
- Practice recommendations

4. QUICK WINS:
- Immediate actions to improve performance
- Common traps to avoid
- Time management tips

Be specific and provide actionable advice that the student can implement immediately."""

        return self._generate_llm_analysis(prompt)

    def generate_study_plan(self) -> Dict:
        """Generate personalized study plan using LLM"""
        analysis = self.analyze_performance()
        mistakes = self.analyze_question_mistakes()
        
        # Generate prompt
        prompt = self._generate_llm_prompt(analysis, mistakes)
        
        # Get LLM response
        llm_response = self._generate_llm_analysis(prompt)
        
        # Parse the response into structured format
        study_plan = self._parse_study_plan(llm_response)
        
        return study_plan

    def _parse_study_plan(self, llm_response: str) -> Dict:
        """Parse LLM response into structured study plan"""
        study_plan = {
            'immediate_focus': [],
            'weekly_goals': [],
            'topic_wise_preparation': {},
            'practice_recommendations': [],
            'recommended_resources': []
        }
        
        current_section = None
        current_topic = None
        
        # Split response into lines and process
        for line in llm_response.split('\n'):
            line = line.strip()
            if not line:
                continue
            
            # Identify sections
            if 'IMMEDIATE ACTIONS' in line:
                current_section = 'immediate_focus'
            elif 'WEEKLY STUDY PLAN' in line:
                current_section = 'weekly_goals'
            elif 'TOPIC-WISE STRATEGY' in line:
                current_section = 'topic_wise_preparation'
            elif 'PRACTICE RECOMMENDATIONS' in line:
                current_section = 'practice_recommendations'
            elif line.endswith(':') and current_section == 'topic_wise_preparation':
                current_topic = line[:-1]
                study_plan['topic_wise_preparation'][current_topic] = []
            elif line.startswith('- ') or line.startswith('* '):
                content = line[2:].strip()
                if current_section == 'topic_wise_preparation' and current_topic:
                    study_plan['topic_wise_preparation'][current_topic].append(content)
                elif current_section:
                    study_plan[current_section].append(content)
        
        return study_plan

    def analyze_and_generate_report(self) -> Dict:
        """Pipeline to generate complete analysis and recommendations"""
        try:
            # Initialize report structure
            report = {
                'performance_analysis': None,
                'detailed_report': None,
                'student_persona': None,
                'mistakes_analysis': None,
                'study_plan': None,
                'llm_analysis': None
            }
            
            # Step 1: Basic Performance Analysis
            report['performance_analysis'] = self.analyze_performance()
            if not report['performance_analysis']:
                raise ValueError("Failed to generate performance analysis")
            
            # Step 2: Mistake Analysis
            report['mistakes_analysis'] = self.analyze_question_mistakes()
            if not report['mistakes_analysis']:
                raise ValueError("Failed to generate mistake analysis")
            
            # Step 3: Generate Detailed Report
            report['detailed_report'] = self.generate_detailed_report()
            if not report['detailed_report']:
                raise ValueError("Failed to generate detailed report")
            
            # Step 4: Get Student Persona
            report['student_persona'] = self.get_student_persona()
            if not report['student_persona']:
                raise ValueError("Failed to generate student persona")
            
            # Step 5: Generate Study Plan
            report['study_plan'] = self.generate_study_plan()
            
            # Step 6: LLM-based Mistake Analysis
            report['llm_analysis'] = self.analyze_mistakes_with_llm()
            
            # Step 7: Generate Visualizations
            try:
                self.generate_performance_visualizations()
            except Exception as viz_error:
                print(f"Warning: Failed to generate visualizations: {str(viz_error)}")
            
            return report
            
        except Exception as e:
            print(f"Error in analysis pipeline: {str(e)}")
            return None

    def _clean_llm_response(self, response: str) -> str:
        """Clean and format LLM response for better readability"""
        # Remove any prompt artifacts
        response = response.replace("[INST]", "").replace("[/INST]", "")
        response = response.replace("Context:", "").replace("Question:", "")
        
        # Get only the relevant part of the response
        if "Provide a clear, concise response" in response:
            response = response.split("Provide a clear, concise response")[0]
        
        # Clean up extra whitespace and newlines
        lines = [line.strip() for line in response.split('\n') if line.strip()]
        response = '\n'.join(lines)
        
        return response

    def _format_qa_prompt(self, question: str, context: Dict) -> str:
        """Format prompt for Q&A to get more structured responses"""
        return f"""[INST] You are a NEET exam preparation expert. Provide a clear, structured response to the student's question.

Current Performance:
• Score: {context['performance']['average_score']:.1f}
• Accuracy: {context['performance']['average_accuracy']:.1f}%
• Areas for improvement: {', '.join(context['performance']['weaknesses'])}

Student's Question: {question}

Provide your response in this format:
1. Direct Answer
2. Explanation (if needed)
3. Specific Tips or Actions
4. Related Topics to Study

Keep each section concise and focused. [/INST]"""

def main():
    try:
        analyzer = StudentAnalyzer()
        
        # Get all analyses first
        performance = analyzer.analyze_performance()
        mistakes = analyzer.analyze_question_mistakes()
        patterns = analyzer.analyze_question_patterns()
        
        # Print sections with clear separation
        print("\n" + "="*50)
        print("📊 PERFORMANCE SUMMARY".center(50))
        print("="*50)
        print(f"📈 Average Score:    {performance['average_score']:.1f}")
        print(f"🎯 Average Accuracy: {performance['average_accuracy']:.1f}%")
        print(f"⚡ Average Speed:    {performance['average_speed']:.1f}%")
        
        print("\n" + "="*50)
        print("📚 TOPIC ANALYSIS".center(50))
        print("="*50)
        print("\n💪 Strong Topics:")
        for topic in performance['strengths']:
            accuracy = performance['topic_performance'].get(topic, 0)
            print(f"  ✓ {topic}: {accuracy:.1f}%")
            
        print("\n📝 Topics Needing Improvement:")
        for topic in performance['weaknesses']:
            accuracy = performance['topic_performance'].get(topic, 0)
            print(f"  • {topic}: {accuracy:.1f}%")
        
        print("\n" + "="*50)
        print("❌ MISTAKE ANALYSIS".center(50))
        print("="*50)
        print(f"\nQuestions Attempted: {len(analyzer.current_quiz['response_map'])}")
        print(f"✓ Correct Answers:   {analyzer.current_quiz['correct_answers']}")
        print(f"✗ Incorrect Answers: {analyzer.current_quiz['incorrect_answers']}")
        
        # Show incorrect answers in a cleaner format
        if mistakes['topic_wise_mistakes']:
            print("\n" + "-"*50)
            print("Detailed Analysis of Incorrect Answers:")
            print("-"*50)
            for topic, mistakes_list in mistakes['topic_wise_mistakes'].items():
                print(f"\n📘 Topic: {topic}")
                for idx, mistake in enumerate(mistakes_list, 1):
                    print(f"\nQuestion {idx}:")
                    print(f"Q: {mistake['question']}")
                    print(f"✗ Your Answer:     {mistake['student_answer']}")
                    print(f"✓ Correct Answer:  {mistake['correct_answer']}")
                    if mistake['detailed_solution']:
                        print(f"💡 Explanation: {mistake['detailed_solution']}")
                    print("-"*40)
        
        # Generate visualizations
        analyzer.generate_performance_visualizations()
        print("\n📊 Visualization plots saved as 'performance_analysis.png'")
        
        # AI Analysis section
        print("\n" + "="*50)
        print("🤖 AI-POWERED RECOMMENDATIONS".center(50))
        print("="*50)
        
        # Format mistakes for LLM
        formatted_mistakes = analyzer._format_mistakes_for_prompt(mistakes)
        analysis_prompt = f"""
Based on the student's performance:
- Score: {performance['average_score']:.1f}
- Accuracy: {performance['average_accuracy']:.1f}%
- Speed: {performance['average_speed']:.1f}%

{formatted_mistakes}

Please provide a concise analysis with:
1. Key misconceptions identified
2. Specific topics to review
3. Recommended study approach
4. Practice suggestions
"""
        # Get and print LLM analysis
        response = analyzer.pipe(
            analysis_prompt,
            max_new_tokens=512,  # Reduced for conciseness
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
        )
        print("\n" + response[0]['generated_text'].strip())
        
        # Interactive Q&A session
        print("\n" + "="*50)
        print("💬 INTERACTIVE LEARNING ASSISTANT".center(50))
        print("="*50)
        print("\nAsk questions about your performance or concepts (type 'exit' to end)")
        print("-"*50)
        
        context = {
            'performance': performance,
            'mistakes': mistakes,
            'patterns': patterns
        }
        
        while True:
            user_input = input("\n❓ Your question: ").strip()
            
            if user_input.lower() == 'exit':
                print("\nThank you for using the Learning Assistant! Good luck with your studies! 👋")
                break
            
            # Get LLM response with formatted prompt
            response = analyzer.pipe(
                analyzer._format_qa_prompt(user_input, context),
                max_new_tokens=256,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.1,
                do_sample=True,
            )
            
            # Clean and print response
            cleaned_response = analyzer._clean_llm_response(response[0]['generated_text'])
            
            print("\n💡 Answer:")
            print("-"*50)
            
            # Format the sections
            sections = cleaned_response.split('\n')
            current_section = ""
            
            for line in sections:
                if line.strip():
                    if line.startswith(('1.', '2.', '3.', '4.')):
                        if current_section:  # Add spacing between sections
                            print()
                        current_section = line
                        print(f"➤ {line.split('.', 1)[1].strip()}")
                    else:
                        print(f"  {line.strip()}")
            
            print("-"*50)
        
    except Exception as e:
        print(f"❌ Error in analysis: {str(e)}")
        raise

if __name__ == "__main__":
    main()


Using device: cuda
Loading LLM model...


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Device set to use cuda:0
  plt.style.use('seaborn')


LLM model loaded successfully!

              📊 PERFORMANCE SUMMARY               
📈 Average Score:    77.6
🎯 Average Accuracy: 81.4%
⚡ Average Speed:    99.2%

                 📚 TOPIC ANALYSIS                 

💪 Strong Topics:
  ✓ Body Fluids and Circulation : 95.0%

📝 Topics Needing Improvement:
  • Body Fluids and Circulation: 72.3%

                ❌ MISTAKE ANALYSIS                

Questions Attempted: 10
✓ Correct Answers:   8
✗ Incorrect Answers: 2

--------------------------------------------------
Detailed Analysis of Incorrect Answers:
--------------------------------------------------

📘 Topic: structural organisation in animals 

Question 1:
Q: The secretions of endocrine glands are released directly
✗ Your Answer:     into the brain tissue
✓ Correct Answer:  into the blood stream
----------------------------------------

Question 2:
Q: Vasa efferentia in male frog, enter the kidney and open into
✗ Your Answer:     Urinogenital duct
✓ Correct Answer:  Bidder's canal
----

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



📊 Visualization plots saved as 'performance_analysis.png'

           🤖 AI-POWERED RECOMMENDATIONS           

Based on the student's performance:
- Score: 77.6
- Accuracy: 81.4%
- Speed: 99.2%

Mistakes Analysis:

Topic: structural organisation in animals 
1. Question: The secretions of endocrine glands are released directly
   - Student Answer: into the brain tissue
   - Correct Answer: into the blood stream
2. Question: Vasa efferentia in male frog, enter the kidney and open into
   - Student Answer: Urinogenital duct
   - Correct Answer: Bidder's canal


Please provide a concise analysis with:
1. Key misconceptions identified
2. Specific topics to review
3. Recommended study approach
4. Practice suggestions
5. Evaluation criteria for future assessment

Analysis:

Key Misconceptions Identified:
1. Structural Organisation in Animals - Endocrine Glands Secretion (students think secretion goes directly to the brain)
   - This is incorrect as the correct answer states that endocrine gl


❓ Your question:  what topics should i study now?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



💡 Answer:
--------------------------------------------------
  You are a NEET exam preparation expert. Provide a clear, structured response to the student's question.
  Current Performance:
  • Score: 77.6
  • Accuracy: 81.4%
  • Areas for improvement: Body Fluids and Circulation
  Student's  what topics should i study now?
  Provide your response in this format:
➤ Direct Answer

➤ Explanation (if needed)

➤ Specific Tips or Actions

➤ Related Topics to Study
  Keep each section concise and focused.
  **Response**
  **1. Direct Answer**
  To improve your performance, focus on studying the following topics:
  - Blood and its components
  - Blood circulation system
  - Lymphatic system
  **2. Explanation**
  These topics are crucial because they directly relate to your identified areas of weakness in Body Fluids and Circulation. Mastering these concepts will help you better understand how blood and lymph circulate through the body, allowing you to make more accurate connections between 


❓ Your question:  exit



Thank you for using the Learning Assistant! Good luck with your studies! 👋
