In [13]:
# Import necessary libraries for data analysis and manipulation
import pandas as pd
import numpy as np
from datetime import datetime

# Confirm the imports worked and record which library versions produced the outputs below
print("Libraries imported successfully!")
for label, module in (("Pandas version:", pd), ("NumPy version:", np)):
    print(label, module.__version__)


Libraries imported successfully!
Pandas version: 2.3.3
NumPy version: 2.3.4


In [14]:
# Define Student class using Object-Oriented Programming principles
class Student:
    """
    One student's identity details together with a subject -> marks mapping
    """

    def __init__(self, student_id, name, age, gender):
        """
        Store the student's details and start with no recorded marks

        Parameters:
        student_id (int): Unique identifier for student
        name (str): Student's full name
        age (int): Student's age
        gender (str): Student's gender
        """
        self.student_id, self.name = student_id, name
        self.age, self.gender = age, gender
        # Filled in later through add_subject_marks()
        self.subjects = dict()

    def add_subject_marks(self, subject, marks):
        """
        Record marks for a subject, replacing any earlier entry for it

        Parameters:
        subject (str): Subject name
        marks (float): Marks obtained in the subject
        """
        self.subjects.update({subject: marks})

    def calculate_average(self):
        """
        Mean of the recorded marks over all subjects

        Returns:
        float: Average marks, or 0 when no marks have been recorded
        """
        recorded = list(self.subjects.values())
        return np.mean(recorded) if recorded else 0

    def get_status(self):
        """
        Pass/fail verdict derived from the average marks

        Returns:
        str: "Pass" if average >= 40, otherwise "Fail"
        """
        return "Pass" if self.calculate_average() >= 40 else "Fail"

    def display_info(self):
        """
        Print the student's details, marks, average and status between rulers
        """
        ruler = "="*50
        print("\n" + ruler)
        plain_fields = (
            ("Student ID:", self.student_id),
            ("Name:", self.name),
            ("Age:", self.age),
            ("Gender:", self.gender),
            ("Subjects:", self.subjects),
        )
        for label, value in plain_fields:
            print(label, value)
        print("Average Marks: {:.2f}".format(self.calculate_average()))
        print("Status:", self.get_status())
        print(ruler)

# Display confirmation
print("Student class defined successfully!")


Student class defined successfully!


In [15]:
# Define PerformanceAnalyzer class for batch data analysis
class PerformanceAnalyzer:
    """
    Class to analyze performance data for multiple students

    Two stores are kept side by side:
    - self.df: DataFrame loaded from CSV (one row per student); every
      reporting method reads from it.
    - self.students: Student objects added by hand through add_student().
    """

    # Columns that describe the student rather than a subject score
    INFO_COLUMNS = ('StudentID', 'Name', 'Age', 'Gender')
    # Columns this class computes itself; they must never count as subjects
    DERIVED_COLUMNS = ('Average', 'Status')
    # Minimum average required for a "Pass"
    PASS_MARK = 40

    def __init__(self, csv_file=None):
        """
        Initialize PerformanceAnalyzer object

        Parameters:
        csv_file (str): Path to CSV file (optional); loaded immediately if given
        """
        self.students = []
        self.df = None
        if csv_file:
            self.load_from_csv(csv_file)

    def add_student(self, student):
        """
        Add a student object to the analyzer

        Parameters:
        student (Student): Student object to be added
        """
        self.students.append(student)

    def load_from_csv(self, filename):
        """
        Load student data from CSV file

        Parameters:
        filename (str): Path to CSV file

        Returns:
        DataFrame: Loaded student data, or None if the file does not exist
        (in which case any previously loaded data is left untouched)
        """
        try:
            self.df = pd.read_csv(filename)
        except FileNotFoundError:
            print("Error: File", filename, "not found!")
            return None

        # Set index to start from 1 instead of 0
        self.df.index = range(1, len(self.df) + 1)
        print("Data loaded successfully from", filename)
        print("Total Students:", len(self.df))
        return self.df

    def save_to_csv(self, filename="student_data.csv"):
        """
        Save current students data to CSV file

        Writes the rows loaded into self.df (without the derived
        'Average'/'Status' columns, so the file can be loaded again) followed
        by the Student objects added through add_student().

        Parameters:
        filename (str): Output CSV file name
        """
        added_rows = []
        for student in self.students:
            row = {
                'StudentID': student.student_id,
                'Name': student.name,
                'Age': student.age,
                'Gender': student.gender,
            }
            row.update(student.subjects)
            added_rows.append(row)

        frames = []
        if self.df is not None and not self.df.empty:
            # errors='ignore': the derived columns only exist after a report ran
            frames.append(self.df.drop(columns=list(self.DERIVED_COLUMNS),
                                       errors='ignore'))
        if added_rows:
            frames.append(pd.DataFrame(added_rows))

        if frames:
            df = pd.concat(frames, ignore_index=True)
        else:
            df = pd.DataFrame(added_rows)
        df.to_csv(filename, index=False)
        print("Data saved successfully to", filename)

    def _subject_columns(self):
        """Return the columns of self.df that hold subject marks."""
        not_subjects = set(self.INFO_COLUMNS) | set(self.DERIVED_COLUMNS)
        return [col for col in self.df.columns if col not in not_subjects]

    def _compute_derived_columns(self, subject_cols):
        """Write per-student 'Average' and 'Status' columns into self.df."""
        self.df['Average'] = self.df[subject_cols].mean(axis=1)
        self.df['Status'] = self.df['Average'].apply(
            lambda avg: 'Pass' if avg >= self.PASS_MARK else 'Fail')

    def _ensure_derived_columns(self):
        """
        Make sure 'Average' and 'Status' exist, computing them if needed.

        Returns True when both columns are available afterwards; prints a
        message and returns False when there are no subject columns.
        """
        if all(col in self.df.columns for col in self.DERIVED_COLUMNS):
            return True
        subject_cols = self._subject_columns()
        if not subject_cols:
            print("No subject data found!")
            return False
        self._compute_derived_columns(subject_cols)
        return True

    def calculate_class_statistics(self):
        """
        Calculate comprehensive class statistics

        Prints overall, subject-wise and pass/fail statistics and (re)computes
        the 'Average' and 'Status' columns. Safe to call repeatedly: the
        derived columns are never mistaken for subjects.

        Returns:
        DataFrame: self.df itself (not a copy) with 'Average' and 'Status'
        added, or None when there is no data or no subject column
        """
        if self.df is None or self.df.empty:
            print("No data available!")
            return

        print("\n" + "="*60)
        print("CLASS STATISTICS REPORT")
        print("="*60)

        # Identify subject columns (exclude info and derived columns)
        subject_cols = self._subject_columns()

        if not subject_cols:
            print("No subject data found!")
            return

        # Always recompute so the report reflects the current marks
        self._compute_derived_columns(subject_cols)

        # Display overall statistics
        print("\nOverall Statistics:")
        print("Total Students:", len(self.df))
        print("Class Average: {:.2f}".format(self.df['Average'].mean()))
        print("Highest Average: {:.2f}".format(self.df['Average'].max()))
        print("Lowest Average: {:.2f}".format(self.df['Average'].min()))

        # Display subject-wise statistics
        print("\nSubject-wise Statistics:")
        for subject in subject_cols:
            avg = self.df[subject].mean()
            max_marks = self.df[subject].max()
            min_marks = self.df[subject].min()
            print("\n  {}:".format(subject))
            print("    Average: {:.2f} | Maximum: {} | Minimum: {}".format(
                avg, max_marks, min_marks))

        # Calculate pass/fail statistics
        pass_count = int((self.df['Status'] == 'Pass').sum())
        fail_count = int((self.df['Status'] == 'Fail').sum())

        print("\nPass/Fail Analysis:")
        print("Pass: {} students ({:.1f}%)".format(
            pass_count, pass_count/len(self.df)*100))
        print("Fail: {} students ({:.1f}%)".format(
            fail_count, fail_count/len(self.df)*100))
        print("="*60)

        return self.df

    def predict_future_performance(self, student_name):
        """
        Predict future performance for a specific student

        The prediction is the current average scaled by a fixed 5% growth;
        it is not capped. If several rows share the name, the first is used.

        Parameters:
        student_name (str): Name of the student
        """
        if self.df is None:
            print("No data available!")
            return

        # The average may not exist yet if no report has been generated
        if not self._ensure_derived_columns():
            return

        # Find student data
        student_data = self.df[self.df['Name'] == student_name]
        if student_data.empty:
            print("Student '{}' not found!".format(student_name))
            return

        # Get current average
        current_avg = student_data['Average'].values[0]

        # Simple linear prediction with 5% improvement trend
        predicted_avg = current_avg * 1.05

        # Display prediction
        print("\nPERFORMANCE PREDICTION for {}".format(student_name))
        print("Current Average: {:.2f}".format(current_avg))
        print("Predicted Next Average: {:.2f}".format(predicted_avg))

        # Provide performance assessment
        if predicted_avg >= 75:
            print("Prediction: Excellent performance expected!")
        elif predicted_avg >= 50:
            print("Prediction: Good performance expected!")
        else:
            print("Prediction: Needs improvement!")

    def generate_risk_alerts(self):
        """
        Identify students at risk of failing (average below the pass mark)

        Returns:
        DataFrame: Students at risk, or None when there is no data or no
        subject column
        """
        if self.df is None:
            print("No data available!")
            return

        # The average may not exist yet if no report has been generated
        if not self._ensure_derived_columns():
            return

        print("\nRISK ALERT SYSTEM")
        print("="*60)

        # Find students with average below the pass mark
        at_risk = self.df[self.df['Average'] < self.PASS_MARK]

        if at_risk.empty:
            print("No students at risk!")
        else:
            print("{} student(s) at risk of failing:".format(len(at_risk)))
            for idx, row in at_risk.iterrows():
                print("  - {} (ID: {}) | Average: {:.2f}".format(
                    row['Name'], row['StudentID'], row['Average']))
        print("="*60)

        return at_risk

    def get_top_performers(self, n=5):
        """
        Get top N performing students

        Parameters:
        n (int): Number of top performers to display

        Returns:
        DataFrame: Name, StudentID, Average and Status of the top students,
        or None when there is no data or no subject column
        """
        if self.df is None:
            print("No data available!")
            return

        # The average/status may not exist yet if no report has been generated
        if not self._ensure_derived_columns():
            return

        print("\nTOP {} PERFORMERS".format(n))
        print("="*60)

        # Get top students based on average marks
        top_students = self.df.nlargest(n, 'Average')[
            ['Name', 'StudentID', 'Average', 'Status']]
        print(top_students.to_string(index=False))
        print("="*60)

        return top_students

# Display confirmation
print("PerformanceAnalyzer class defined successfully!")


PerformanceAnalyzer class defined successfully!


In [16]:
# Build the sample class roster, one list per column (ten students)
sample_data = dict(
    StudentID=list(range(101, 111)),
    Name=[
        'Aarav Sharma', 'Diya Patel', 'Rohan Kumar', 'Ananya Singh',
        'Vihaan Verma', 'Isha Gupta', 'Arjun Mehta', 'Kavya Reddy',
        'Aditya Joshi', 'Sanya Desai',
    ],
    Age=[19, 18, 20, 19, 18, 19, 20, 18, 19, 18],
    Gender=['Male', 'Female'] * 5,
    Mathematics=[85, 72, 45, 88, 92, 38, 76, 81, 34, 90],
    Physics=[78, 68, 42, 85, 88, 35, 72, 79, 38, 87],
    Chemistry=[82, 75, 48, 90, 91, 40, 78, 83, 36, 92],
    English=[88, 80, 52, 92, 89, 42, 80, 85, 40, 91],
    Python=[90, 77, 55, 95, 94, 45, 82, 88, 42, 93],
)

# Turn the columns into a DataFrame whose row labels run 1..N instead of 0..N-1
df_students = pd.DataFrame(
    sample_data,
    index=range(1, len(sample_data['StudentID']) + 1),
)

# Persist the roster without the row labels so it can be re-loaded later
df_students.to_csv('student_data.csv', index=False)

# Confirm the save, then show the frame as the cell output
print("Sample data created and saved to 'student_data.csv'")
print("\nData Preview:")
df_students


Sample data created and saved to 'student_data.csv'

Data Preview:


Unnamed: 0,StudentID,Name,Age,Gender,Mathematics,Physics,Chemistry,English,Python
1,101,Aarav Sharma,19,Male,85,78,82,88,90
2,102,Diya Patel,18,Female,72,68,75,80,77
3,103,Rohan Kumar,20,Male,45,42,48,52,55
4,104,Ananya Singh,19,Female,88,85,90,92,95
5,105,Vihaan Verma,18,Male,92,88,91,89,94
6,106,Isha Gupta,19,Female,38,35,40,42,45
7,107,Arjun Mehta,20,Male,76,72,78,80,82
8,108,Kavya Reddy,18,Female,81,79,83,85,88
9,109,Aditya Joshi,19,Male,34,38,36,40,42
10,110,Sanya Desai,18,Female,90,87,92,91,93


In [17]:
# Create the analyzer and load the CSV in one step: passing a path to the
# constructor makes it call load_from_csv() itself
analyzer = PerformanceAnalyzer('student_data.csv')

# Preview the head of the loaded frame as the cell output
print("\nFirst 5 rows of loaded data:")
analyzer.df.head(5)


Data loaded successfully from student_data.csv
Total Students: 10

First 5 rows of loaded data:


Unnamed: 0,StudentID,Name,Age,Gender,Mathematics,Physics,Chemistry,English,Python
1,101,Aarav Sharma,19,Male,85,78,82,88,90
2,102,Diya Patel,18,Female,72,68,75,80,77
3,103,Rohan Kumar,20,Male,45,42,48,52,55
4,104,Ananya Singh,19,Female,88,85,90,92,95
5,105,Vihaan Verma,18,Male,92,88,91,89,94


In [18]:
# Print the class statistics report. The call also adds the 'Average' and
# 'Status' columns to the analyzer's DataFrame and returns that same
# DataFrame (analyzer.df itself, not a copy).
result_df = analyzer.calculate_class_statistics()

# Show the returned DataFrame, now including Average and Status, as the cell output
print("\nUpdated DataFrame with Average and Status:")
result_df



CLASS STATISTICS REPORT

Overall Statistics:
Total Students: 10
Class Average: 71.76
Highest Average: 90.80
Lowest Average: 38.00

Subject-wise Statistics:

  Mathematics:
    Average: 70.10 | Maximum: 92 | Minimum: 34

  Physics:
    Average: 67.20 | Maximum: 88 | Minimum: 35

  Chemistry:
    Average: 71.50 | Maximum: 92 | Minimum: 36

  English:
    Average: 73.90 | Maximum: 92 | Minimum: 40

  Python:
    Average: 76.10 | Maximum: 95 | Minimum: 42

Pass/Fail Analysis:
Pass: 9 students (90.0%)
Fail: 1 students (10.0%)

Updated DataFrame with Average and Status:


Unnamed: 0,StudentID,Name,Age,Gender,Mathematics,Physics,Chemistry,English,Python,Average,Status
1,101,Aarav Sharma,19,Male,85,78,82,88,90,84.6,Pass
2,102,Diya Patel,18,Female,72,68,75,80,77,74.4,Pass
3,103,Rohan Kumar,20,Male,45,42,48,52,55,48.4,Pass
4,104,Ananya Singh,19,Female,88,85,90,92,95,90.0,Pass
5,105,Vihaan Verma,18,Male,92,88,91,89,94,90.8,Pass
6,106,Isha Gupta,19,Female,38,35,40,42,45,40.0,Pass
7,107,Arjun Mehta,20,Male,76,72,78,80,82,77.6,Pass
8,108,Kavya Reddy,18,Female,81,79,83,85,88,83.2,Pass
9,109,Aditya Joshi,19,Male,34,38,36,40,42,38.0,Fail
10,110,Sanya Desai,18,Female,90,87,92,91,93,90.6,Pass


In [19]:
# Print the five students with the highest 'Average' and keep the returned
# Name / StudentID / Average / Status table for later use
top_performers = analyzer.get_top_performers(5)



TOP 5 PERFORMERS
        Name  StudentID  Average Status
Vihaan Verma        105     90.8   Pass
 Sanya Desai        110     90.6   Pass
Ananya Singh        104     90.0   Pass
Aarav Sharma        101     84.6   Pass
 Kavya Reddy        108     83.2   Pass


In [20]:
# Print the students whose 'Average' is below 40 and keep the matching rows
at_risk_students = analyzer.generate_risk_alerts()



RISK ALERT SYSTEM
1 student(s) at risk of failing:
  - Aditya Joshi (ID: 109) | Average: 38.00


In [21]:
# Run the prediction for two students, printing a blank separator between reports
for position, student_name in enumerate(['Aarav Sharma', 'Rohan Kumar']):
    if position > 0:
        print("\n")
    analyzer.predict_future_performance(student_name)



PERFORMANCE PREDICTION for Aarav Sharma
Current Average: 84.60
Predicted Next Average: 88.83
Prediction: Excellent performance expected!



PERFORMANCE PREDICTION for Rohan Kumar
Current Average: 48.40
Predicted Next Average: 50.82
Prediction: Good performance expected!


In [22]:
# Build a Student object by hand
new_student = Student(111, 'Rahul Mehta', 19, 'Male')

# Record the marks subject by subject (dict order is the insertion order)
new_student_marks = {
    'Mathematics': 78,
    'Physics': 82,
    'Chemistry': 75,
    'English': 88,
    'Python': 91,
}
for subject, marks in new_student_marks.items():
    new_student.add_subject_marks(subject, marks)

# Print the student's details, average and pass/fail status
new_student.display_info()

# Register the student with the analyzer's list of Student objects
analyzer.add_student(new_student)
print("\nStudent added to analyzer successfully!")



Student ID: 111
Name: Rahul Mehta
Age: 19
Gender: Male
Subjects: {'Mathematics': 78, 'Physics': 82, 'Chemistry': 75, 'English': 88, 'Python': 91}
Average Marks: 82.80
Status: Pass

Student added to analyzer successfully!


In [23]:
# Export the analyzer's students to a separate CSV file, leaving the original
# 'student_data.csv' untouched
analyzer.save_to_csv('updated_student_data.csv')

# Display confirmation
print("All student data has been saved!")


Data saved successfully to updated_student_data.csv
All student data has been saved!


In [24]:
# Subjects to summarise
subject_cols = ['Mathematics', 'Physics', 'Chemistry', 'English', 'Python']

# Column-wise mean over all students: one class average per subject
subject_averages = analyzer.df[subject_cols].mean()

# Text report: one aligned line per subject between two rulers
ruler = "="*40
print("\nSUBJECT-WISE CLASS AVERAGES:")
print(ruler)
for subject, avg in subject_averages.items():
    print("{:15s}: {:.2f}".format(subject, avg))
print(ruler)

# Same numbers as a two-column table whose row labels start from 1
df_subject_avg = pd.DataFrame(
    list(subject_averages.items()),
    columns=['Subject', 'Average Marks'],
    index=range(1, len(subject_averages) + 1),
)

# Show the table as the cell output
print("\nSubject Analysis Table:")
df_subject_avg



SUBJECT-WISE CLASS AVERAGES:
Mathematics    : 70.10
Physics        : 67.20
Chemistry      : 71.50
English        : 73.90
Python         : 76.10

Subject Analysis Table:


Unnamed: 0,Subject,Average Marks
1,Mathematics,70.1
2,Physics,67.2
3,Chemistry,71.5
4,English,73.9
5,Python,76.1
