<a href="https://colab.research.google.com/github/tanishadewangan81-pixel/student-exam-stress-analysis/blob/main/Student_Exam_Stress_Survey_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##### Student Exam Stress Survey Analysis

In [None]:
import pandas as pd
import numpy as np
import random

# --- Configuration ---
num_students = 1000
# Fixed seed so re-running the cell regenerates the same synthetic survey.
# Set to None to draw fresh random data on every run.
random_seed = 42
output_path = 'stress_survey.csv'

columns = ['StudentID', 'Study_Hours', 'Sleep_Hours', 'Stress_Level']


# --- Data Generation ---
def generate_stress_survey(n_students, seed=None, first_id=1001):
    """Build a synthetic exam-stress survey as a DataFrame.

    Each row holds a student ID ("STD<number>"), study hours drawn uniformly
    from 1.0-14.0, sleep hours drawn uniformly from 3.0-10.0 (both rounded to
    one decimal), and an integer stress level clamped to the range 1-10.

    Stress model: start at 5, add 0.3 per study hour, subtract 0.5 per sleep
    hour, then add uniform "life noise" in [-1, 1] before rounding.

    Args:
        n_students: Number of rows to generate; 0 yields an empty frame.
        seed: Seed for the private random generator. The same seed always
            produces the same frame; None gives non-reproducible data.
        first_id: Numeric part of the first student ID.

    Returns:
        pandas.DataFrame with the columns listed in ``columns``.
    """
    # A private generator keeps the global `random` state untouched.
    rng = random.Random(seed)
    rows = []

    for number in range(first_id, first_id + n_students):
        study_hours = round(rng.uniform(1.0, 14.0), 1)
        sleep_hours = round(rng.uniform(3.0, 10.0), 1)

        # Less sleep + more study = higher stress.
        base_stress = 5 + (study_hours * 0.3) - (sleep_hours * 0.5)
        noise = rng.uniform(-1, 1)

        # round() on a float already returns an int; clamp to the 1-10 scale.
        stress_level = max(1, min(10, round(base_stress + noise)))

        rows.append([f"STD{number}", study_hours, sleep_hours, stress_level])

    return pd.DataFrame(rows, columns=columns)


df = generate_stress_survey(num_students, seed=random_seed)

# --- Save to CSV ---
df.to_csv(output_path, index=False)

# Report the real row count and path instead of hard-coded values so the
# message stays correct when the configuration above changes.
print(f"✅ File '{output_path}' generated with {len(df)} student records!")
print(df.head())

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load Data
df = pd.read_csv('stress_survey.csv')

# Stress levels at or above this value are reported as "high risk".
high_stress_threshold = 8

# --- PART A: Stats Report ---
print("--- 📊 STRESS SURVEY STATS ---")

avg_stress = df['Stress_Level'].mean()
# Summing the boolean mask counts matching rows without building a filtered copy.
high_stress_count = int((df['Stress_Level'] >= high_stress_threshold).sum())

print(f"Total Students Surveyed: {len(df)}")
print(f"🧠 Average Stress Level: {avg_stress:.2f} / 10")
print(f"⚠️ High Risk Students (Level {high_stress_threshold}+): {high_stress_count}")
print(f"😴 Average Sleep: {df['Sleep_Hours'].mean():.1f} hours")
print("-" * 30)

# Check Correlation: Does Sleep affect Stress?
correlation = df['Sleep_Hours'].corr(df['Stress_Level'])
print(f"Correlation (Sleep vs. Stress): {correlation:.2f}")
print("(Note: A negative number means MORE sleep = LESS stress)")


# --- PART B: Visualization ---
# One figure, two side-by-side axes; each chart is drawn on its own axis.
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(14, 6))

# Chart 1: Stress Level Distribution (Histogram)
# Stress levels are integers. A fixed `bins=10` splits the observed min-max
# range into ten equal-width bins, which only line up with the integer levels
# when the data happens to span exactly 1-10; otherwise a level can skip a bin
# and leave a spurious empty bar. `discrete=True` gives every integer level
# its own unit-width bar centred on its value.
sns.histplot(df['Stress_Level'], discrete=True, kde=True, color='red', ax=ax_hist)
ax_hist.set_title('Distribution of Student Stress Levels')
ax_hist.set_xlabel('Stress Level (1-10)')
ax_hist.set_ylabel('Count of Students')

# Chart 2: Sleep vs. Stress (Box Plot)
# This shows the PATTERN: Do tired students have higher stress?
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` - confirm against the installed version before changing this call.
sns.boxplot(x='Stress_Level', y='Sleep_Hours', data=df, palette='coolwarm_r', ax=ax_box)
ax_box.set_title('Sleep Hours by Stress Level')
ax_box.set_xlabel('Stress Level')
ax_box.set_ylabel('Hours of Sleep')

plt.tight_layout()
plt.show()