# Exploratory Data Analysis (EDA)

## Objective
Understand patterns, distributions, and relationships in the cleaned dataset using statistical analysis and visualizations.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Never truncate wide frames: show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Apply seaborn's theme with the 'whitegrid' style to all plots drawn below.
sns.set(style='whitegrid')

## Load Cleaned Dataset

In [None]:
# Read the cleaned dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='cleaned_data.csv')
# Preview the first five rows (kept as the cell's last expression so it renders).
df.head(n=5)

## Dataset Overview

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes,
# memory usage) to spot missing values and mistyped columns early.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max); with default
# arguments pandas restricts this to the numeric columns when any are present.
df.describe()

## Target Variable Distribution

In [None]:
# Bar chart of the number of rows per Mood value, to reveal any imbalance
# in the target variable.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='Mood', ax=ax)
ax.set_title('Mood Distribution')
plt.show()

## Numerical Feature Distributions

In [None]:
# Names of all numeric columns in the frame.
numerical_cols = df.select_dtypes(include=[np.number]).columns

# One histogram per numeric column, drawn as a grid on a single figure.
numeric_frame = df[numerical_cols]
numeric_frame.hist(figsize=(12, 10))
# Keep neighbouring subplot titles and tick labels from overlapping.
plt.tight_layout()
plt.show()

## Correlation Heatmap

In [None]:
# Pairwise correlations between the numeric columns (pandas' default method).
# `numerical_cols` is defined in the "Numerical Feature Distributions" cell.
corr_matrix = df[numerical_cols].corr()

plt.figure(figsize=(10,8))
# Pin the colour scale to the full correlation range [-1, 1]. 'coolwarm' is a
# diverging map, so a symmetric range puts its neutral colour exactly at 0 and
# makes red/blue encode the sign of the correlation; without it the neutral
# colour sits at the midpoint of whatever values happen to be in the data.
# fmt='.2f' gives every annotation the same two-decimal width.
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
)
plt.title('Feature Correlation Heatmap')
plt.show()

## Mood vs Key Features

In [None]:
# Columns compared against the Mood target, one box plot per column.
features = ['Sleep_Hours', 'Steps', 'Study_Hours', 'Water_Intake_ml']

for feature in features:
    # A separate figure per feature so each plot keeps its own y-axis scale.
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(data=df, x='Mood', y=feature, ax=ax)
    ax.set_title(f'{feature} vs Mood')
    plt.show()

## Categorical Feature Analysis

In [None]:
# All object-dtype columns are treated as categorical features.
categorical_cols = df.select_dtypes(include='object').columns

for col in categorical_cols:
    # If the target is itself an object column it appears in this list;
    # plotting Mood against Mood (x and hue identical) carries no information,
    # so skip it. This is a no-op when Mood is not an object column.
    if col == 'Mood':
        continue
    plt.figure(figsize=(6,4))
    # Count of rows per category, split by Mood via the hue channel.
    sns.countplot(x=col, hue='Mood', data=df)
    # Tilt the category labels so longer names do not overlap.
    plt.xticks(rotation=45)
    plt.title(f'{col} vs Mood')
    plt.show()

## Key Insights
- Sleep hours and mood show a strong relationship
- Physical activity positively impacts mood
- Hydration and study hours also influence emotional state
