# Age Distribution Analytics
This notebook visualizes the age distribution of a synthetic population, including a gender-wise breakdown. It is intended as a professional-level data visualization mini-project.

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Build a reproducible synthetic sample: one age and one gender per person.
np.random.seed(42)
n = 500


def _draw_valid_ages(count):
    """Draw `count` ages from Normal(35, 12) and return the usable ones.

    Draws outside the open interval (0, 100) are dropped, so the returned
    list may be shorter than `count`. Kept draws are truncated with int().
    """
    sample = np.random.normal(loc=35, scale=12, size=count)
    return [int(value) for value in sample if 0 < value < 100]


ages = _draw_valid_ages(n)

# Rejected draws leave the list short; keep topping it up until it holds n ages.
while len(ages) < n:
    ages.extend(_draw_valid_ages(n - len(ages)))

# Gender labels are drawn independently of age, uniformly from the two options.
genders = np.random.choice(['Male', 'Female'], size=n)

# Assemble the two columns into the frame used for plotting.
data = pd.DataFrame(dict(Age=ages[:n], Gender=genders))
data.head()


## Overall Age Distribution

In [None]:

# Histogram of all ages with a KDE overlay, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(
    data=data,
    x='Age',
    bins=15,
    kde=True,
    color='skyblue',
    edgecolor='black',
    ax=ax,
)
ax.set_title('Age Distribution in Surveyed Population', fontsize=16)
ax.set_xlabel('Age')
ax.set_ylabel('Number of People')
ax.grid(True, linestyle='--', alpha=0.5)
fig.tight_layout()
# Write the PNG first, then render the figure inline.
fig.savefig("age_histogram.png")
plt.show()


## Age Distribution by Gender

In [None]:

# Same age histogram, split by gender: bars (and KDE curves) are coloured per
# gender and stacked on top of each other.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(
    data=data,
    x='Age',
    hue='Gender',
    bins=15,
    kde=True,
    palette='Set2',
    edgecolor='black',
    multiple='stack',
    ax=ax,
)
ax.set_title('Age Distribution by Gender in Surveyed Population', fontsize=16)
ax.set_xlabel('Age')
ax.set_ylabel('Number of People')
ax.grid(True, linestyle='--', alpha=0.5)
fig.tight_layout()
# Write the PNG first, then render the figure inline.
fig.savefig("gender_age_histogram.png")
plt.show()


## Code to Save Both Graphs

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Export cell: rebuild the synthetic dataset and write both histograms to disk
# without displaying them.

# Recreate data (fixed seed, so every run produces the same sample).
np.random.seed(42)
n = 500
ages = np.random.normal(loc=35, scale=12, size=n)
# Keep only draws strictly inside (0, 100); int() truncates the fractional part.
ages = [int(age) for age in ages if 0 < age < 100]
# Dropped draws leave the list short, so redraw the shortfall until n ages exist.
while len(ages) < n:
    more_ages = np.random.normal(loc=35, scale=12, size=(n - len(ages)))
    ages += [int(age) for age in more_ages if 0 < age < 100]
genders = np.random.choice(['Male', 'Female'], size=n)
data = pd.DataFrame({'Age': ages[:n], 'Gender': genders})


def _save_age_histogram(frame, path, title, **hist_kwargs):
    """Plot a 15-bin histogram (with KDE) of `frame['Age']` and save it to `path`.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data to plot; must contain an 'Age' column (plus any column named
        in `hist_kwargs`, e.g. the `hue` column).
    path : str
        Destination image file, passed to `Figure.savefig`.
    title : str
        Figure title.
    **hist_kwargs
        Extra keyword arguments forwarded to `seaborn.histplot`
        (e.g. `color`, or `hue`/`palette`/`multiple`).

    The figure is always closed afterwards, even if plotting or saving
    raises, so it is neither displayed nor left open in the kernel.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.histplot(data=frame, x='Age', bins=15, kde=True,
                     edgecolor='black', ax=ax, **hist_kwargs)
        # fontsize=16 matches the inline plots that write the same file names,
        # so the saved images look like the figures shown in the notebook.
        ax.set_title(title, fontsize=16)
        ax.set_xlabel('Age')
        ax.set_ylabel('Number of People')
        ax.grid(True, linestyle='--', alpha=0.5)
        fig.tight_layout()
        fig.savefig(path)
    finally:
        plt.close(fig)


# Save age histogram
_save_age_histogram(
    data,
    "age_histogram.png",
    'Age Distribution in Surveyed Population',
    color='skyblue',
)

# Save gender-wise age histogram
_save_age_histogram(
    data,
    "gender_age_histogram.png",
    'Age Distribution by Gender in Surveyed Population',
    hue='Gender',
    palette='Set2',
    multiple='stack',
)
