In [None]:
# Data Science Project on Birth Rates: exploratory plots and K-means clustering


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


# Small hand-entered sample: one row per country, all observations for 2020.
# Column order here is the column order of the resulting frame.
data = pd.DataFrame(
    {
        'Country': [f'Country{suffix}' for suffix in 'ABCDEFG'],
        'Year': [2020] * 7,
        'BirthRate': [11.3, 33.4, 22.5, 17.6, 8.9, 25.7, 14.2],
        'GDP': [48000, 1500, 9500, 13000, 51000, 4000, 22000],
        'FertilityRate': [1.7, 5.2, 3.1, 2.8, 1.4, 4.0, 2.2],
        'FemaleLiteracyRate': [97, 58, 74, 69, 98, 63, 85],
        'UrbanPopulation%': [82, 32, 55, 49, 90, 38, 66],
    }
)



# Quick sanity check of the frame before plotting: structure first, then a
# per-column count of missing values.
print("\nData Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the summary.
data.info()
print("\nMissing Values:")
print(data.isnull().sum())



# Apply seaborn's white-grid theme to every figure drawn from here on.
sns.set_theme(style="whitegrid")

# Histogram of the birth-rate column with a kernel-density curve overlaid.
plt.figure(figsize=(6, 4))
sns.histplot(data=data, x='BirthRate', kde=True)
plt.title("Distribution of Birth Rates")
plt.show()


# Pairwise correlations between the indicator columns, drawn as an annotated
# heatmap. 'Country' and 'Year' are dropped before correlating.
indicator_columns = data.drop(columns=['Country', 'Year'])
correlation_matrix = indicator_columns.corr()

plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='YlGnBu')
plt.title("Correlation Matrix")
plt.show()

# One scatter plot per explanatory column, each against BirthRate and coloured
# by country. The figures are drawn in the order listed.
for x_column, plot_title in (
    ('GDP', "GDP vs Birth Rate"),
    ('FemaleLiteracyRate', "Female Literacy Rate vs Birth Rate"),
):
    plt.figure(figsize=(6, 4))
    sns.scatterplot(data=data, x=x_column, y='BirthRate', hue='Country', s=100)
    plt.title(plot_title)
    plt.show()



# Group the countries into three clusters using their demographic and
# economic indicators; the label is stored in a new 'Cluster' column.
features = data[['BirthRate', 'FertilityRate', 'GDP', 'FemaleLiteracyRate', 'UrbanPopulation%']]

# Standardise every column first: raw GDP values are orders of magnitude
# larger than the other indicators and would otherwise dominate the
# distance computation.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# n_init is given explicitly. Leaving it out triggers a FutureWarning on
# scikit-learn 1.2/1.3, and on 1.4+ the default changes from 10 to 'auto',
# so the number of restarts would depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
data['Cluster'] = kmeans.fit_predict(scaled_features)


# Show the cluster assignment on the FertilityRate / BirthRate plane, one
# colour per cluster label.
cluster_plot_options = {
    'x': 'FertilityRate',
    'y': 'BirthRate',
    'hue': 'Cluster',
    'palette': 'Set2',
    's': 100,
}

plt.figure(figsize=(6, 4))
sns.scatterplot(data=data, **cluster_plot_options)
plt.title("Clusters of Countries Based on Birth Factors")
plt.show()


# Closing summary: a header line followed by the numbered findings, printed
# one per line in the order listed.
conclusion_lines = (
    "\n--- Conclusion ---",
    "1. Higher fertility rates and lower female literacy are associated with higher birth rates.",
    "2. Developed countries (higher GDP & literacy) tend to have lower birth rates.",
    "3. Clustering reveals patterns across countries, helping in targeted policy-making.",
)
for conclusion_line in conclusion_lines:
    print(conclusion_line)