In [1]:
"""
REAL-TIME EDUCATION DATA API FETCHER
=====================================
Fetches education data directly from available government APIs
For use in ML/AI engineering portfolio projects

Available APIs:
1. Census Bureau API - Education finance and poverty data
2. NCES API - National education statistics
3. Data.gov API - Various education datasets
4. World Bank API - International education data
"""

import json
import os
import time
from datetime import datetime

import pandas as pd
import requests

class EducationDataAPIFetcher:
    """
    Describe and fetch education data from public (mostly government) APIs.

    Two kinds of methods live here:

    * Descriptive methods (Census SAIPE, Census finance, NAEP, consolidated
      dataset) perform no network I/O; they return dictionaries describing
      the endpoint, parameters and expected fields.
    * Live methods (World Bank, Data.gov) issue HTTP GET requests and return
      a pandas DataFrame on success.

    Progress and errors are reported with ``print`` so the class can be used
    directly from a notebook cell.
    """

    # Value used when no real Census key has been supplied.
    PLACEHOLDER_API_KEY = "YOUR_API_KEY"

    # Seconds to wait for an HTTP response before giving up.
    DEFAULT_TIMEOUT = 10

    def __init__(self, census_api_key=None, timeout=DEFAULT_TIMEOUT):
        """
        Initialize API endpoints and keys.

        Args:
            census_api_key: Census Bureau API key. When omitted, the
                ``CENSUS_API_KEY`` environment variable is used; if that is
                unset too, the placeholder ``"YOUR_API_KEY"`` is stored.
                Get a free key at https://api.census.gov/data/key_signup.html
            timeout: Seconds to wait for each HTTP response.
        """
        # Census Bureau API
        self.census_api_base = "https://api.census.gov/data"
        self.census_api_key = (
            census_api_key
            or os.environ.get("CENSUS_API_KEY")
            or self.PLACEHOLDER_API_KEY
        )

        # Data.gov API (no key required for most datasets)
        self.datagov_base = "https://catalog.data.gov/api/3"

        # World Bank API (no key required)
        self.worldbank_base = "https://api.worldbank.org/v2"

        # NCES Common Core of Data API
        self.nces_base = "https://api.ed.gov/data"

        # Without a timeout, requests waits forever on an unresponsive host.
        self.request_timeout = timeout

        print("📡 Education Data API Fetcher Initialized")
        print("Note: Some APIs require free registration for API keys")

    def _get_json(self, url, params):
        """
        GET ``url`` and return the decoded JSON body.

        Returns:
            The parsed JSON, or None (after printing the status code) when
            the server answers with anything other than HTTP 200.

        Raises:
            requests.RequestException: on connection problems or timeouts.
            ValueError: when the body is not valid JSON.
        """
        response = requests.get(url, params=params, timeout=self.request_timeout)
        if response.status_code != 200:
            print(f"Request failed with HTTP {response.status_code}: {url}")
            return None
        return response.json()

    @staticmethod
    def _worldbank_rows(payload):
        """
        Flatten a World Bank API payload into a list of row dictionaries.

        A successful payload is a two-element list ``[metadata, records]``;
        anything else (for example a one-element error message) yields ``[]``.
        Nested ``country``/``indicator`` objects that are missing or null are
        treated as empty.
        """
        if not isinstance(payload, list) or len(payload) < 2 or not payload[1]:
            return []
        return [
            {
                'country': (item.get('country') or {}).get('value', ''),
                'year': item.get('date', ''),
                'value': item.get('value'),
                'indicator': (item.get('indicator') or {}).get('value', ''),
            }
            for item in payload[1]
        ]

    @staticmethod
    def _summarize_dataset(dataset, notes_limit=200):
        """
        Reduce one Data.gov (CKAN) dataset record to a flat summary row.

        Fields that are missing *or* explicitly null are treated as empty, and
        the ellipsis is appended to ``notes`` only when text was actually cut
        off at ``notes_limit`` characters.
        """
        notes = dataset.get('notes') or ''
        if len(notes) > notes_limit:
            notes = notes[:notes_limit] + '...'
        organization = dataset.get('organization') or {}
        return {
            'title': dataset.get('title', ''),
            'organization': organization.get('title', ''),
            'notes': notes,
            'resources': len(dataset.get('resources') or []),
            'url': f"https://catalog.data.gov/dataset/{dataset.get('name', '')}",
        }

    def fetch_census_saipe_data(self, year=2023):
        """
        Describe the Small Area Income and Poverty Estimates (SAIPE) request.

        No HTTP request is made; the method returns the endpoint, the query
        parameters and a small hard-coded sample of the response layout.

        API Documentation: https://www.census.gov/data/developers/data-sets/Poverty-Statistics.html

        Args:
            year: Estimate year, sent as the ``time`` predicate.

        Returns:
            dict with keys ``api_endpoint``, ``parameters``,
            ``data_structure`` and ``sample_response``.
        """
        print(f"\n📊 Fetching SAIPE poverty data for {year}...")

        # SAIPE is a timeseries dataset: the year is selected with the
        # ``time`` predicate rather than being a segment of the URL path.
        url = f"{self.census_api_base}/timeseries/poverty/saipe"

        params = {
            'get': 'NAME,SAEPOV5_17R_PT,SAEPOVRT5_17R_PT',  # State name, children in poverty, poverty rate
            'for': 'state:*',  # All states
            'time': str(year),
            'key': self.census_api_key,
        }

        # With a registered key the live call would be:
        #     requests.get(url, params=params, timeout=...).json()
        print("ℹ️ To fetch live data, register for free Census API key at:")
        print("   https://api.census.gov/data/key_signup.html")

        # Structure that would come back from the API
        return {
            'api_endpoint': url,
            'parameters': params,
            'data_structure': {
                'NAME': 'State name',
                'SAEPOV5_17R_PT': 'Children ages 5-17 in poverty',
                'SAEPOVRT5_17R_PT': 'Child poverty rate',
            },
            'sample_response': [
                ['NAME', 'SAEPOV5_17R_PT', 'SAEPOVRT5_17R_PT', 'state'],
                ['Alabama', '168234', '20.7', '01'],
                ['Alaska', '14892', '11.4', '02'],
            ],
        }

    def fetch_census_education_finance(self, year=2022):
        """
        Describe the Census Bureau school-finance request for a fiscal year.

        No HTTP request is made; only the endpoint, parameters and field
        descriptions are returned.

        API Documentation: https://www.census.gov/data/developers/data-sets/school-system-finances.html

        Args:
            year: Fiscal year placed in the endpoint path.

        Returns:
            dict with keys ``api_endpoint``, ``parameters``, ``data_fields``
            and ``calculation``.
        """
        print(f"\n💰 Fetching education finance data for FY{year}...")

        # NOTE(review): confirm this dataset path and that TCURELSC is a
        # per-pupil figure against the Census variable list before going live.
        url = f"{self.census_api_base}/{year}/school-finances"

        params = {
            'get': 'NAME,TOTALEXP,TOTALREV,TCURELSC,ENROLL',
            'for': 'state:*',
            'key': self.census_api_key,
        }

        finance_data = {
            'api_endpoint': url,
            'parameters': params,
            'data_fields': {
                'NAME': 'State name',
                'TOTALEXP': 'Total expenditures',
                'TOTALREV': 'Total revenue',
                'TCURELSC': 'Current spending per pupil',
                'ENROLL': 'Total enrollment',
            },
            'calculation': 'Per-pupil spending = TCURELSC (directly provided)',
        }

        print("✅ Finance data structure retrieved")
        return finance_data

    def fetch_nces_naep_data(self):
        """
        Return access information for NAEP data.

        NAEP has no direct API, so this returns links to the NAEP Data
        Explorer together with a small hard-coded sample of state scores.

        Returns:
            dict with keys ``data_tool``, ``bulk_download``, ``api_status``,
            ``alternative`` and ``sample_2024_math_grade8``.
        """
        print("\n📚 Fetching NAEP data information...")

        naep_access = {
            'data_tool': 'https://www.nationsreportcard.gov/ndecore/xplore/NDE',
            'bulk_download': 'https://www.nationsreportcard.gov/ndecore/xplore/NDE',
            'api_status': 'No direct API - use NAEP Data Explorer',
            'alternative': 'Download CSV/Excel from NAEP Data Explorer',
            'sample_2024_math_grade8': {
                'Massachusetts': 288,
                'Minnesota': 286,
                'New Hampshire': 286,
                'New Jersey': 285,
                'North Dakota': 284,
                'Utah': 283,
                'Vermont': 283,
                'Wisconsin': 283,
            },
        }

        print("ℹ️ NAEP data must be accessed through:")
        print("   https://www.nationsreportcard.gov/ndecore/xplore/NDE")

        return naep_access

    def fetch_worldbank_education_data(self, country_code='USA',
                                       indicator='SE.XPD.TOTL.GD.ZS',
                                       date_range='2015:2023'):
        """
        Fetch one education indicator for one country from the World Bank API.

        API Documentation: https://datahelpdesk.worldbank.org/knowledgebase/articles/889392

        Args:
            country_code: World Bank country code.
            indicator: Indicator code; the default is education expenditure
                as a percentage of GDP.
            date_range: ``"start:end"`` year range sent as the ``date`` filter.

        Returns:
            * pandas.DataFrame with columns ``country``, ``year``, ``value``
              and ``indicator`` when records are returned;
            * None when the server answers non-200 or returns no records;
            * a dict describing the endpoint and some common indicators when
              the request itself fails (network error, timeout, invalid JSON).
        """
        print(f"\n🌍 Fetching World Bank education data for {country_code}...")

        url = f"{self.worldbank_base}/country/{country_code}/indicator/{indicator}"

        params = {
            'format': 'json',
            'date': date_range,
            'per_page': 100,
        }

        # Only the network call is guarded, so bugs in the parsing code
        # surface instead of being reported as fetch errors.
        try:
            payload = self._get_json(url, params)
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching World Bank data: {e}")

            # Return sample structure
            return {
                'api_endpoint': url,
                'parameters': params,
                'indicators': {
                    'SE.XPD.TOTL.GD.ZS': 'Education expenditure (% of GDP)',
                    'SE.PRM.CMPT.ZS': 'Primary completion rate',
                    'SE.SEC.CMPT.LO.ZS': 'Lower secondary completion rate',
                    'SE.TER.CUAT.BA.ZS': "Educational attainment, Bachelor's or higher",
                },
            }

        if payload is None:
            return None

        rows = self._worldbank_rows(payload)
        if not rows:
            print("No data available for specified parameters")
            return None

        df = pd.DataFrame(rows)
        print(f"✅ Retrieved {len(df)} records from World Bank")
        return df

    def fetch_datagov_datasets(self, query='education spending by state', rows=10):
        """
        Search the Data.gov catalog and summarize the matching datasets.

        API Documentation: https://resources.data.gov/developers/

        Args:
            query: Free-text search string.
            rows: Maximum number of datasets to request.

        Returns:
            pandas.DataFrame with columns ``title``, ``organization``,
            ``notes``, ``resources`` and ``url``; or None when the request
            fails, the server answers non-200, or nothing matches.
        """
        print(f"\n🔍 Searching Data.gov for: {query}")

        url = f"{self.datagov_base}/action/package_search"

        params = {
            'q': query,
            'rows': rows,
            'start': 0,
            'facet.field': '["organization", "tags", "res_format"]',
        }

        try:
            payload = self._get_json(url, params)
        except (requests.RequestException, ValueError) as e:
            print(f"Error searching Data.gov: {e}")
            return None

        if payload is None:
            return None

        result = (payload.get('result') or {}) if isinstance(payload, dict) else {}
        if not result or not payload.get('success') or result.get('count', 0) <= 0:
            print("No datasets found")
            return None

        datasets = [
            self._summarize_dataset(dataset)
            for dataset in result.get('results') or []
        ]

        df = pd.DataFrame(datasets)
        print(f"✅ Found {len(df)} relevant datasets")
        return df

    def create_consolidated_dataset(self):
        """
        Describe the dataset that would result from combining every source.

        No API calls are made; the returned dict lists the sources, the
        planned column layout and rough cost estimates.

        Returns:
            dict with keys ``sources``, ``data_structure``,
            ``api_calls_required`` and ``estimated_time``.
        """
        print("\n🔄 Creating consolidated dataset from multiple sources...")

        # This would combine real API calls
        consolidated = {
            'sources': [
                'Census Bureau SAIPE API',
                'Census Bureau Education Finance API',
                'NAEP Data Explorer',
                'World Bank API',
                'Data.gov Datasets',
            ],
            'data_structure': {
                'state': 'State name',
                'state_code': 'Two-letter state code',
                'naep_score': 'NAEP mathematics score',
                'per_pupil_spending': 'Per-pupil expenditure',
                'child_poverty_rate': 'Child poverty rate (%)',
                'enrollment': 'Total enrollment',
                'gdp_per_capita': 'State GDP per capita',
            },
            'api_calls_required': 5,
            'estimated_time': '30-60 seconds with API keys',
        }

        print("✅ Consolidation structure defined")
        return consolidated

    def generate_api_documentation(self, output_path='api_documentation.md'):
        """
        Write a Markdown guide to the APIs used by this class.

        Args:
            output_path: File to write; defaults to ``api_documentation.md``
                in the current working directory.

        Returns:
            The documentation text that was written.
        """

        doc = """
API ACCESS DOCUMENTATION FOR EDUCATION DATA
===========================================

1. CENSUS BUREAU API
--------------------
Website: https://www.census.gov/data/developers.html
Key Required: Yes (free)
Register: https://api.census.gov/data/key_signup.html

Endpoints:
- SAIPE (Poverty): /data/timeseries/poverty/saipe (select the year with time=[year])
- School Finances: /data/[year]/school-finances
- ACS (Demographics): /data/[year]/acs/acs5

Python Example:
```python
import requests

API_KEY = 'your_key_here'
url = 'https://api.census.gov/data/timeseries/poverty/saipe'
params = {
    'get': 'NAME,SAEPOVRT5_17R_PT',
    'for': 'state:*',
    'time': '2023',
    'key': API_KEY
}
response = requests.get(url, params=params)
data = response.json()
```

2. NCES/NAEP DATA
-----------------
Website: https://nces.ed.gov/datatools/
API: No direct API - use data tools
Alternative: Download CSV/Excel files

Access Methods:
- NAEP Data Explorer: https://www.nationsreportcard.gov/ndecore/xplore/NDE
- School District Demographics: https://nces.ed.gov/ccd/districtsearch/
- IPEDS (Higher Ed): https://nces.ed.gov/ipeds/use-the-data

3. WORLD BANK API
-----------------
Website: https://datahelpdesk.worldbank.org/knowledgebase/articles/889392
Key Required: No
Format: JSON, XML

Python Example:
```python
import requests

url = 'https://api.worldbank.org/v2/country/USA/indicator/SE.XPD.TOTL.GD.ZS'
params = {'format': 'json', 'date': '2015:2023'}
response = requests.get(url, params=params)
data = response.json()
```

4. DATA.GOV API
---------------
Website: https://data.gov/developers/
Key Required: No (for most datasets)
CKAN API: https://catalog.data.gov/api/3

Python Example:
```python
import requests

url = 'https://catalog.data.gov/api/3/action/package_search'
params = {'q': 'education spending', 'rows': 10}
response = requests.get(url, params=params)
datasets = response.json()
```

5. ADDITIONAL SOURCES
---------------------
- BLS (Labor Statistics): https://www.bls.gov/developers/
- Federal Reserve (FRED): https://fred.stlouisfed.org/docs/api/fred/
- Urban Institute API: https://educationdata.urban.org/documentation/

RATE LIMITS
-----------
- Census Bureau: 500 requests per day (with key)
- World Bank: No official limit (be respectful)
- Data.gov: No official limit
- FRED: 120 requests per minute

BEST PRACTICES
--------------
1. Cache responses to minimize API calls
2. Use batch requests when available
3. Implement exponential backoff for retries
4. Store API keys in environment variables
5. Document data sources and timestamps
6. Validate data quality and completeness

For ML/AI Engineering Portfolios:
- Show ability to work with multiple APIs
- Implement error handling and retries
- Create data pipelines for regular updates
- Build data quality checks
- Document API limitations and workarounds
"""

        # Explicit encoding keeps the output identical across platforms.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(doc)

        print(f"\n📄 API documentation generated: {output_path}")
        return doc

def demonstrate_api_usage():
    """
    Run every fetcher method once and print a portfolio-oriented summary.

    Builds an EducationDataAPIFetcher, invokes each of its data methods in a
    fixed order (discarding their results), generates the API documentation
    file, and prints closing notes.

    Returns:
        The EducationDataAPIFetcher instance used for the demonstration.
    """
    wide_rule = "=" * 80
    section_rule = "=" * 50

    for banner_line in (
        wide_rule,
        "EDUCATION DATA API FETCHER DEMONSTRATION",
        "For ML/AI Engineering Portfolio",
        wide_rule,
    ):
        print(banner_line)

    # Initialize fetcher
    fetcher = EducationDataAPIFetcher()

    print("\n" + section_rule)
    print("DEMONSTRATING API CAPABILITIES")
    print(section_rule)

    # Steps run in this order; their return values are not used here.
    demo_steps = (
        fetcher.fetch_census_saipe_data,         # 1. Census SAIPE
        fetcher.fetch_census_education_finance,  # 2. Census Education Finance
        fetcher.fetch_nces_naep_data,            # 3. NAEP Data
        fetcher.fetch_worldbank_education_data,  # 4. World Bank
        fetcher.fetch_datagov_datasets,          # 5. Data.gov search
        fetcher.create_consolidated_dataset,     # 6. Consolidated dataset structure
        fetcher.generate_api_documentation,      # 7. Generate documentation
    )
    for run_step in demo_steps:
        run_step()

    print("\n" + section_rule)
    print("SUMMARY")
    print(section_rule)

    closing_lines = (
        "\n✅ API Fetcher Demonstration Complete!",
        "\nKey Takeaways for Portfolio:",
        "• Demonstrated knowledge of government data APIs",
        "• Showed ability to work with multiple data sources",
        "• Implemented proper API patterns and error handling",
        "• Created documentation for reproducibility",
        "• Built foundation for automated data pipeline",
        "\n📌 Next Steps:",
        "1. Register for free API keys (Census Bureau)",
        "2. Implement caching to minimize API calls",
        "3. Build automated pipeline for regular updates",
        "4. Add data validation and quality checks",
        "5. Create ML models using the real-time data",
    )
    for closing_line in closing_lines:
        print(closing_line)

    return fetcher

# Run the demonstration only when this file is executed directly, not when it
# is imported; the fetcher is kept in a module-level name for further use.
if __name__ == "__main__":
    fetcher = demonstrate_api_usage()

EDUCATION DATA API FETCHER DEMONSTRATION
For ML/AI Engineering Portfolio
📡 Education Data API Fetcher Initialized
Note: Some APIs require free registration for API keys

DEMONSTRATING API CAPABILITIES

📊 Fetching SAIPE poverty data for 2023...
ℹ️ To fetch live data, register for free Census API key at:
   https://api.census.gov/data/key_signup.html

💰 Fetching education finance data for FY2022...
✅ Finance data structure retrieved

📚 Fetching NAEP data information...
ℹ️ NAEP data must be accessed through:
   https://www.nationsreportcard.gov/ndecore/xplore/NDE

🌍 Fetching World Bank education data for USA...
✅ Retrieved 9 records from World Bank

🔍 Searching Data.gov for: education spending by state
✅ Found 10 relevant datasets

🔄 Creating consolidated dataset from multiple sources...
✅ Consolidation structure defined

📄 API documentation generated: api_documentation.md

SUMMARY

✅ API Fetcher Demonstration Complete!

Key Takeaways for Portfolio:
• Demonstrated knowledge of government 