In [16]:
import requests
import pandas as pd
import numpy as np
import xarray as xr
import netCDF4 as nc
from datetime import datetime, timedelta
import os
import urllib.request
from urllib.parse import urljoin
import json
import time

In [34]:
class GLDASFetcher:
    def __init__(self, username, password):
        """
        Initialize GLDAS data fetcher with NASA Earthdata credentials
        
        Args:
            username (str): NASA Earthdata username (REQUIRED)
            password (str): NASA Earthdata password (REQUIRED)
        """
            
        self.username = username
        self.password = password
        
        # NASA GES DISC endpoints
        self.ges_disc_base = "https://hydro1.gesdisc.eosdis.nasa.gov"
        self.opendap_base = "https://hydro1.gesdisc.eosdis.nasa.gov/opendap"
        
        # GLDAS dataset paths
        self.datasets = {
            "GLDAS_NOAH025_D": "daily_aggregated",               # Daily aggregated
        }
        
        # GLDAS variable names (actual NetCDF variable names)
        self.variables = {
            'temp': 'Tair_f_inst',           # Temperature at 2m (K)
            'humidity': 'Qair_f_inst',        # Specific humidity at 2m (kg/kg)
            'pressure': 'Psurf_f_inst',       # Surface pressure (Pa)
            'wind_u': 'W2SM',          # Wind speed (m/s)
            'precipitation': 'Rainf_f_tavg',  # Rainfall rate (kg/m^2/s)
            'solar_rad': 'SWdown_f_tavg',     # Solar radiation (W/m^2)
                    
        }

    
    def get_data(self, lat, lon, start_date, end_date, variables=None, dataset="GLDAS_NOAH025_D"):
        """
        Fetch GLDAS daily data from NASA
        
        Args:
            lat (float): Latitude (-60 to 90 for GLDAS coverage)
            lon (float): Longitude (-180 to 180)
            start_date (str): Start date 'YYYY-MM-DD'
            end_date (str): End date 'YYYY-MM-DD'
            variables (list): Variables to fetch
            dataset (str): GLDAS dataset to use (default: daily)
            
        Returns:
            pandas.DataFrame: Real GLDAS daily data
        """
        if variables is None:
            variables = ['temp', 'humidity', 'pressure', 'precipitation','Wind Speed']
        
        print(f"üåç Fetching GLDAS DAILY data from NASA")
        print(f"üìç Location: ({lat}, {lon})")
        print(f"üìÖ Dates: {start_date} to {end_date}")
        print(f"üìä Variables: {variables}")
        print(f"üóÇÔ∏è Dataset: Daily aggregated")
        
        # Convert dates
        start_dt = datetime.strptime(start_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d')
        
        all_data = []
        current_date = start_dt
        
        while current_date <= end_dt:
            
            # Get daily aggregated data for this date
            daily_data = self._fetch_daily_aggregated_data(lat, lon, current_date, variables)
            if daily_data is not None:
                all_data.append(daily_data)
                    
            current_date += timedelta(days=1)
            time.sleep(0.1) 

        if all_data:
            # Combine all data
            combined_df = pd.DataFrame(all_data)
            combined_df.set_index('date', inplace=True)
            combined_df = combined_df.sort_index()
            
            print(f"‚úÖ Successfully fetched {len(combined_df)} daily records")
            return combined_df
        else:
            print("‚ùå No data could be fetched")
            return pd.DataFrame()
    
    def _fetch_daily_aggregated_data(self, lat, lon, date, variables):
        """Fetch daily aggregated data from NASA GLDAS/POWER"""
        
        try:
            print(f"üîÑ Fetching daily data for: {date.strftime('%Y-%m-%d')}")
            
            # Use NASA POWER API for daily meteorological data
            real_data = self._get_nasa_power_daily_data(lat, lon, date, variables)
            
            return real_data
            
        except Exception as e:
            print(f"Error fetching daily data: {e}")
            return None
    
    def _get_nasa_power_daily_data(self, lat, lon, date, variables):
        """Get daily data from NASA POWER API"""
        
        try:
            # NASA POWER API for daily meteorological data
            base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
            
            # Map our variables to POWER daily parameters
            power_params = []
            if 'temp' in variables:
                power_params.extend(['T2M_MAX', 'T2M_MIN'])  # min,max temp
            if 'humidity' in variables:
                power_params.extend(['QV2M'])  # Specific humidity
            if 'pressure' in variables:
                power_params.append('PS')   # Surface pressure
            if 'precipitation' in variables:
                power_params.append('PRECTOTCORR')  # Daily precipitation (corrected)
            if 'solar_rad' in variables:
                power_params.append('ALLSKY_SFC_SW_DWN')  # Solar radiation
            if 'wind_speed' in variables:
                power_params.append('WS2M')  # Wind speed

            if not power_params:
                return None
            
            params = {
                'parameters': ','.join(power_params),
                'community': 'RE',
                'longitude': lon,
                'latitude': lat,
                'start': date.strftime('%Y%m%d'),
                'end': date.strftime('%Y%m%d'),
                'format': 'JSON'
            }
            
            # Make request to NASA POWER
            response = requests.get(base_url, params=params, timeout=30)
            
            if response.status_code == 200:
                data = response.json()
                
                # Extract the daily data
                record = {
                    'date': date.date(),
                    'latitude': lat,
                    'longitude': lon
                }
                
                date_key = f"{date.year:04d}{date.month:02d}{date.day:02d}"
                
                # Map POWER data to our variable names (daily aggregations)
                if 'temp' in variables:
                    if 'T2M_MAX' in data['properties']['parameter'] and date_key in data['properties']['parameter']['T2M_MAX']:
                        record['temp_max'] = data['properties']['parameter']['T2M_MAX'][date_key]
                    if 'T2M_MIN' in data['properties']['parameter'] and date_key in data['properties']['parameter']['T2M_MIN']:
                        record['temp_min'] = data['properties']['parameter']['T2M_MIN'][date_key]        
                    
                if 'humidity' in variables:
                    if 'QV2M' in data['properties']['parameter'] and date_key in data['properties']['parameter']['QV2M']:
                        record['humidity_specific'] = data['properties']['parameter']['QV2M'][date_key]
                    
                if 'pressure' in variables:
                    if 'PS' in data['properties']['parameter'] and date_key in data['properties']['parameter']['PS']:
                        record['pressure'] = data['properties']['parameter']['PS'][date_key]
                
                if 'precipitation' in variables:
                    if 'PRECTOTCORR' in data['properties']['parameter'] and date_key in data['properties']['parameter']['PRECTOTCORR']:
                        record['precipitation_total'] = data['properties']['parameter']['PRECTOTCORR'][date_key]
                
                if 'solar_rad' in variables:
                    if 'ALLSKY_SFC_SW_DWN' in data['properties']['parameter'] and date_key in data['properties']['parameter']['ALLSKY_SFC_SW_DWN']:
                        record['solar_radiation'] = data['properties']['parameter']['ALLSKY_SFC_SW_DWN'][date_key]
                
                
                if 'wind_speed' in variables:
                    if 'WS2M' in data['properties']['parameter'] and date_key in data['properties']['parameter']['WS2M']:
                        record['wind_speed'] = data['properties']['parameter']['WS2M'][date_key]

                return record
            else:
                print(f"NASA POWER API error: {response.status_code}")
                return None
                
        except Exception as e:
            print(f"Error fetching NASA POWER daily data: {e}")
            return None

    def _extract_point_data(self, file_url, target_lat, target_lon, variables):
        """Extract data for a specific point from GLDAS NetCDF file"""
        
        try:
            # Use requests with authentication
            session = requests.Session()
            session.auth = (self.username, self.password)
            
            # For demo, we'll simulate the NetCDF data extraction
            # In practice, you'd use xarray or netCDF4 to read from OPeNDAP
            
            print(f"üîÑ Processing: {file_url.split('/')[-1]}")
            
            # Extract timestamp from filename
            filename = file_url.split('/')[-1]
            date_part = filename.split('.')[1]  
            
            year = int(date_part[1:5])
            month = int(date_part[5:7])
            day = int(date_part[7:9])
            
            timestamp = datetime(year, month, day)
            
            # This is where you'd actually read from the NetCDF file
            # For now, using NASA POWER API as a real data source
            data = self._get_nasa_power_data(target_lat, target_lon, timestamp, variables)
            
            return data
            
        except Exception as e:
            print(f"Error extracting point data: {e}")
            return None
    
    def _get_nasa_power_data(self, lat, lon, timestamp, variables):
        """Get data from NASA POWER API as fallback"""
        
        try:
            # NASA POWER API for real meteorological data
            base_url = "https://power.larc.nasa.gov/api/temporal/hourly/point"
            
            # Map our variables to POWER parameters
            power_params = []
            if 'temp' in variables:
                power_params.append('T2M')  # Temperature at 2m
            if 'humidity' in variables:
                power_params.append('QV2M')  # Specific humidity at 2m
            if 'pressure' in variables:
                power_params.append('PS')   # Surface pressure
            if 'precipitation' in variables:
                power_params.append('PRECTOT')  # Precipitation
            if 'solar_rad' in variables:
                power_params.append('ALLSKY_SFC_SW_DWN')  # Solar radiation
            if 'wind_u' in variables:
                power_params.append('WS2M')
            
            if not power_params:
                return None
            
            params = {
                'parameters': ','.join(power_params),
                'community': 'RE',
                'longitude': lon,
                'latitude': lat,
                'start': timestamp.strftime('%Y%m%d'),
                'end': timestamp.strftime('%Y%m%d'),
                'format': 'JSON'
            }
            
            # Make request to NASA POWER
            response = requests.get(base_url, params=params, timeout=30)
            
            if response.status_code == 200:
                data = response.json()
                
                # Extract the data for our timestamp
                record = {
                    'datetime': timestamp,
                    'latitude': lat,
                    'longitude': lon
                }
               
                return record
            else:
                print(f"NASA POWER API error: {response.status_code}")
                return None
                
        except Exception as e:
            print(f"Error fetching NASA POWER data: {e}")
            return None
    
    def get_bulk_data(self, locations, start_date, end_date, variables=None):
        
        """Fetch data for multiple locations"""
        try:
            data = self.get_data(lat, lon, start_date, end_date, variables)
            if not data.empty:
                location_key = f"lat_{lat}_lon_{lon}"
                results[location_key] = data
                print(f"‚úÖ Success: {len(data)} records")
            else:
                print(f"‚ùå No data retrieved")
                    
        except Exception as e:
            print(f"‚ùå Error: {e}")
            
        
        return results
    
    def to_csv(self, data, filename):
        """Export data to CSV"""
        if isinstance(data, dict):
            all_data = []
            for location_key, df in data.items():
                df_copy = df.reset_index()
                df_copy['location'] = location_key
                all_data.append(df_copy)
            
            combined_df = pd.concat(all_data, ignore_index=True)
            combined_df.to_csv(filename, index=False)
        else:
            data.to_csv(filename)
            
        print(f"üíæ Data exported to {filename}")


In [38]:

def main():
    """Example usage with NASA data"""
    
    print("üöÄ NASA GLDAS Data Fetcher")
    print("=" * 50)
    
    
    username = "mahmoudmo12"
    password = "Mahmoudmetawe12@"
    

    # Initialize with real credentials
    fetcher = GLDASFetcher(username=username, password=password)
    city = input("enter the city: ")
    # Test with a single location
    print(f"\nüåç Fetching DAILY data for {city} City...")
    
    data = fetcher.get_data(
        lat=30.0444,
        lon=31.2358,
        start_date="2004-01-01",
        end_date="2025-09-15",
        variables=['temp', 'humidity', 'pressure', 'precipitation', 'solar_rad', 'wind_speed']
    )
    
    if not data.empty:
        print(f"\n‚úÖ SUCCESS! Retrieved {len(data)} real data points")
        print(f"üìã Columns: {list(data.columns)}")
        
        # Export real data
        fetcher.to_csv(data, "nasa_daily_weather_data.csv")
    else:
        print("‚ùå")

if __name__ == "__main__":
    main()

üöÄ NASA GLDAS Data Fetcher


enter the city:  cairo



üåç Fetching DAILY data for cairo City...
üåç Fetching GLDAS DAILY data from NASA
üìç Location: (30.0444, 31.2358)
üìÖ Dates: 2004-01-01 to 2025-09-15
üìä Variables: ['temp', 'humidity', 'pressure', 'precipitation', 'solar_rad', 'wind_speed']
üóÇÔ∏è Dataset: Daily aggregated
üîÑ Fetching daily data for: 2004-01-01
üîÑ Fetching daily data for: 2004-01-02
üîÑ Fetching daily data for: 2004-01-03
üîÑ Fetching daily data for: 2004-01-04
üîÑ Fetching daily data for: 2004-01-05
üîÑ Fetching daily data for: 2004-01-06
üîÑ Fetching daily data for: 2004-01-07
üîÑ Fetching daily data for: 2004-01-08
üîÑ Fetching daily data for: 2004-01-09
üîÑ Fetching daily data for: 2004-01-10
üîÑ Fetching daily data for: 2004-01-11
üîÑ Fetching daily data for: 2004-01-12
üîÑ Fetching daily data for: 2004-01-13
üîÑ Fetching daily data for: 2004-01-14
üîÑ Fetching daily data for: 2004-01-15
üîÑ Fetching daily data for: 2004-01-16
üîÑ Fetching daily data for: 2004-01-17
üîÑ Fetching daily 

KeyboardInterrupt: 

In [36]:
df = pd.read_csv("nasa_daily_weather_data.csv")

In [37]:
df

Unnamed: 0,date,latitude,longitude,temp_max,temp_min,humidity_specific,pressure,precipitation_total,solar_radiation,wind_speed
0,2025-09-12,30.0444,31.2358,37.88,20.63,9.68,99.38,0.0,6.6281,2.63
1,2025-09-13,30.0444,31.2358,37.79,21.53,9.02,99.5,0.0,6.6442,1.92
2,2025-09-14,30.0444,31.2358,38.65,21.7,10.24,99.53,0.0,6.4788,3.06
3,2025-09-15,30.0444,31.2358,36.43,21.73,11.38,99.54,0.0,6.4042,2.91
