In [72]:
import warnings
import requests
import time
from datetime import datetime, timedelta
from geopy.geocoders import Nominatim
import pandas as pd
import numpy as np

# NOTE(review): this filter targets a "Pipeline instance is not fitted yet"
# message, but no Pipeline is used in the cells visible here — confirm whether
# it is still needed.
warnings.filterwarnings("ignore", message="This Pipeline instance is not fitted yet")


class GLDASFetcher:
    """
    Fetches daily weather data from the NASA POWER API for a given location.

    The class name mentions GLDAS, but every request made here targets the
    NASA POWER daily point endpoint; the name is kept for existing callers.
    """

    def __init__(self):
        # Map simplified variable names to NASA POWER API parameter codes
        self.variables_map = {
            'temp': ['T2M_MAX', 'T2M_MIN'],
            'humidity': ['QV2M'],
            'pressure': ['PS'],
            'precipitation': ['PRECTOTCORR'],
            'solar_rad': ['ALLSKY_SFC_SW_DWN'],
            'wind_speed': ['WS2M'],
            'evapotranspiration': ['EVPTRNS'],
            'soil_moisture': ['GWETTOP'],
            'dew_point': ['T2MDEW']
        }

        # Reverse map to rename columns to human-friendly names
        self.api_to_friendly_map = {
            'T2M_MAX': 'temp_max',
            'T2M_MIN': 'temp_min',
            'QV2M': 'humidity_specific',
            'PS': 'pressure',
            'PRECTOTCORR': 'precipitation_total',
            'ALLSKY_SFC_SW_DWN': 'solar_radiation',
            'WS2M': 'wind_speed',
            'EVPTRNS': 'evapotranspiration',
            'GWETTOP': 'soil_moisture_surface',
            'T2MDEW': 'dew_point_temp'
        }

    @staticmethod
    def _format_date(value):
        """Return `value` as a YYYYMMDD string (accepts 'YYYY-MM-DD' strings or date-like objects)."""
        if hasattr(value, "strftime"):
            return value.strftime("%Y%m%d")
        return str(value).replace("-", "")

    def _resolve_parameters(self, variables):
        """Translate simplified variable names into a de-duplicated list of API parameter codes."""
        unknown = [var for var in variables if var not in self.variables_map]
        if unknown:
            print(f"‚ö†Ô∏è Ignoring unknown variables: {', '.join(map(str, unknown))}")

        # dict.fromkeys drops repeated codes while keeping the requested order
        return list(dict.fromkeys(
            code
            for var in variables
            for code in self.variables_map.get(var, [])
        ))

    def get_data(self, lat, lon, start_date, end_date, variables=None):
        """
        Fetch NASA POWER daily data for the given location and date range.

        Args:
            lat: Latitude sent to the API and copied into the 'lat' column.
            lon: Longitude sent to the API and copied into the 'lon' column.
            start_date: First day, as a 'YYYY-MM-DD' string or a date-like
                object exposing ``strftime``.
            end_date: Last day, same accepted forms as ``start_date``.
            variables: Simplified variable name, or iterable of names, taken
                from ``self.variables_map``. ``None`` requests every known
                variable. Unknown names are reported and skipped.

        Returns:
            A DataFrame with a 'date' column, one column per returned
            parameter (renamed through ``self.api_to_friendly_map``) and
            'lat'/'lon' columns. An empty DataFrame is returned when no valid
            variable was requested, the request fails, or the response cannot
            be parsed.

        Note:
            Values are passed through exactly as the API returns them; any
            missing-data sentinel used by the service is NOT converted to NaN
            here, so callers should check for it themselves.
        """
        if variables is None:
            variables = list(self.variables_map.keys())
        elif isinstance(variables, str):
            # A bare string would otherwise be iterated character by character
            variables = [variables]

        base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"

        power_params = self._resolve_parameters(variables)
        if not power_params:
            # Nothing to ask for: skip the round trip instead of sending an empty list
            print("‚ùå No valid variables requested.")
            return pd.DataFrame()

        params = {
            'parameters': ','.join(power_params),
            'community': 'RE',  # Renewable Energy community
            'longitude': lon,
            'latitude': lat,
            'start': self._format_date(start_date),
            'end': self._format_date(end_date),
            'format': 'JSON'
        }

        print(f"üåç Fetching NASA POWER data for ({lat}, {lon}) from {start_date} to {end_date} ...")

        try:
            response = requests.get(base_url, params=params, timeout=60)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"‚ùå Request error: {e}")
            return pd.DataFrame()

        try:
            data = response.json()['properties']['parameter']
        except ValueError as e:
            # JSON decoding errors are ValueError subclasses
            print(f"‚ùå API response is not valid JSON: {e}")
            return pd.DataFrame()
        except (KeyError, TypeError):
            print("‚ùå API response missing 'properties.parameter'")
            return pd.DataFrame()

        # Pivot {parameter: {date: value}} into {date: {parameter: value}}
        records = {}
        for var, timeseries in data.items():
            for date_str, value in timeseries.items():
                records.setdefault(date_str, {})[var] = value

        if not records:
            print("‚ùå No data found in API response.")
            return pd.DataFrame()

        df = pd.DataFrame.from_dict(records, orient='index')
        df.index = pd.to_datetime(df.index, format="%Y%m%d")
        df.index.name = "date"
        df = df.reset_index()

        # Add metadata
        df['lat'] = lat
        df['lon'] = lon

        # Rename columns to friendly names
        df = df.rename(columns=self.api_to_friendly_map)

        print(f"‚úÖ Retrieved {len(df)} daily records")
        return df

    def get_location_by_address(self, address, retries=3):
        """
        Convert a city name/address to latitude & longitude using Nominatim.

        Args:
            address: Free-form place name or address to look up.
            retries: Maximum number of attempts when the lookup raises.

        Returns:
            A dict with 'lat', 'lon' and 'display_name' keys, or ``None`` when
            the service finds no match or every attempt raised.
        """
        # Explicit timeout so slow responses are not cut off by the library default
        geolocator = Nominatim(user_agent="gldas_fetcher", timeout=10)
        for attempt in range(retries):
            try:
                # Pause before every request (presumably to respect the service's rate limit)
                time.sleep(1)
                location = geolocator.geocode(address)
            except Exception as e:
                print(f"‚ö†Ô∏è Geocoding failed ({e}), retrying ({attempt + 1}/{retries})...")
                continue

            if location:
                return {'lat': location.latitude, 'lon': location.longitude, 'display_name': location.address}

            # The service answered but found nothing; repeating the same query cannot help
            print(f"‚ùå No geocoding match found for '{address}'.")
            return None

        print("‚ùå Failed to geocode address after multiple attempts.")
        return None


def main():
    """
    Interactive entry point: geocode a city, download its daily weather
    history and write it to ``nasa_daily_weather.csv``.

    Returns:
        The downloaded DataFrame, or ``None`` when geocoding fails or the
        download comes back empty.
    """
    print("üöÄ NASA GLDAS Data Fetcher")
    print("=" * 50)

    weather_client = GLDASFetcher()
    place = weather_client.get_location_by_address(input("Enter city name: "))
    if not place:
        print("‚ùå Could not get location data.")
        return None

    print(f"\nüåç Fetching data for {place['display_name']}")

    # The range ends six days before today (presumably because the most recent
    # days are not yet published by the API — TODO confirm).
    last_day = datetime.now() - timedelta(days=6)
    requested = [
        'temp', 'humidity', 'pressure', 'precipitation', 'solar_rad',
        'wind_speed', 'evapotranspiration', 'soil_moisture', 'dew_point',
    ]
    weather = weather_client.get_data(
        lat=place["lat"],
        lon=place["lon"],
        start_date="1984-01-01",
        end_date=last_day.strftime("%Y-%m-%d"),
        variables=requested,
    )

    if weather.empty:
        print("‚ùå No data retrieved.")
        return None

    output_path = "nasa_daily_weather.csv"
    weather.to_csv(output_path, index=False)
    print(f"üíæ Data saved as: {output_path}")

    return weather


In [73]:
# Run the interactive fetch. main() returns the DataFrame, or None when
# geocoding fails or nothing is downloaded, so `df` may be None afterwards.
if __name__ == "__main__":
    df = main()


üöÄ NASA GLDAS Data Fetcher


Enter city name:  Giza



üåç Fetching data for ÿßŸÑÿ¨Ÿäÿ≤ÿ©, 12524, ŸÖÿµÿ±
üåç Fetching NASA POWER data for (29.9870753, 31.2118063) from 1984-01-01 to 2025-10-31 ...
‚úÖ Retrieved 15280 daily records
üíæ Data saved as: nasa_daily_weather.csv


In [74]:
# Preview the frame returned by main().
df

Unnamed: 0,date,temp_max,temp_min,humidity_specific,pressure,precipitation_total,solar_radiation,wind_speed,evapotranspiration,soil_moisture_surface,dew_point_temp,lat,lon
0,1984-01-01,19.55,8.60,6.21,100.37,0.08,3.7054,2.82,0.0000,0.20,6.71,29.987075,31.211806
1,1984-01-02,19.51,8.81,6.00,100.85,0.00,3.3499,2.24,0.0000,0.20,6.41,29.987075,31.211806
2,1984-01-03,19.69,7.56,6.24,100.89,0.06,3.5191,1.74,0.0000,0.20,7.05,29.987075,31.211806
3,1984-01-04,18.43,7.94,6.17,100.55,0.01,3.4500,1.86,0.0000,0.20,6.68,29.987075,31.211806
4,1984-01-05,18.31,6.57,5.72,100.46,0.00,3.5035,1.35,0.0000,0.20,5.72,29.987075,31.211806
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15275,2025-10-27,31.16,19.02,10.49,99.75,0.00,4.8694,1.74,0.0005,0.09,14.59,29.987075,31.211806
15276,2025-10-28,31.67,18.06,9.83,99.71,0.00,4.7926,1.97,0.0002,0.09,13.41,29.987075,31.211806
15277,2025-10-29,31.26,17.27,9.62,99.89,0.00,4.8221,2.67,0.0005,0.09,13.10,29.987075,31.211806
15278,2025-10-30,29.68,18.60,10.93,99.91,0.01,4.7609,3.32,0.0005,0.09,15.28,29.987075,31.211806


In [75]:
# Per-column count of cells equal to -999 (assumed to be the API's
# missing-data sentinel — TODO confirm).
df.eq(-999.00).sum()

date                     0
temp_max                 0
temp_min                 0
humidity_specific        0
pressure                 0
precipitation_total      0
solar_radiation          0
wind_speed               0
evapotranspiration       0
soil_moisture_surface    0
dew_point_temp           0
lat                      0
lon                      0
dtype: int64

In [76]:
# Per-column count of null cells.
df.isna().sum()

date                     0
temp_max                 0
temp_min                 0
humidity_specific        0
pressure                 0
precipitation_total      0
solar_radiation          0
wind_speed               0
evapotranspiration       0
soil_moisture_surface    0
dew_point_temp           0
lat                      0
lon                      0
dtype: int64