In [None]:
import requests
import pandas as pd
import ee
import datetime
import re
import time

In [None]:
# One query point per district: each entry carries the district name and the
# lat/lon pair that the soil and climate request loops below iterate over.
district_coords = [
    {"district": "Thanjavur", "lat": 10.78, "lon": 79.13},
    {"district": "Thiruvarur", "lat": 10.76, "lon": 79.83},
    {"district": "Mayiladuthurai", "lat": 11.11, "lon": 79.67},
    {"district": "Tiruvallur", "lat": 13.13, "lon": 80.01},
    {"district": "Tiruvannamalai", "lat": 12.23, "lon": 79.11},
    {"district": "Krishnagiri", "lat": 12.74, "lon": 77.83},
    {"district": "Dharmapuri", "lat": 12.06, "lon": 78.16},
    {"district": "Salem", "lat": 11.60, "lon": 78.61},
    {"district": "Erode", "lat": 11.41, "lon": 77.22},
    {"district": "The Nilgiris", "lat": 11.40, "lon": 76.70},
    {"district": "Madurai", "lat": 9.97, "lon": 77.78},
    {"district": "Theni", "lat": 10.01, "lon": 77.47},
    {"district": "Dindigul", "lat": 10.51, "lon": 77.83},
    {"district": "Sivaganga", "lat": 9.84, "lon": 78.48},
    {"district": "Ramanathapuram", "lat": 9.38, "lon": 78.83},
    {"district": "Tirunelveli", "lat": 8.71, "lon": 77.75},
    {"district": "Thoothukudi", "lat": 9.18, "lon": 77.86},
    {"district": "Chengalpattu", "lat": 12.68, "lon": 79.99},
    {"district": "Karur", "lat": 10.95, "lon": 78.07},
    {"district": "Perambalur", "lat": 11.23, "lon": 78.88},
    {"district": "Tenkasi", "lat": 8.97, "lon": 77.30},
    {"district": "Ranipet", "lat": 12.93, "lon": 79.31}
]


In [None]:
# Tabular view of the query points: one row per entry of `district_coords`.
df_coords = pd.DataFrame(district_coords)

In [None]:
# ========== 1) SoilGrids API ==========

# ---------------------------
# CONFIG
# ---------------------------
# Endpoint that every point query in this section is sent to.
API_URL = "https://rest.isric.org/soilgrids/v2.0/properties/query"

# Example of a plain list of (latitude, longitude) pairs, kept for reference:
# coordinates = [
#     (16.3067, 80.4365),  # Example: Guntur, Andhra Pradesh
#     (15.8281, 78.0373),  # Example: Kurnool
#     (13.0827, 80.2707),  # Example: Chennai
# ]

# Soil properties to fetch; layers in a response that are not named here are skipped.
layers = [
    "bdod",
    "cec",
    "cfvo",
    "clay",
    "nitrogen",
    "ocd",
    "ocs",
    "phh2o",
    "sand",
    "silt",
    "soc",
    "wv0010",
    "wv0033",
    "wv1500",
]


# ---------------------------
# HELPER FUNCTIONS
# ---------------------------
def extract_top_depth(depth_label):
    """Return the integer a depth label starts with, for use as a sort key.

    For example "15-30cm" gives 15. A label that does not begin with a digit
    gives None.
    """
    leading_digits = re.match(r"(\d+)", depth_label)
    if leading_digits is None:
        return None
    return int(leading_digits.group(1))


def get_soil_data(lat, lon):
    """Fetch soil data for one coordinate and return it as a wide DataFrame.

    Parameters
    ----------
    lat, lon : float
        Coordinates sent as the ``lat`` / ``lon`` query parameters to ``API_URL``.

    Returns
    -------
    pandas.DataFrame or None
        One row per depth label (column ``Depth``) and one column per layer
        listed in ``layers``, holding that layer's ``mean`` value. Rows are
        ordered by the numeric top of each depth label. ``None`` is returned,
        after printing a diagnostic, when the request fails, the status code is
        not 200, the body is not JSON in the expected structure, or none of the
        requested layers is present.
    """
    # A timeout keeps a single stalled connection from hanging the whole loop.
    try:
        response = requests.get(API_URL, params={"lon": lon, "lat": lat}, timeout=60)
    except requests.RequestException as exc:
        print(f"Error fetching data for lat={lat}, lon={lon}. Request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching data for lat={lat}, lon={lon}. Status code: {response.status_code}, Response content: {response.text}")
        return None

    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of the JSON decode errors raised by
        # `response.json()`, and unlike `requests.exceptions.JSONDecodeError`
        # it exists in every requests release.
        print(f"Error decoding JSON for lat={lat}, lon={lon}. Response content: {response.text}")
        return None

    try:
        layer_entries = data["properties"]["layers"]
    except (KeyError, TypeError):
        print(f"Unexpected response structure for lat={lat}, lon={lon}. Response content: {response.text}")
        return None

    # Collect rows in long format, keeping only the requested layers.
    rows = [
        {"Depth": dep["label"], "Layer": layer["name"], "Mean": dep["values"].get("mean")}
        for layer in layer_entries
        if layer["name"] in layers
        for dep in layer["depths"]
    ]

    df_long = pd.DataFrame(rows)
    if df_long.empty:
        return None

    # Convert to wide format: one row per depth, one column per layer.
    df_wide = df_long.pivot(index="Depth", columns="Layer", values="Mean").reset_index()

    # Sort by the numeric top of the depth interval. Several labels can share a
    # top depth (e.g. "0-5cm" and "0-30cm"), so use a stable sort to keep their
    # relative order deterministic.
    df_wide["top_depth"] = df_wide["Depth"].apply(extract_top_depth)
    df_wide = (
        df_wide.sort_values("top_depth", kind="mergesort")
        .drop(columns=["top_depth"])
        .reset_index(drop=True)
    )

    return df_wide


# ---------------------------
# MAIN SCRIPT
# ---------------------------
# Fetch one soil profile per district; successful frames accumulate in `all_data`.
all_data = []

for loc in district_coords:
    lat, lon = loc["lat"], loc["lon"]
    district = loc["district"]

    print(f"Fetching soil data for {district} (lat={lat}, lon={lon})...")
    df_location = get_soil_data(lat, lon)

    if df_location is not None:
        # Tag every depth row with the district and the point it was queried at,
        # so rows stay identifiable once the frames are concatenated.
        df_location["District"] = district
        df_location["Latitude"] = lat
        df_location["Longitude"] = lon
        all_data.append(df_location)
        print(f"‚úÖ Data fetched for {district} ({len(df_location)} depth layers)")
        print(df_location)
    else:
        print(f"‚ö†Ô∏è No data for {district}")

    time.sleep(2)  # Add a 2-second delay between requests

Fetching soil data for Thanjavur (lat=10.78, lon=79.13)...
‚úÖ Data fetched for Thanjavur (7 depth layers)
Layer      Depth   bdod    cec   cfvo   clay  nitrogen    ocd   ocs  phh2o  \
0         0-30cm    NaN    NaN    NaN    NaN       NaN    NaN  37.0    NaN   
1          0-5cm  142.0  253.0   79.0  354.0     226.0  272.0   NaN   72.0   
2         5-15cm  144.0  264.0   75.0  360.0     148.0  203.0   NaN   73.0   
3        15-30cm  147.0  267.0   84.0  395.0     111.0  157.0   NaN   73.0   
4        30-60cm  148.0  272.0  117.0  446.0     100.0  116.0   NaN   74.0   
5       60-100cm  151.0  279.0  119.0  454.0      62.0   71.0   NaN   76.0   
6      100-200cm  152.0  284.0  155.0  446.0      59.0   49.0   NaN   76.0   

Layer   sand   silt    soc  wv0010  wv0033  wv1500   District  Latitude  \
0        NaN    NaN    NaN     NaN     NaN     NaN  Thanjavur     10.78   
1      360.0  286.0  279.0   345.0   280.0   169.0  Thanjavur     10.78   
2      345.0  295.0  151.0   347.0   283.0 

In [None]:
# Combine all locations into one table and persist it.
if all_data:
    df_all = pd.concat(all_data, ignore_index=True)
    df_all.to_csv("soil_profiles.csv", index=False)
    print("\nüåæ Saved to soil_profiles.csv")
else:
    # Keep `df_all` defined (as an empty frame) so later cells that reference it
    # do not fail with a NameError when no district returned data.
    df_all = pd.DataFrame()
    print("No data collected.")

In [None]:
# Display the combined soil table.
df_all

In [None]:
import requests
import pandas as pd
from datetime import date

def _to_power_date(value):
    """Format a date-like value as the compact YYYYMMDD string sent to the API."""
    # date / datetime / Timestamp objects all expose strftime; strings are
    # expected as "YYYY-MM-DD" (or already "YYYYMMDD").
    if hasattr(value, "strftime"):
        return value.strftime("%Y%m%d")
    return str(value).replace("-", "")


def get_climate_data(lat, lon, start="2024-01-01", end="2024-12-31"):
    """Fetch daily climate parameters for one point from the NASA POWER API.

    Parameters
    ----------
    lat, lon : float
        Coordinates sent as the ``latitude`` / ``longitude`` query parameters.
    start, end : str or date-like
        First and last day of the period. Accepts "YYYY-MM-DD" strings or any
        object with a ``strftime`` method (e.g. ``datetime.date``).

    Returns
    -------
    pandas.DataFrame or None
        One row per day with a ``date`` column, one column per requested
        parameter, and constant ``latitude`` / ``longitude`` columns. ``None``
        is returned, after printing a diagnostic, when the request fails, the
        status code is not 200, or the body lacks the expected structure.
    """
    url = "https://power.larc.nasa.gov/api/temporal/daily/point"
    params = {
        "latitude": lat,
        "longitude": lon,
        "start": _to_power_date(start),
        "end": _to_power_date(end),
        "community": "ag",
        "parameters": "T2M,T2M_MAX,T2M_MIN,PRECTOTCORR,ALLSKY_SFC_SW_DWN",
        "format": "JSON",
    }

    # A timeout keeps a single stalled connection from hanging the caller's loop.
    try:
        r = requests.get(url, params=params, timeout=60)
    except requests.RequestException as exc:
        print(f"Error fetching climate data for lat={lat}, lon={lon}. Request failed: {exc}")
        return None

    if r.status_code != 200:
        print(f"Error fetching climate data for lat={lat}, lon={lon}. Status code: {r.status_code}, Response content: {r.text}")
        return None

    try:
        data = r.json()["properties"]["parameter"]
    except (ValueError, KeyError, TypeError):
        print(f"Unexpected climate response for lat={lat}, lon={lon}. Response content: {r.text}")
        return None

    # `data` maps parameter name -> {day key: value}; the day keys become the index.
    # NOTE(review): values are passed through unchanged; if the API marks missing
    # days with a sentinel fill value, it is not masked here — confirm whether it should be.
    df = pd.DataFrame(data)
    df.index = pd.to_datetime(df.index, format="%Y%m%d")
    df = df.reset_index().rename(columns={"index": "date"})
    df["latitude"] = lat
    df["longitude"] = lon
    return df


        date    T2M  T2M_MAX  T2M_MIN  PRECTOTCORR  ALLSKY_SFC_SW_DWN  \
0 2024-01-01  22.38    28.61    17.44          0.0              12.78   
1 2024-01-02  22.19    28.54    17.09          0.0              12.26   
2 2024-01-03  22.25    28.71    17.39          0.0              14.96   
3 2024-01-04  22.60    28.88    18.07          0.0              15.18   
4 2024-01-05  22.33    29.32    17.73          0.0              17.21   

   latitude  longitude  
0   16.3067    80.4365  
1   16.3067    80.4365  
2   16.3067    80.4365  
3   16.3067    80.4365  
4   16.3067    80.4365  


In [None]:
# Fetch one daily climate table per district; successful frames accumulate in
# `all_climate_data`.
all_climate_data = []

start_date = date(2024, 1, 1)  # we can change the date for our problem
end_date = date(2024, 12, 31)

for loc in district_coords:
    district = loc["district"]
    lat = loc["lat"]
    lon = loc["lon"]

    print(f"Fetching climate data for {district} (lat={lat}, lon={lon})...")
    # Pass ISO "YYYY-MM-DD" strings: get_climate_data's defaults use that form
    # and it strips the dashes itself before building the request.
    df_climate_location = get_climate_data(
        lat,
        lon,
        start=start_date.isoformat(),
        end=end_date.isoformat(),
    )

    if df_climate_location is not None:
        df_climate_location["District"] = district
        all_climate_data.append(df_climate_location)
        print(f"‚úÖ Climate data fetched for {district} ({len(df_climate_location)} days)")
    else:
        print(f"‚ö†Ô∏è No climate data for {district}")
    time.sleep(1)  # Add a small delay to avoid overwhelming the API

Fetching climate data for Thanjavur (lat=10.78, lon=79.13)...
‚úÖ Climate data fetched for Thanjavur (320 days)
Fetching climate data for Thiruvarur (lat=10.76, lon=79.83)...
‚úÖ Climate data fetched for Thiruvarur (320 days)
Fetching climate data for Mayiladuthurai (lat=11.11, lon=79.67)...
‚úÖ Climate data fetched for Mayiladuthurai (320 days)
Fetching climate data for Tiruvallur (lat=13.13, lon=80.01)...
‚úÖ Climate data fetched for Tiruvallur (320 days)
Fetching climate data for Tiruvannamalai (lat=12.23, lon=79.11)...
‚úÖ Climate data fetched for Tiruvannamalai (320 days)
Fetching climate data for Krishnagiri (lat=12.74, lon=77.83)...
‚úÖ Climate data fetched for Krishnagiri (320 days)
Fetching climate data for Dharmapuri (lat=12.06, lon=78.16)...
‚úÖ Climate data fetched for Dharmapuri (320 days)
Fetching climate data for Salem (lat=11.6, lon=78.61)...
‚úÖ Climate data fetched for Salem (320 days)
Fetching climate data for Erode (lat=11.41, lon=77.22)...
‚úÖ Climate data fetched 

In [None]:
# Stack the per-district climate frames into one table, then preview the first rows.
df_all_climate = pd.concat(objs=all_climate_data, ignore_index=True)
print("Combined Climate Data:", df_all_climate.head(), sep="\n")

Combined Climate Data:
        date    T2M  T2M_MAX  T2M_MIN  PRECTOTCORR  ALLSKY_SFC_SW_DWN  \
0 2025-01-01  24.28    27.64    22.23         0.06              18.24   
1 2025-01-02  23.91    27.53    21.43         0.12              18.11   
2 2025-01-03  23.14    26.51    20.71         0.06              17.34   
3 2025-01-04  23.29    27.54    20.25         0.13              19.24   
4 2025-01-05  23.40    27.77    20.33         0.03              19.54   

   latitude  longitude   District  
0     10.78      79.13  Thanjavur  
1     10.78      79.13  Thanjavur  
2     10.78      79.13  Thanjavur  
3     10.78      79.13  Thanjavur  
4     10.78      79.13  Thanjavur  


In [None]:
# Write the combined climate table to a CSV whose file name embeds the current
# local time (year_month_day_HHMMSS), then report the file name.
timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
df_all_climate.to_csv(f"climate_data_all_districts_{timestamp}.csv", index=False)
print(f"\n‚òÅÔ∏è Saved combined climate data to climate_data_all_districts_{timestamp}.csv")


‚òÅÔ∏è Saved combined climate data to climate_data_all_districts_2025_11_16_061226.csv
