In [None]:
import pandas as pd
import requests
import time
from datetime import datetime, timedelta

# --- Configuration ---------------------------------------------------------
excel_filename = 'ML DATASET.xlsx'            # workbook to read
target_sheets = ['BORNEO(MY)_DATA']           # worksheets to read and stack
OUTPUT_FILE = 'final_borneo(my)_dataset.csv'  # CSV written at the end of the run
SAMPLE_SIZE = 5000                            # maximum number of rows to keep

print(f"Loading sheets: {target_sheets}...")
df_list = []

try:
    for sheet in target_sheets:
        print(f" read {sheet}...")
        df_sheet = pd.read_excel(excel_filename, sheet_name=sheet)
        df_list.append(df_sheet)

    # Concatenate once, after every sheet has been read, instead of
    # rebuilding the combined frame on each loop iteration.
    df = pd.concat(df_list, ignore_index=True)
    print(f"✅ Combined Total Rows: {len(df)}")

except Exception as e:
    print(f"Error : {e}")
    print(" Check if the file name or sheet names are correct!")
    # Stop with a non-zero status; `exit()` is a `site` convenience that is
    # not guaranteed to exist and reported success (status 0) on failure.
    raise SystemExit(1)

# Down-sample only when the data set is larger than SAMPLE_SIZE. This check
# must live outside the `except` handler: there it was unreachable (it came
# after the exit call), so the full data set was always used.
if len(df) > SAMPLE_SIZE:
    df_final = df.sample(n=SAMPLE_SIZE, random_state=42)  # fixed seed: repeatable sample
else:
    df_final = df.copy()

print(f"done{len(df_final)}")

def get_raw_7days_weather(lat, lon, fire_date_str):
    """Fetch daily weather for the fire date and the seven days before it.

    Requests the ``T2M``, ``PRECTOTCORR`` and ``RH2M`` parameters from the
    NASA POWER daily point endpoint for the 8-day window that ends on the
    fire date.

    Args:
        lat: Latitude, sent to the API as ``latitude``.
        lon: Longitude, sent to the API as ``longitude``.
        fire_date_str: The fire date, either a ``pd.Timestamp`` or a value
            whose ``str()`` starts with a ``YYYY-MM-DD`` date (anything
            after the first space is ignored).

    Returns:
        A dict with the keys ``Temp_Day_Minus_{n}``, ``Rain_Day_Minus_{n}``
        and ``Humid_Day_Minus_{n}`` for ``n`` from 7 down to 0. A value is
        ``None`` when the API has no entry for that day or reports a
        temperature of -100 or lower, or a negative rain/humidity figure.
        Returns ``None`` when the date cannot be parsed, the request fails,
        the status code is not 200, or the payload has an unexpected shape.
    """
    # Build the request window: day -7 ... day 0, formatted as YYYYMMDD.
    try:
        if isinstance(fire_date_str, pd.Timestamp):
            date_obj = fire_date_str
        else:
            date_obj = datetime.strptime(str(fire_date_str).split(' ')[0], '%Y-%m-%d')

        date_keys = [
            (date_obj - timedelta(days=offset)).strftime('%Y%m%d')
            for offset in range(7, -1, -1)
        ]
    except (ValueError, TypeError, OverflowError):
        # Unparseable or out-of-range date: nothing to query.
        return None

    base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
    params = {
        'parameters': 'T2M,PRECTOTCORR,RH2M',
        'community': 'AG',
        'longitude': lon,
        'latitude': lat,
        'start': date_keys[0],
        'end': date_keys[-1],
        'format': 'JSON'
    }

    # Only the failures a network lookup can produce are treated as "no
    # data". A bare `except:` here would also swallow KeyboardInterrupt and
    # SystemExit, making a long batch run impossible to stop cleanly.
    try:
        response = requests.get(base_url, params=params, timeout=15)
        if response.status_code != 200:
            return None

        p = response.json()['properties']['parameter']

        row_weather = {}
        for days_ago, date_key in zip(range(7, -1, -1), date_keys):
            # temperature: -999 stands in for a day missing from the reply
            val_t = p['T2M'].get(date_key, -999)
            row_weather[f'Temp_Day_Minus_{days_ago}'] = val_t if val_t > -100 else None

            # rain
            val_r = p['PRECTOTCORR'].get(date_key, -999)
            row_weather[f'Rain_Day_Minus_{days_ago}'] = val_r if val_r >= 0 else None

            # humidity
            val_h = p['RH2M'].get(date_key, -999)
            row_weather[f'Humid_Day_Minus_{days_ago}'] = val_h if val_h >= 0 else None

        return row_weather
    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError):
        # Transport error, invalid JSON, or a payload of unexpected shape.
        return None


print("\nStart")

# One entry per row of df_final, in row order.
weather_results = []

for _, row in df_final.iterrows():
    # Progress report every 50 processed rows.
    if len(weather_results) % 50 == 0:
        print(f"  {len(weather_results)} / {len(df_final)} row...")

    w_data = get_raw_7days_weather(row['latitude'], row['longitude'], row['acq_date'])

    # Append an empty dict on failure so the results stay aligned with the
    # rows of df_final for the column-wise concat below.
    weather_results.append(w_data or {})

    # Pause between requests.
    time.sleep(0.3)

weather_df = pd.DataFrame(weather_results)

# Reset to a 0..n-1 index so both frames line up positionally.
df_final = df_final.reset_index(drop=True)
df_final = pd.concat([df_final, weather_df], axis=1)

# Keep only rows that have a fire-day temperature. The weather columns are
# named `<Kind>_Day_Minus_<n>`, so the fire day is `Temp_Day_Minus_0`;
# `Temp_Day_0` does not exist and made dropna raise KeyError.
FIRE_DAY_TEMP = 'Temp_Day_Minus_0'
if FIRE_DAY_TEMP in df_final.columns:
    df_final = df_final.dropna(subset=[FIRE_DAY_TEMP])
else:
    # Every lookup failed, so no weather columns exist: no row qualifies.
    df_final = df_final.iloc[0:0]

df_final.to_csv(OUTPUT_FILE, index=False)

print("\n" + "="*40)
print(f"SAVE: {OUTPUT_FILE}")
print("="*40)
