In [21]:
import json
from netCDF4 import Dataset, num2date
import numpy as np
from tqdm import notebook

# Inputs
nc_path = "/Users/manaruchi/Desktop/WeatherDataViz/raw_data/AFCNWP_WRF_model_output_00UTC.nc"
#target_lat = 28.6139 
#target_lon = 77.2090
starting_lon = 45
ending_lon = 110
starting_lat = -5
ending_lat = 40
# For this sample project, using interval of 5 degree
spacing = 5 # Spatial Interval

# Generate list of lat and lon to generate point data for
lat_to_process = np.arange(starting_lat, ending_lat + spacing, spacing)
lon_to_process = np.arange(starting_lon, ending_lon + spacing, spacing)
lat_long_list = []
for lat in lat_to_process:
    for lon in lon_to_process:
        lat_long_list.append([lat, lon])


# Open NetCDF
nc = Dataset(nc_path)

#Set time & vertical level limit
tlimit = 72
tlev = 32

# Extract lat/lon
lats = nc.variables['XLAT'][0, :, :]  # [time, y, x] → use time=0
lons = nc.variables['XLONG'][0, :, :]

def generateJSON(target_lat, target_lon):
    # Compute distance to target
    dist_sq = (lats - target_lat)**2 + (lons - target_lon)**2
    j, i = np.unravel_index(np.argmin(dist_sq), lats.shape)  # (y, x) indices
    
    # Extract time and variable
    times = nc.variables['Times'][:]
    
    # Get Surface Wind Data
    u = nc.variables['U10'][:, j, i]  # Time series at closest grid point
    v = nc.variables['V10'][:, j, i]  
    speed = np.sqrt(u**2 + v**2)
    direction = (np.arctan2(-u, -v) * 180 / np.pi) % 360

    # Get Upper Wind Data
    upper_wind_speeds = []
    upper_wind_dirs = []
    for lev in range(tlev):
        u = nc.variables['U'][:, lev, j, i]
        v = nc.variables['V'][:, lev, j, i]
        u_speed =  np.sqrt(u**2 + v**2) * 1.94384
        u_direction = (np.arctan2(-u, -v) * 180 / np.pi) % 360
        upper_wind_dirs.append(u_direction)
        upper_wind_speeds.append(u_speed)
        
    # Get Temperature Data
    t = nc.variables['T2'][:, j, i] - 273.15
    
    # Get Upper Level Temperature Data
    upper_temperatures = []
    for lev in range(tlev):
        potT = nc.variables['T'][:, lev, j, i]
        perP = nc.variables['P'][:, lev, j, i]
        baseP = nc.variables['PB'][:, lev, j, i]
    
        # Compute Pressure in Pa → convert to hPa
        pressure = (perP + baseP) / 100.0 
    
        # Compute potential temperature: theta = T + 300
        theta = potT + 300.0 # In K
    
        # Convert to actual temperature using Poisson's equation
        T_actual = theta * (pressure / 1000.0) ** 0.2854  # in K
        T_actual = T_actual - 273.15 # In degree C
    
        upper_temperatures.append(T_actual)

    # Get Altitude for each level
    altitudes = []
    for lev in range(tlev):
        perGeopot = nc.variables['PH'][:, lev, j, i]
        baseStateGeopot = nc.variables['PHB'][:, lev, j, i]

        # Compute total geopotential
        ph_total = perGeopot + baseStateGeopot  # shape: (time, nz_stag, ny, nx)
        
        # Convert to geometric height in meters
        z_stag = ph_total / 9.81  # shape: (time, nz_stag, ny, nx)

        altitudes.append(z_stag)

    print(altitudes.shape)

        
    
    # Get Rainfall Data
    rain = nc.variables['RAINC'][:, j, i] + nc.variables['RAINNC'][:, j, i]

    # Get Cloud Fraction Data
    cloud_fraction_list = []
    for lev in range(tlev):
        cldfra = nc.variables['CLDFRA'][:, lev, j, i]
        cloud_fraction_list.append(cldfra)
    # Convert time to datetime
    try:
        time_units = nc.variables['XTIME'].units
        time_datetimes = num2date(nc.variables['XTIME'][:], units=time_units)
    except:
        # Fallback if 'XTIME' isn't present
        time_datetimes = ["".join(t.astype(str)) for t in times]
    
    # Create JSON
    data = []
    
    
    for i in range(tlimit-1):
        new_dict = dict()
        new_dict['time'] = str(time_datetimes[i])
        new_dict['timeIndex'] = i
        new_dict["wind_direction_surface"] = round(direction[i])
        new_dict["wind_speed_surface"] = float(round(speed[i] * 1.94384,2))
        new_dict["temperature"] = float(round(t[i],2))
        new_dict["rainfall"] = float(round(rain[i+1]-rain[i]))
    
        for lev in range(tlev):
            new_dict[f"wind_direction_{lev}"] = round(upper_wind_dirs[lev][i])
            new_dict[f"wind_speed_{lev}"] = float(round(upper_wind_speeds[lev][i],2))
            new_dict[f"temperature_{lev}"] = float(round(upper_temperatures[lev][i],2))
            new_dict[f"cloud_fraction_{lev}"] = float(round(cloud_fraction_list[lev][i],2))
        data.append(new_dict)

    # Write to JSON file
    with open(f"point_data_{target_lat}_{target_lon}.json", "w") as f:
        json.dump(data, f, indent=2)


In [None]:
for x in notebook.tqdm(lat_long_list, desc="Generating Point Files..."):
    generateJSON(x[0], x[1])


Generating Point Files...:   0%|          | 0/140 [00:00<?, ?it/s]

[masked_array(data=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0],
             mask=[False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,


In [10]:
ph_total

NameError: name 'ph_total' is not defined

In [27]:
cldfra.shape

(124, 32, 198, 255)

In [11]:
z_full.shape

(124, 33, 198, 255)

# Testing Zone (Ignore)

In [1]:
import json
import random
from datetime import datetime, timedelta

pressure_levels = [
    925.67596, 919.47375, 911.6386, 901.81964, 889.63293, 874.685,
    856.61194, 835.12115, 810.0484, 781.4113, 749.4497, 714.624,
    677.5836, 639.00775, 599.2603, 558.45807, 516.7484, 474.3227,
    431.42194, 388.34344, 345.44986, 303.16223, 261.95807, 222.5446,
    187.03839, 156.94997, 131.70135, 110.51484, 92.73604, 77.81701,
    65.29808, 54.793236
]

start_time = datetime(2024, 6, 1, 0, 0)  # starting datetime
hours = 24 * 3  # 3 days hourly data

data = []

for hour in range(hours):
    current_time = start_time + timedelta(hours=hour)
    iso_time = current_time.isoformat() + "Z"
    for pressure in pressure_levels:
        entry = {
            "time": iso_time,
            "pressure": pressure,
            "cloud_fraction": round(random.uniform(0, 1), 3)
        }
        data.append(entry)

# Save to JSON file
with open("cloud_fraction_sample.json", "w") as f:
    json.dump(data, f, indent=2)

print("Sample JSON file 'cloud_fraction_sample.json' created successfully.")

Sample JSON file 'cloud_fraction_sample.json' created successfully.


In [2]:
ds['PH']

NameError: name 'ds' is not defined

In [16]:
cldfra = ncfile.variables['CLDFRA'][:, j, i]  # shape: (time, bottom_top, south_north, west_east)
ph = ncfile.variables['PH'][:, j, i]          # Perturbation geopotential
phb = ncfile.variables['PHB'][:, j, i]    

NameError: name 'j' is not defined