In [12]:
'''
This script is used to make histograms of any features from the clouds_condensed h5 files.
'''
import h5py
import random
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.optim as optim
from torch.utils.data import Dataset, random_split
from torch.utils.data.distributed import DistributedSampler
import torch.distributed as dist
import torch.multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor, as_completed

import torch_geometric
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

import sys
from pathlib import Path
import re
import ast
import os
from tqdm import tqdm
torch.multiprocessing.set_sharing_strategy('file_system') 

# Root folder that is searched (recursively) for HDF5 input files.
base_dir = Path('C:/Users/Nyan/Documents/GitHub/Physics-Informed-Neural-Networks-')

# Quick sanity output: where the path resolves to and whether it is present.
print("Resolved base_dir:", base_dir.resolve())
print("Exists?", base_dir.exists())

# Every *.hdf5 file found anywhere below base_dir.
fileset = [*base_dir.rglob('*.hdf5')]

# Names of the HDF5 datasets to histogram; one plot is produced per entry.
vars = ['Training']
num_vars = len(vars)

# Function to process the file
def process_file(file, file_idx):
    '''
    Read every dataset listed in the module-level ``vars`` from one HDF5 file.

    Parameters:
        file: path of the HDF5 file; it is opened read-only.
        file_idx: accepted so existing callers keep working; not used here.

    Returns:
        A list with one NumPy array per entry of ``vars``, in the same order.

    Errors raised while opening the file or looking up a dataset are not
    handled here and propagate to the caller.
    '''
    with h5py.File(file, 'r') as h5file:
        # Convert each dataset to an array while the file is still open; a
        # single conversion is enough, so no second np.array() pass is made.
        return [np.array(h5file[var]) for var in vars]


# Per-variable accumulators: prob_data[i] holds one flattened array per input
# file for vars[i].
prob_data = [[] for _ in range(num_vars)]
# Loads data in parallel
with ThreadPoolExecutor(max_workers=16) as executor:  # Adjust workers as needed
    try:
        # Map each future back to the file it was submitted for. The second
        # argument is the text between the last '_' and the following '.' in
        # the path string.
        futures = {
            executor.submit(process_file, file, str(file).split('_')[-1].split('.')[0]): file
            for file in fileset
        }

        for future in as_completed(futures):
            for bucket, values in zip(prob_data, future.result()):
                values = np.asarray(values)
                # Keep each file's data as ONE flat array. Extending the list
                # with the array itself would split it along its first axis,
                # which stores bare scalars for 1-D datasets (np.concatenate
                # rejects those) and leaves >2-D data unflattened.
                # Empty datasets are skipped so a variable with no values
                # still has an empty accumulator.
                if values.size:
                    bucket.append(values.ravel())

    except KeyboardInterrupt:
        print("Keyboard interrupt detected. Shutting down...")
        executor.shutdown(wait=True, cancel_futures=True)
        # sys.exit is used instead of the interactive-only exit() helper.
        sys.exit(1)



# Directory the histogram images are written to.
save_path = "C:\\Users\\Nyan\\Documents\\GitHub\\Physics-Informed-Neural-Networks-"

# Draw one normalised step histogram per variable and save it as <var>.png.
for i, var in enumerate(vars):
    # Skip if no data collected
    if not prob_data[i]:
        print(f"[Skipping] No data found for variable '{var}'")
        continue

    fig = None  # set once a figure exists so `finally` knows what to close
    try:
        # Flatten and clean the array
        prob_data_flat = np.concatenate(prob_data[i])
        prob_data_flat = prob_data_flat[~np.isnan(prob_data_flat)]

        if prob_data_flat.size == 0:
            print(f"[Skipping] '{var}' has only NaNs or empty arrays.")
            continue

        # Log scale is only requested when at least one value is positive.
        # NOTE(review): per the matplotlib docs, `log=` on hist() applies to
        # the histogram (count) axis, not to the data values -- confirm that
        # gating it on the sign of the data is intended.
        log_scale = bool(np.any(prob_data_flat > 0))
        if not log_scale:
            print(f"[Skipping log] No positive values for '{var}'")

        prob_avg = np.mean(prob_data_flat)

        # Plot
        fig = plt.figure()
        plt.hist(prob_data_flat, bins="rice", log=log_scale, density=True,
                 histtype="step", label=f"mean = {prob_avg:.4f}")
        plt.title(f"{var}")
        plt.legend()
        plt.savefig(f"{save_path}/{var}.png")

    except Exception as e:
        # Best effort: report the failure and move on to the next variable.
        print(f"[Error] Failed to plot '{var}': {e}")

    finally:
        # Close the figure even when hist()/savefig() raised, so failed plots
        # do not leave open figures behind.
        if fig is not None:
            plt.close(fig)




Resolved base_dir: C:\Users\Nyan\Documents\GitHub\Physics-Informed-Neural-Networks-
Exists? True


In [17]:
'''
This script is used to make histograms of any features from the clouds_condensed h5 files.
'''
import h5py
import random
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.optim as optim
from torch.utils.data import Dataset, random_split
from torch.utils.data.distributed import DistributedSampler
import torch.distributed as dist
import torch.multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor, as_completed

import torch_geometric
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

import sys
from pathlib import Path
import re
import ast
import os
from tqdm import tqdm
torch.multiprocessing.set_sharing_strategy('file_system') 

# Root folder that is searched (recursively) for HDF5 input files.
base_dir = Path('C:/Users/Nyan/Documents/GitHub/Physics-Informed-Neural-Networks-')

# Every *.hdf5 file found anywhere below base_dir.
fileset = [*base_dir.rglob('*.hdf5')]

# Names of the HDF5 datasets to histogram; one plot is produced per entry.
vars = ['Training']
num_vars = len(vars)

# Function to process the file
def process_file(file, file_idx):
    '''
    Read every dataset listed in the module-level ``vars`` from one HDF5 file.

    Parameters:
        file: path of the HDF5 file; it is opened read-only.
        file_idx: accepted so existing callers keep working; not used here.

    Returns:
        A list with one NumPy array per entry of ``vars``, in the same order.

    Errors raised while opening the file or looking up a dataset are not
    handled here and propagate to the caller.
    '''
    with h5py.File(file, 'r') as h5file:
        # Convert each dataset to an array while the file is still open; a
        # single conversion is enough, so no second np.array() pass is made.
        return [np.array(h5file[var]) for var in vars]


# Per-variable accumulators: prob_data[i] holds one flattened array per input
# file for vars[i].
prob_data = [[] for _ in range(num_vars)]
# Loads data in parallel
with ThreadPoolExecutor(max_workers=16) as executor:  # Adjust workers as needed
    try:
        # Map each future back to the file it was submitted for. The second
        # argument is the text between the last '_' and the following '.' in
        # the path string.
        futures = {
            executor.submit(process_file, file, str(file).split('_')[-1].split('.')[0]): file
            for file in fileset
        }

        for future in as_completed(futures):
            for bucket, values in zip(prob_data, future.result()):
                values = np.asarray(values)
                # Keep each file's data as ONE flat array. Extending the list
                # with the array itself would split it along its first axis,
                # which stores bare scalars for 1-D datasets (np.concatenate
                # rejects those) and leaves >2-D data unflattened.
                # Empty datasets are skipped so a variable with no values
                # still has an empty accumulator.
                if values.size:
                    bucket.append(values.ravel())

    except KeyboardInterrupt:
        print("Keyboard interrupt detected. Shutting down...")
        executor.shutdown(wait=True, cancel_futures=True)
        # sys.exit is used instead of the interactive-only exit() helper.
        sys.exit(1)



# Directory the histogram images are written to.
save_path = "C:\\Users\\Nyan\\Documents\\GitHub\\Physics-Informed-Neural-Networks-"

# Draw one step histogram per variable and save it as <var>.png.
for i, var in enumerate(vars):
    # Skip if no data collected
    if not prob_data[i]:
        print(f"[Skipping] No data found for variable '{var}'")
        continue

    fig = None  # set once a figure exists so `finally` knows what to close
    try:
        # Flatten and clean the array
        prob_data_flat = np.concatenate(prob_data[i])
        prob_data_flat = prob_data_flat[~np.isnan(prob_data_flat)]

        # Nothing left after dropping NaNs: the mean would be NaN and the
        # histogram empty, so skip the variable instead of saving that plot.
        if prob_data_flat.size == 0:
            print(f"[Skipping] '{var}' has only NaNs or empty arrays.")
            continue

        prob_avg = np.mean(prob_data_flat)

        # Plot
        fig = plt.figure()
        plt.hist(prob_data_flat, bins="rice",
                 histtype="step", label=f"mean = {prob_avg:.4f}")
        plt.title(f"{var}")
        plt.legend()
        plt.savefig(f"{save_path}/{var}.png")

    except Exception as e:
        # Best effort: report the failure and move on to the next variable.
        print(f"[Error] Failed to plot '{var}': {e}")

    finally:
        # Close the figure even when hist()/savefig() raised, so failed plots
        # do not leave open figures behind.
        if fig is not None:
            plt.close(fig)


