In [None]:
import tarfile
import pandas as pd
import random
import io
import pickle
import os

# Extract 100 random daily measurement locations
### Stored individually in 'lamah_100_dataframes.csv'
### Stored concatenated in 'lamah_100_concatenated.csv'


In [None]:
# Draw a random sample of daily time-series CSVs from the LamaH-CE archive and
# store each one as a comma-separated CSV in `output_dir`.

# Path to your 2_LamaH-CE_daily.tar.gz file
tar_path = "./2_LamaH-CE_daily.tar.gz"
output_dir = "./data"

# Archive folder to sample from: "A_basins_total_upstrm -> 2_timeseries -> daily -> *.csv"
member_prefix = "A_basins_total_upstrm/2_timeseries/daily/"
n_samples = 100    # number of CSV files to draw
random_seed = 42   # fixed seed: re-running the cell draws the same files instead of
                   # adding a different random set to output_dir on every run

# Make sure output directory exists
os.makedirs(output_dir, exist_ok=True)

first_df = None
saved_count = 0

# Open the tar.gz file
with tarfile.open(tar_path, "r:gz") as tar:
    # Collect the matching members; sorting by name makes the seeded draw
    # independent of the order in which members are stored in the archive.
    csv_files = sorted(
        (
            member for member in tar.getmembers()
            if member.name.startswith(member_prefix)
            and member.name.endswith(".csv")
        ),
        key=lambda member: member.name,
    )

    # random.sample raises an opaque ValueError when asked for more items than
    # exist, so fail with a clear message on an empty match and clamp otherwise.
    if not csv_files:
        raise ValueError(
            f"No CSV files found under '{member_prefix}' in {tar_path}"
        )
    sample_size = min(n_samples, len(csv_files))
    if sample_size < n_samples:
        print(
            f"Warning: only {len(csv_files)} CSV files available, "
            f"requested {n_samples}."
        )

    # Use a private generator so the global `random` state is left untouched.
    selected_files = random.Random(random_seed).sample(csv_files, sample_size)

    print(f"Selected {len(selected_files)} files.")

    for member in selected_files:
        # extractfile returns None for members that are not regular files or links
        f = tar.extractfile(member)
        if f is None:
            continue

        # Close the member handle as soon as its bytes have been read.
        with f:
            # LamaH files are semicolon-separated
            df = pd.read_csv(io.BytesIO(f.read()), sep=";", encoding="utf-8")

        # Save each DataFrame as its own CSV in ./data/
        filename_only = os.path.basename(member.name)   # e.g. "ID_163.csv"
        out_path = os.path.join(output_dir, filename_only)
        df.to_csv(out_path, index=False)
        saved_count += 1

        # Keep first df just to show a sample
        if first_df is None:
            first_df = df.copy()

# Report the number of files actually written, not the number requested.
print(f"Saved {saved_count} separate CSV files to {output_dir}/")

if first_df is not None:
    print("Sample DataFrame (first file):")
    print(first_df.head())

    # Optional: attributes for later use
    attributes = list(first_df.columns)
    print("\nAttributes:", attributes)

### How to load the data from now on (from the saved CSV files in `./data`)

In [None]:
# Preview the first two (alphabetically sorted) CSV files found in `csv_dir`.
csv_dir = "./data"

# Alphabetically ordered names of every CSV in the folder
csv_files = sorted(
    filter(lambda entry: entry.endswith(".csv"), os.listdir(csv_dir))
)

print("Found CSV files:", len(csv_files))
print("First two files:", csv_files[:2])

# Read the two leading files, one after the other
first_path = os.path.join(csv_dir, csv_files[0])
df1 = pd.read_csv(first_path)
second_path = os.path.join(csv_dir, csv_files[1])
df2 = pd.read_csv(second_path)

# Show the file name followed by the head of each frame
previews = (
    ("\nFirst CSV file:", csv_files[0], df1),
    ("\nSecond CSV file:", csv_files[1], df2),
)
for header, file_name, frame in previews:
    print(header, file_name)
    print(frame.head())

| Attribute               | Description                                                                                                                |
|-------------------------|----------------------------------------------------------------------------------------------------------------------------|
| YYYY                    | Year of observation (e.g., 1981)                                                                                           |
| MM                      | Month (1 to 12)                                                                                                            |
| DD                      | Day of month (1 to 31)                                                                                                     |
| DOY                     | Day of year (1 to 365/366); allows for seasonal analysis                                                                  |
| 2m_temp_max             | Maximum air temperature at 2 meters above ground (°C)                                                                      |
| 2m_temp_mean            | Mean air temperature at 2 meters above ground (°C)                                                                         |
| 2m_temp_min             | Minimum air temperature at 2 meters above ground (°C)                                                                      |
| 2m_dp_temp_max          | Maximum dew point temperature at 2 meters (°C); temperature at which air becomes saturated                                  |
| 2m_dp_temp_mean         | Mean dew point temperature at 2 meters (°C)                                                                                 |
| 2m_dp_temp_min          | Minimum dew point temperature at 2 meters (°C)                                                                             |
| 10m_wind_u              | East-west wind speed component at 10 meters (m/s)                                                                           |
| 10m_wind_v              | North-south wind speed component at 10 meters (m/s)                                                                        |
| fcst_alb                | Forecasted surface albedo (reflectivity; unitless or percent)                                                               |
| lai_high_veg            | Leaf Area Index for high vegetation; proxy for vegetation cover (dimensionless)                                             |
| lai_low_veg             | Leaf Area Index for low vegetation (dimensionless)                                                                          |
| swe                     | Snow Water Equivalent (mm); total water content in snowpack                                                                |
| surf_net_solar_rad_max  | Maximum net solar radiation at surface (W/m²); key for surface heating and energy balance                                    |
| surf_net_solar_rad_mean | Mean net solar radiation at surface (W/m²)                                                                                  |
| surf_net_therm_rad_max  | Maximum net thermal (infrared) radiation at surface (W/m²); affects nighttime cooling                                        |
| surf_net_therm_rad_mean | Mean net thermal radiation at surface (W/m²)                                                                                |
| surf_press              | Surface atmospheric pressure (hPa)                                                                                           |
| total_et                | Total evapotranspiration (mm); water flux from land/vegetation to atmosphere                                                |
| prec                    | Precipitation (mm); daily total                                                                                             |
| volsw_123               | Volumetric soil water content in soil layers 1+2+3 (mm or %)                                                               |
| volsw_4                 | Volumetric soil water content in soil layer 4 (mm or %)                                                                     |
