# In [32]:
import glob
import pandas as pd
import numpy as np
import xarray as xr

# Load data file. No header row is declared, so columns get integer labels;
# everything below works on column 0, treated as the raw text of each line.
spt = "Vizag_000001_S03-2012_SPT.txt"
spt_files = pd.read_csv(spt, header=None)

# Flag the "Time Stamp= YYYY-MM-DD HH:MM:SS" lines (missing values count as
# non-matches) and keep just those lines.
pattern = r'Time Stamp= \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'
is_stamp_line = spt_files[0].str.contains(pattern, na=False)
stamp_lines = spt_files.loc[is_stamp_line, 0]

# Row labels of the time-stamp lines; used later as spectrum boundaries
matching_indices = stamp_lines.index

# Keep only the date-time text. After reset_index() the first column holds the
# original row label and the second column holds the extracted string.
date = stamp_lines.str.extract(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})').reset_index()

# Number of data rows assumed to follow the final time-stamp line. The last
# spectrum has no following time stamp to close it, so its end is synthesised.
# NOTE(review): 64 reproduces the original hard-coded "+ 65" offset — confirm
# it matches the instrument's file format.
ROWS_PER_SPECTRUM = 64

# Fail with a clear message instead of a bare IndexError on matching_indices[-1]
if len(matching_indices) == 0:
    raise ValueError("No 'Time Stamp=' lines found in " + repr(spt))

# Add the last boundary index to cover final spectrum
matching_indices = matching_indices.append(
    pd.Index([matching_indices[-1] + ROWS_PER_SPECTRUM + 1])
)

# Separate each spectrum into individual data blocks: the rows strictly between
# one boundary and the next (the time-stamp line itself is skipped).
spect = [
    spt_files.iloc[start + 1:stop].values.tolist()
    for start, stop in zip(matching_indices[:-1], matching_indices[1:])
]

# Column labels applied to every tab-split data row, in order
column_names = ["Frequency", "SmaxXpsd", "dir_angle", "spr", "skw",
                "kurt", "m2", "n2", "K", "Lat", "Lon"]

# Metadata attached to each data variable.
# NOTE(review): "unit1", "unit2", "unit3" and "n_unit" look like placeholders,
# and "kelvin" for K should be double-checked — confirm the real units.
variable_attrs = {
    "SmaxXpsd": {"units": "unit1", "long_name": "Spectral Max Power Density"},
    "dir_angle": {"units": "degrees", "long_name": "Direction Angle"},
    "spr": {"units": "degrees", "long_name": "Spread"},
    "skw": {"units": "unit2", "long_name": "Skewness"},
    "kurt": {"units": "unit3", "long_name": "Kurtosis"},
    "m2": {"units": "m^2", "long_name": "Moment Order 2"},
    "n2": {"units": "n_unit", "long_name": "Some Variable N2"},
    "K": {"units": "kelvin", "long_name": "Constant K"},
    "Lat": {"units": "degrees_north", "long_name": "Latitude"},
    "Lon": {"units": "degrees_east", "long_name": "Longitude"},
}

# One xarray dataset per spectral block, collected for later concatenation
datasets = []

for i, block_rows in enumerate(spect):
    # Each row is a single tab-delimited string held in column 0
    data_values = pd.DataFrame(block_rows)[0].str.split("\t", expand=True)
    data_values.columns = column_names

    # Convert to numbers (unparseable entries become NaN) and index by Frequency
    data_values = data_values.apply(pd.to_numeric, errors='coerce')
    data_values = data_values.set_index("Frequency")

    # Build the dataset and tag it with this block's time stamp, taken from
    # the second column of `date` at the same position as the block
    data_xr = xr.Dataset.from_dataframe(data_values)
    data_xr = data_xr.assign_coords(time=pd.to_datetime(date.iloc[i, 1]))

    # Give every variable its own copy of the metadata
    for var_name, attrs in variable_attrs.items():
        data_xr[var_name].attrs = dict(attrs)

    datasets.append(data_xr)

# Concatenate all datasets along the "time" dimension
combined_data_xr = xr.concat(datasets, dim="time")

# Add global attributes for dataset metadata
combined_data_xr.attrs = {
    "title": "Spectral Data Analysis",
    "description": "Spectral data including frequency-based metrics with time stamps",
    "units_note": "Units and descriptions for each variable can be found in variable attributes",
    "created_by": "Anoop TR ",
    "Any suggestions":"email me: anooptr43@gmail.com"
}

# Derive the output name by swapping only a trailing ".txt" for ".nc".
# str.replace() would rewrite every ".txt" occurrence in the path and, when the
# name contains no ".txt", would return the input path unchanged, so the
# NetCDF output would be written over the source file.
txt_suffix = ".txt"
if spt.endswith(txt_suffix):
    output_filename = spt[:-len(txt_suffix)] + ".nc"
else:
    output_filename = spt + ".nc"

# Write the combined dataset to disk as NetCDF
combined_data_xr.to_netcdf(output_filename)