# In [None]:
import csv
import re

def _infer_nccsv_data_type(variable_name):
    """Guess the NCCSV ``*DATA_TYPE*`` of a variable from its name.

    This is a heuristic and may need manual adjustment afterwards: names
    containing 'time' become 'long', names containing 'format' or 'target'
    become 'String', and everything else defaults to 'float'. The match is
    case-sensitive.
    """
    if 'time' in variable_name:
        return 'long'  # Unix epoch times are often large integers
    if any(keyword in variable_name for keyword in ('format', 'target')):
        return 'String'
    return 'float'


def _format_nccsv_value(attribute_name, value, data_type):
    """Render one attribute value as the third cell of an NCCSV metadata line.

    A '_FillValue' of a 'float' variable gets the 'f' suffix. Any other value
    containing a comma or a double quote is wrapped in double quotes with
    embedded quotes doubled; everything else is written unchanged.
    """
    if attribute_name == '_FillValue' and data_type == 'float':
        return f"{value}f"
    if ',' in value or '"' in value:
        # Escape outside the f-string: a backslash inside a replacement
        # field is a SyntaxError before Python 3.12.
        escaped = value.replace('"', '""')
        return f'"{escaped}"'
    return value


def convert_oshen_metadata_to_nccsv(metadata_csv_path, output_nccsv_path):
    """
    Converts the specific 'oshen_metadata.csv' file into the header
    section of an nccsv file.

    This function is tailored to the structure of 'oshen_metadata.csv', where
    the first row holds the column headers and each following row defines a
    variable (name in the first column) and its properties. Recognised
    columns are mapped to NCCSV attribute names; unrecognised columns, empty
    cells, blank rows and rows without a variable name are skipped.

    When several columns map to the same attribute (both 'Averaging Method'
    and 'Comment' map to 'comment'), their values are joined with '; ' into
    a single attribute line so that no attribute is defined twice.

    Errors are reported by printing a message; nothing is raised to the
    caller and no output file is written when the input cannot be read.

    Args:
        metadata_csv_path (str): The file path for the input 'oshen_metadata.csv'.
        output_nccsv_path (str): The file path for the output nccsv file.

    Returns:
        None
    """
    # How the source CSV headers translate into NCCSV attribute names.
    column_to_attribute_map = {
        'Unit': 'units',
        'Description': 'long_name',
        'Averaging Method': 'comment',  # Or a custom attribute
        'Valid Min': 'valid_min',
        'Valid Max': 'valid_max',
        'Sensor': 'sensor_model',
        'Sample Rate / Hz': 'sensor_sample_rate_hz',
        'Sensor Height': 'sensor_height',
        'Fill Value': '_FillValue',
        'Comment': 'comment',
    }

    # The source CSV doesn't contain global attributes, so placeholder
    # defaults are emitted for the user to edit.
    global_lines = [
        '*GLOBAL*,Conventions,"CF-1.6, ACDD-1.3, NCCSV-1.0"',
        '*GLOBAL*,title,"OSHEN Data (Please edit this title)"',
        '*GLOBAL*,summary,"(Please add a summary of the dataset)"',
        '*GLOBAL*,institution,"(Please add your institution)"',
    ]

    try:
        # Starts with one blank entry so the variable section is separated
        # from its heading by an empty line.
        variable_lines = ['']
        variable_names_for_data_header = []

        # newline='' lets the csv module handle line endings itself, as its
        # documentation requires for file objects.
        with open(metadata_csv_path, 'r', encoding='utf-8', newline='') as infile:
            reader = csv.reader(infile)
            header = next(reader, None)
            if header is None:
                print(f"Error: The file '{metadata_csv_path}' is empty.")
                return
            # Tolerate stray whitespace around header names.
            header = [column.strip() for column in header]

            for row in reader:
                # Skip blank rows and rows that do not name a variable.
                if not any(row):
                    continue
                variable_name = row[0]
                if not variable_name:
                    continue

                variable_names_for_data_header.append(variable_name)
                variable_lines.append(f"# --- Variable: {variable_name} ---")

                data_type = _infer_nccsv_data_type(variable_name)
                variable_lines.append(f'{variable_name},*DATA_TYPE*,{data_type}')

                # zip() stops at the shorter sequence, so cells beyond the
                # last header column are ignored instead of raising.
                attributes = {}
                for column, value in zip(header[1:], row[1:]):
                    if not value:
                        continue
                    attribute_name = column_to_attribute_map.get(column)
                    if not attribute_name:
                        continue
                    if attribute_name in attributes:
                        attributes[attribute_name] = (
                            f"{attributes[attribute_name]}; {value}"
                        )
                    else:
                        attributes[attribute_name] = value

                for attribute_name, value in attributes.items():
                    value_str = _format_nccsv_value(attribute_name, value, data_type)
                    variable_lines.append(
                        f'{variable_name},{attribute_name},{value_str}'
                    )

                variable_lines.append('')  # Blank line for readability

        # NOTE(review): the NCCSV specification appears to require the
        # *GLOBAL* Conventions line to be the very first line of the file and
        # does not obviously define '#' comment lines; the layout below is
        # kept as-is for human readability - confirm against the spec before
        # feeding the result to an NCCSV reader.
        final_output = ["# nccsv\n", "# Global Attributes\n"]
        final_output.extend(global_lines)
        final_output.append("\n# Variable Attributes\n")
        final_output.extend(variable_lines)
        final_output.append("*END_METADATA*\n")
        final_output.append("# Data\n")
        final_output.append(",".join(variable_names_for_data_header) + "\n")
        final_output.append("*END_DATA*\n")

        with open(output_nccsv_path, 'w', encoding='utf-8', newline='\n') as outfile:
            outfile.write("\n".join(final_output))

        print(f"Successfully converted metadata to '{output_nccsv_path}'")

    except FileNotFoundError as e:
        # Either path can be the missing one (e.g. a non-existent output
        # directory), so report the path the OS actually complained about.
        missing_path = e.filename if e.filename is not None else metadata_csv_path
        print(f"Error: The file '{missing_path}' was not found.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# --- Example Usage ---
if __name__ == '__main__':
    # Input and output paths; both are resolved relative to the current
    # working directory, so run the script from the folder that contains
    # 'oshen_metadata.csv'.
    input_file = 'oshen_metadata.csv'
    output_file = 'oshen_output.nccsv'

    # Call the conversion function
    convert_oshen_metadata_to_nccsv(input_file, output_file)

    # Optional: Print the content of the created file to the console.
    # The converter reports failures by printing instead of raising, so the
    # output file may not exist here; read it defensively. It is written as
    # UTF-8, so it is read back with the same encoding rather than the
    # platform default.
    try:
        with open(output_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except OSError as e:
        print(f"\nCould not read '{output_file}': {e}")
    else:
        print(f"\n--- Content of generated {output_file} ---")
        print(content)
