In [7]:
import pandas as pd
import os

In [8]:
# I/O configuration: the whitespace-delimited .dat file to read and the .csv
# file the parsed table is written to.
# NOTE(review): both are absolute, machine-specific Windows paths, so the
# notebook only runs as-is on a machine with this exact directory layout.
input_file = r'C:\Users\castl\Desktop\Internship stuff\fL_Data\fL-all-bc-tag.dat'
output_file = r'C:\Users\castl\Desktop\Internship stuff\fL_Data\fL-all-bc-tag.csv'

In [9]:
def manual_parse_with_headers(input_file, output_file):
    """Parse a whitespace-delimited .dat table by hand and save it as CSV.

    The file is read line by line. The header is the first line containing
    both 'Q^2' and 'xB'; every non-empty, non-'#' line after it is treated
    as a data row. The first five whitespace-separated fields of a row are
    kept as individual values and all remaining fields are joined with single
    spaces into one final field, so a label containing spaces stays in one
    column. Rows with fewer than six fields are skipped with a warning.

    If no header line is found, default column names are used and data is
    assumed to start at line index 5 (i.e. after five preamble lines).

    Args:
        input_file: Path of the text file to read (opened as UTF-8).
        output_file: Path of the CSV file to write (without the index).

    Returns:
        A pandas DataFrame with six columns, the first five converted to
        numeric (unparseable values become NaN), or None if no data rows
        were found or any exception occurred while reading, parsing, or
        writing. Errors are reported with print() rather than raised.
    """
    # Number of leading fields per row that hold numeric values; everything
    # after them is the (possibly multi-word) label.
    n_numeric = 5

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # Find the header line - scan from the top until one matches
        column_names = None
        header_line_index = None

        for i in range(len(lines)):
            line = lines[i].strip()
            print(f"Line {i}: '{line}'")
            if 'Q^2' in line and 'xB' in line:  # This should be our header line
                column_names = line.split()
                if len(column_names) > n_numeric + 1:
                    # Treat the header like a data row: extra trailing tokens
                    # belong to the last column's (multi-word) name. Without
                    # this the name count would not match the 6-field rows.
                    column_names = (
                        column_names[:n_numeric]
                        + [' '.join(column_names[n_numeric:])]
                    )
                header_line_index = i
                print(f"Found header at line {i}: {column_names}")
                break

        if column_names is None:
            # Fallback: use default column names
            column_names = ['Q_squared', 'xB', 'W_squared', 'FL', 'errFL', 'experiment']
            header_line_index = 4  # Assume data starts after line 4
            print(f"Header line not found, using default column names: {column_names}")

        # Parse data lines (starting after the header line).
        # header_line_index is always set by this point (found or fallback).
        data_rows = []
        start_line = header_line_index + 1

        for i in range(start_line, len(lines)):
            line = lines[i].strip()
            if line and not line.startswith('#'):  # Skip empty lines and comments
                parts = line.split()
                if len(parts) >= n_numeric + 1:
                    # First 5 fields stay separate; the rest is one label field
                    row = parts[:n_numeric] + [' '.join(parts[n_numeric:])]
                    data_rows.append(row)
                    if len(data_rows) <= 3:  # Show first few rows being parsed
                        print(f"Parsed row {len(data_rows)}: {row}")
                else:
                    print(f"Warning: Skipping malformed line {i+1} (only {len(parts)} parts): {line}")

        print(f"Total data rows parsed: {len(data_rows)}")

        if not data_rows:
            print("No valid data rows found!")
            return None

        # Create DataFrame (raises, and is reported below, if the header has
        # fewer names than the six fields every row carries)
        df = pd.DataFrame(data_rows, columns=column_names)

        # Convert the numeric columns by position rather than by name, so the
        # conversion also applies when the fallback column names are in use
        # (they differ from the header spellings such as 'Q^2' and 'W^2').
        for col in list(df.columns[:n_numeric]):
            df[col] = pd.to_numeric(df[col], errors='coerce')

        print("Manual parsing successful!")
        print(f"Shape: {df.shape}")
        print(df.head(10))

        # Save to CSV
        df.to_csv(output_file, index=False)
        print(f"Saved to: {output_file}")

        return df

    except Exception as e:
        # Best-effort contract: report the failure and signal it with None so
        # the caller can branch on the result instead of handling exceptions.
        print(f"Manual parsing also failed: {e}")
        return None

# Check that the input file exists, then run the conversion.
# The parser defined in this notebook is called directly: the previously used
# name `convert_dat_file` is not defined in the visible notebook, so the cell
# would raise NameError on a fresh kernel (Restart & Run All).
# NOTE(review): if `convert_dat_file` is defined in a cell outside this view,
# confirm whether it should be called here instead.
if __name__ == "__main__":
    if os.path.exists(input_file):
        # Returns a DataFrame on success, None on any parsing/writing failure.
        result = manual_parse_with_headers(input_file, output_file)
        if result is not None:
            print("\n Conversion completed successfully!")
            print(f"Output file: {output_file}")
            print(f"Data contains {len(result)} rows of physics measurements")
        else:
            print("❌ Conversion failed!")
    else:
        print(f"❌ Error: Input file not found at: {input_file}")
        print("Please check the file path.")

Note: Using manual parsing to handle experiment names with spaces...
Line 0: 'Longitudinal proton structure function data'
Line 1: 'from L/T separated cross section experiments'
Line 2: 'P. Monaghan et al., Phys. Rev. Lett. 110 (2013) 152002'
Line 3: ''
Line 4: ''
Line 5: 'Q^2	 xB	 W^2	  FL	 errFL	 experiment'
Found header at line 5: ['Q^2', 'xB', 'W^2', 'FL', 'errFL', 'experiment']
Parsed row 1: ['0.75', '0.0625', '12.13', '0.07109', '0.01416', 'Whitlow/SLAC']
Parsed row 2: ['0.75', '0.0769', '9.88', '0.07497', '0.01291', 'Whitlow/SLAC']
Parsed row 3: ['0.75', '0.0769', '9.88', '0.09478', '0.019', 'E140X']
Total data rows parsed: 341
Manual parsing successful!
Shape: (341, 6)
    Q^2      xB    W^2       FL    errFL    experiment
0  0.75  0.0625  12.13  0.07109  0.01416  Whitlow/SLAC
1  0.75  0.0769   9.88  0.07497  0.01291  Whitlow/SLAC
2  0.75  0.0769   9.88  0.09478  0.01900         E140X
3  0.75  0.0943   8.08  0.05595  0.03925  Whitlow/SLAC
4  0.75  0.1130   6.77  0.12004  0.0185