In [None]:
import os
import fnmatch
import pandas as pd
import math
import re

# Print wide DataFrames on a single line instead of wrapping their columns.
pd.set_option('display.expand_frame_repr', False)
# When True, find_files() keeps only the first two matching files.
IS_DEBUG_MODE = False
# Value written to the 'Available clearance' column of the second report
# table (that table's header rows label the column in mm).
CLEARANCE = 190

# Root directories that main() searches recursively for result files.
DIR_PATHS = [
    r'analysis_path'
]
# CSV file that receives both report tables.
OUTPUT_FILE_NAME = 'Report_Tables.csv'

# fnmatch patterns of the result files to collect. The pattern values (not
# the keys) are what FUNCTION_MAP uses to pick the analysis function.
SEARCH_PATTERNS = {
    'ronja.dat': '*rj.dat',
    'ph_riser.ARS': '*ph10.ARS'
}

def find_files(directory_path, search_patterns):
    """Recursively collect the files under a directory that match any pattern.

    Args:
        directory_path: Root directory that is walked with ``os.walk``.
        search_patterns: Mapping whose values are ``fnmatch`` patterns; the
            keys are not used here.

    Returns:
        A DataFrame with one row per (file, matching pattern) pair and the
        columns ``'Search String'``, ``'File Name'`` and ``'Path'``. The
        columns exist even when nothing matched, so callers can always
        index them. When the module-level ``IS_DEBUG_MODE`` flag is true,
        only the first two matches are kept.
    """
    columns = ['Search String', 'File Name', 'Path']
    file_data = []

    for root, _, files in os.walk(directory_path):
        for pattern in search_patterns.values():
            for filename in fnmatch.filter(files, pattern):
                file_data.append({
                    'Search String': pattern,
                    'File Name': filename,
                    'Path': os.path.join(root, filename)
                })

    if IS_DEBUG_MODE:
        file_data = file_data[:2]

    # Explicit columns keep the frame well-formed when no file matched.
    return pd.DataFrame(file_data, columns=columns)

def extract_lines(file_path, search_string, start_lines, no_lines):
    """Return a slice of lines positioned relative to the first matching line.

    The file is read completely. With ``i`` the index of the first line that
    contains ``search_string``, the result is ``lines[i+start_lines:i+no_lines]``
    (both values are offsets from the match; the end offset is exclusive).
    Lines keep their trailing newline. An empty list is returned when no
    line contains ``search_string``.
    """
    with open(file_path, 'r') as handle:
        content = handle.readlines()

    hit = next(
        (index for index, text in enumerate(content) if search_string in text),
        None,
    )
    if hit is None:
        return []
    return content[hit + start_lines:hit + no_lines]

def find_string_in_file(file_path, search_string):
    """Return True if any line of the file contains ``search_string``.

    The file is scanned line by line and the scan stops at the first hit.
    """
    with open(file_path, 'r') as handle:
        for text in handle:
            if search_string in text:
                return True
    return False

def extract_lines_and_find_strings(file_path, search_strings):
    """Locate several search strings in a file and slice lines around each.

    Args:
        file_path: Path of the text file to scan.
        search_strings: Mapping ``search string -> (start_offset, end_offset)``.
            With ``i`` the index of the first line containing the search
            string, the extracted slice is ``lines[i+start_offset:i+end_offset]``
            (the end offset is exclusive; it is not a line count).

    Returns:
        A tuple ``(extracted_lines, found_strings)``:
        ``extracted_lines`` maps each search string to its list of lines
        (without line terminators, empty if the string was not found) and
        ``found_strings`` maps each search string to a bool.
    """
    # Read and split the text in one go. Splitting fixed-size chunks
    # separately cuts any line that crosses a chunk boundary in two, which
    # hides search strings on that line and shifts all later line offsets.
    with open(file_path, 'r') as file:
        lines = file.read().splitlines()

    # Single pass: remember the first line index of every search string.
    first_hit = {}
    pending = list(search_strings)
    for index, line in enumerate(lines):
        if not pending:
            break
        still_pending = []
        for search_string in pending:
            if search_string in line:
                first_hit[search_string] = index
            else:
                still_pending.append(search_string)
        pending = still_pending

    found_strings = {search_string: search_string in first_hit for search_string in search_strings}
    extracted_lines = {}
    for search_string, (start_lines, no_lines) in search_strings.items():
        if search_string in first_hit:
            i = first_hit[search_string]
            extracted_lines[search_string] = lines[i + start_lines:i + no_lines]
        else:
            extracted_lines[search_string] = []
    return extracted_lines, found_strings


def analyze_rj_dat(file_path):
    """Parse the ship-impact result row of a Ronja ``.dat`` file.

    The file is searched for the ship-impact results heading, the
    "final time step reached" message and the stop-code line. The result
    row is split into the 14 columns listed in ``column_names``; all values
    are kept as strings.

    Args:
        file_path: Path of the ``.dat`` file.

    Returns:
        A DataFrame with the parsed result row plus the columns
        ``'Final time step reached'`` (``'YES'`` or ``'-'``), ``'Stop Code'``
        (int, or None when the stop-code line is absent) and the columns
        added by ``add_file_info``.

    Raises:
        ValueError: If no result row could be extracted, or a row does not
            split into exactly 14 fields.
    """
    impact_key = 'S H I P   I M P A C T   R E S U L T S'
    final_step_key = 'Final time step reached.'
    stop_code_key = 'Stop code for exeeded ult. limit strain'
    # Values are (start, end) offsets relative to the matching line; the end
    # is exclusive, so (5, 6) selects the single line five lines below the
    # heading and (0, 1) selects the matching line itself.
    search_strings = {
        impact_key: (5, 6),
        final_step_key: (0, 0),  # presence check only, nothing extracted
        stop_code_key: (0, 1),
    }
    extracted_lines, found_strings = extract_lines_and_find_strings(file_path, search_strings)
    lines = extracted_lines[impact_key]
    is_final_time_step_reached = found_strings[final_step_key]
    line_stop_code = extracted_lines[stop_code_key]

    # The stop code is the last whitespace-separated token of its line.
    stop_code = int(line_stop_code[0].split()[-1]) if line_stop_code else None

    # Fields are whitespace separated, but a negative value may be glued to
    # the previous number ("1.23-4.56"). Insert a space before such a minus
    # sign so the value is split off *with* its sign; splitting on the minus
    # itself would silently turn it into a positive number.
    data = [
        re.split(r'\s+', re.sub(r'(?<=\d)-', ' -', line.strip()))
        for line in lines
    ]

    column_names = ['Node name', 'spring', 'beam 1', 'beam 2', 'Load case', 'Dent code', 'Force (kN)',
                    'Deformations struct (m)', 'Deformations dent (m)', 'Deformations ship (m)',
                    'Ship impact energies struct (kNm)', 'Ship impact energies dent (kNm)',
                    'Ship impact energies ship (kNm)', 'Ship impact energies total (kNm)']

    if not data:  # heading missing, or the file ends before the result row
        raise ValueError(f"No keyword 'Ship Impact Results' found in the file: {file_path}")
    for row in data:
        if len(row) != len(column_names):
            raise ValueError(f"Data has {len(row)} columns, expected {len(column_names)}. Data: \n{data}")

    df = pd.DataFrame(data, columns=column_names)
    df['Final time step reached'] = 'YES' if is_final_time_step_reached else '-'
    df['Stop Code'] = stop_code

    print("Ronja file analysed: ", file_path)
    return add_file_info(df, file_path)

def analyze_ph_ars(file_path):
    """Load an ``.ARS`` result file into a DataFrame.

    The file is read as CSV with row index 6 (the seventh line) as header.
    Rows whose ``'!NODDIS'`` column equals ``'#EOF'`` are dropped, and the
    columns from ``add_file_info`` are appended.

    Args:
        file_path: Path of the ``.ARS`` file.

    Returns:
        The filtered DataFrame with the file-information columns added.
    """
    df = pd.read_csv(file_path, header=6)
    # Copy the filtered rows: add_file_info() assigns new columns, and doing
    # that on a boolean-indexed slice triggers pandas' SettingWithCopyWarning.
    df = df[df['!NODDIS'] != '#EOF'].copy()
    return add_file_info(df, file_path)

def add_file_info(df, file_path):
    """Tag every row of ``df`` with information derived from ``file_path``.

    The path is split on underscores; the third-from-last and second-to-last
    pieces become the ``'Dir'`` and ``'Subdir'`` columns. ``'File Path'``
    holds the path with the ``R:\\`` prefix replaced by ``\\\\mack2\\og\\``.
    ``df`` is modified in place and also returned.

    Raises:
        IndexError: If the path contains fewer than two underscores.
    """
    pieces = file_path.split('_')
    # Resolve both pieces before touching df, so a malformed path leaves
    # the frame unchanged.
    scenario, sub_scenario = pieces[-3], pieces[-2]

    unc_path = file_path.replace('R:\\', '\\\\mack2\\og\\')
    for column, value in (('File Path', unc_path), ('Dir', scenario), ('Subdir', sub_scenario)):
        df[column] = value
    return df

# Dispatch table used by apply_function(): the search pattern that matched a
# file -> the function that parses that kind of file.
FUNCTION_MAP = {
    SEARCH_PATTERNS['ronja.dat']: analyze_rj_dat,
    SEARCH_PATTERNS['ph_riser.ARS']: analyze_ph_ars,
}

def apply_function(row):
    """Run the analysis function registered for a file row.

    ``row['Search String']`` is looked up in ``FUNCTION_MAP`` and the mapped
    function is called with ``row['Path']``. Returns that function's result,
    or None when no function is registered for the search string.
    """
    analyzer = FUNCTION_MAP.get(row['Search String'])
    if analyzer is None:
        return None
    return analyzer(row['Path'])

def main():
    """Collect all result files, analyse them and build the merged table.

    Every directory in ``DIR_PATHS`` is searched for the patterns in
    ``SEARCH_PATTERNS``; each file is parsed through ``apply_function``.
    The Ronja results are concatenated and, when ARS files exist, merged
    with the riser displacement computed from them on ``'Dir'``/``'Subdir'``.

    Returns:
        The merged DataFrame, sorted by ``'Dir'``, ``'Subdir'``,
        ``'Node name'`` and ``'Stop Code'``, with one row per ``'File Path'``
        (the last one in sort order) and a fresh 0..n-1 index.

    Raises:
        ValueError: If no Ronja result file was found.
    """
    files_df = pd.concat([find_files(path, SEARCH_PATTERNS) for path in DIR_PATHS], ignore_index=True)

    rj_pattern = SEARCH_PATTERNS['ronja.dat']
    ars_pattern = SEARCH_PATTERNS['ph_riser.ARS']

    # Keep the per-file results in plain lists; storing DataFrames inside
    # DataFrame cells via a row-wise apply is fragile.
    rj_frames = []
    ars_frames = []
    for _, row in files_df.iterrows():
        result = apply_function(row)
        if row['Search String'] == rj_pattern:
            rj_frames.append(result)
        if row['Search String'] == ars_pattern:
            ars_frames.append(result)

    if not rj_frames:
        raise ValueError(f"No files matching {rj_pattern!r} were found under {DIR_PATHS}")

    merged_df = pd.concat(rj_frames, ignore_index=True)

    if ars_frames:
        merged_df2 = pd.concat(ars_frames, ignore_index=True)
        # The second column of the ARS table is treated as the node name.
        merged_df2 = merged_df2.rename(columns={merged_df2.columns[1]: 'Node name'})
        # Resultant of the X and Y displacement components.
        merged_df2['Riser Displacement'] = (
            merged_df2['X-displacement'] ** 2 + merged_df2['Y-displacement'] ** 2
        ) ** 0.5
        merged_df2['File Path ARS'] = merged_df2['File Path']
        merged_df = pd.merge(merged_df, merged_df2[['Node name', 'Dir', 'Subdir', 'Riser Displacement', 'File Path ARS']],
                             on=['Dir', 'Subdir'],
                             how='outer',
                             suffixes=('', '_y'))

        # NOTE(review): this keeps only rows whose Ronja node name differs
        # from the ARS node name; behaviour kept as-is, confirm the intent.
        merged_df = merged_df[merged_df['Node name'] != merged_df['Node name_y']]
        merged_df = merged_df.drop(columns=['Node name_y'])

    else:
        merged_df['Riser Displacement'] = math.nan
        merged_df['File Path ARS'] = "ARS File by search key: '({})' not found in the directory structure.".format(ars_pattern)

    merged_df = merged_df.sort_values(by=['Dir', 'Subdir', 'Node name', 'Stop Code'])

    # Several ARS rows can match the same Ronja file; keep one row per file.
    merged_df = merged_df.drop_duplicates(subset='File Path', keep='last')
    merged_df = merged_df.reset_index(drop=True)
    print(merged_df)

    return merged_df

# Run the analysis when executed directly. The result is bound to the
# module-level name ``merged_df`` because the table-building code further
# down reads it.
if __name__ == "__main__":
    merged_df = main()

In [None]:
# ---- Table 1: stop code, impact energies and deformations per scenario ----
tb1_columns = [
    'Dir', 'Subdir', 'Stop Code',
    'Ship impact energies struct (kNm)', 'Ship impact energies dent (kNm)',
    'Ship impact energies ship (kNm)', 'Ship impact energies total (kNm)',
    'Deformations struct (m)', 'Deformations dent (m)',
    'Final time step reached', 'File Path',
]
tb1_df = merged_df[tb1_columns]
print(tb1_df)

# Two stacked header rows for table 1 (group title above, sub-title below).
tb1_headers1 = ['Scenario', 'SubScenario', 'Stop code', 'Absorbed energy (kNm)', '', '', '', 'Impact deformations (m)', '', 'Final time-step reached', 'Links:']
tb1_headers2 = ['', '', '', 'Struct.', 'Dent', 'Ship', 'Total', 'Struct.', 'Dent', '', '']

# ---- Table 2: clearance check, only for rows whose stop code equals 3 ----
filtered_df = merged_df[merged_df['Stop Code'] == 3]

tb2_source_columns = ['Dir', 'Subdir', 'Deformations struct (m)', 'Riser Displacement', 'Deformations dent (m)']
tb2_df = filtered_df[tb2_source_columns].copy()
tb2_df = tb2_df.rename(columns={
    'Dir': 'Scenario',
    'Subdir': 'SubScenario',
    'Deformations struct (m)': 'Struct. displacement',
    'Deformations dent (m)': 'Caisson dent',
})

# Parse as numbers (unparseable values become NaN), scale by 1000 (m -> mm)
# and round to whole numbers.
for mm_column in ('Struct. displacement', 'Riser Displacement', 'Caisson dent'):
    tb2_df[mm_column] = (pd.to_numeric(tb2_df[mm_column], errors='coerce') * 1000).round(0)

# Same clearance value for every row, taken from the CLEARANCE constant.
tb2_df['Available clearance'] = CLEARANCE

# Clearance left after the impact, derived from the columns above.
tb2_df['Residual post-impact clearance'] = (
    tb2_df['Available clearance']
    - tb2_df['Struct. displacement']
    + tb2_df['Riser Displacement']
    - tb2_df['Caisson dent']
)

# Assigned by index alignment, so each kept row receives its own ARS path.
tb2_df['Links:'] = merged_df['File Path ARS']
# Blank filler columns so 'Links:' lands in the same CSV column as in table 1.
for filler_column in ('Empty1', 'Empty2', 'Empty3'):
    tb2_df[filler_column] = ''

# Final column order of table 2.
tb2_df = tb2_df[[
    'Scenario', 'SubScenario', 'Available clearance', 'Struct. displacement',
    'Riser Displacement', 'Caisson dent', 'Residual post-impact clearance',
    'Empty1', 'Empty2', 'Empty3', 'Links:',
]]

print(tb2_df)

# Three stacked header rows for table 2 (title, continuation, unit).
tb2_headers1 = ['Scenario', 'SubScenario', 'Available hor.', 'Caisson hor.', 'Riser hor.', 'Caisson', 'Residual post-impact', '', '', '', 'Links:']
tb2_headers2 = ['', '', 'clearance.', 'displacement.', 'displacement.', 'dent', 'clearance', '', '', '', '']
tb2_headers3 = ['', '', '[mm].', '[mm].', '[mm].', '[mm]', '[mm]', '', '', '', '']


In [None]:
import csv
# Set to False to skip writing the report file.
PRINT_RESULTS = True

if PRINT_RESULTS:
    # Start from a clean slate: delete any report left over from a previous run.
    if os.path.exists(OUTPUT_FILE_NAME):
        os.remove(OUTPUT_FILE_NAME)

    # Table 1: the two header rows first ...
    with open(OUTPUT_FILE_NAME, 'w', newline='') as report:
        csv.writer(report, delimiter=';').writerows([tb1_headers1, tb1_headers2])

    # ... followed by the data rows (no pandas header, no index column).
    tb1_df.to_csv(OUTPUT_FILE_NAME, mode='a', index=False, sep=';', header=False)

    # Three blank rows as a separator, then the three header rows of table 2.
    with open(OUTPUT_FILE_NAME, 'a', newline='') as report:
        csv.writer(report, delimiter=';').writerows(
            [[], [], [], tb2_headers1, tb2_headers2, tb2_headers3]
        )

    # Table 2 data rows.
    tb2_df.to_csv(OUTPUT_FILE_NAME, mode='a', index=False, sep=';', header=False)
