In [1]:
from utils.data_preprocessing import *
import os

In [2]:
# Relative folder that holds the raw CSV files: ../data/csv_from_dlis_raw.
base_path = os.path.join('..', 'data', 'csv_from_dlis_raw')
# load_csv_files is not defined in this notebook; presumably it is provided by
# the star import from utils.data_preprocessing -- TODO confirm.
# The cells below walk csv_data through three nested .items() levels (named
# well, logical file and frame in the loops) and treat the innermost values as
# pandas DataFrames.
csv_data = load_csv_files(base_path)

### Rename the INDEX columns to TDEP

In [3]:
def _index_to_tdep(column_name):
    """Map any column name beginning with 'INDEX' to 'TDEP'; keep the rest."""
    if column_name.startswith('INDEX'):
        return 'TDEP'
    return column_name


# Walk the three nesting levels of csv_data and store each renamed frame back
# under its original key.
for frames_by_file in csv_data.values():
    for frames in frames_by_file.values():
        for frame_name in list(frames):
            frames[frame_name] = frames[frame_name].rename(columns=_index_to_tdep)

### Round TDEP to one decimal place

In [4]:
# Round the TDEP column of every frame to one decimal place. The frames are
# modified in place, so csv_data keeps pointing at the same DataFrame objects.
for frames_by_file in csv_data.values():
    for frames in frames_by_file.values():
        for frame_df in frames.values():
            frame_df['TDEP'] = frame_df['TDEP'].round(decimals=1)

### Replace the -999.25 placeholder values with missing values

In [5]:
# Replace every -999.25 entry with NaN in each frame.
#
# NaN (rather than None) is used as the replacement so that numeric columns
# keep a numeric dtype instead of possibly being turned into object columns.
# The result is stored back in the dictionary instead of mutating the frame
# with inplace=True, so a failed replacement leaves the stored frame untouched.
for well, w_dict in csv_data.items():
    for logical_file, lf_dict in w_dict.items():
        for frame, df in lf_dict.items():
            try:
                # Re-assigning an existing key while iterating is safe: the
                # dictionary does not change size.
                lf_dict[frame] = df.replace(-999.25, float('nan'))
            except Exception as err:
                # Still best-effort (the remaining frames are processed), but
                # the failure is reported instead of being silently swallowed,
                # and KeyboardInterrupt / SystemExit are no longer caught.
                print(
                    f"Could not replace -999.25 in "
                    f"{well}/{logical_file}/{frame}: {err!r}"
                )

### Remove rows logged near the surface and intermediary coatings

In [6]:
# base_path is reused here and now points at ../data/agp.
base_path = os.path.join('..', 'data', 'agp')
# extract_coating is not defined in this notebook; presumably it is provided by
# the star import from utils.data_preprocessing -- TODO confirm.
# The next cell reads the result as coating_data[well]['Rev. Superficie'] and
# coating_data[well]['Rev. Intermed.'].
coating_data = extract_coating(base_path)

In [7]:
# A coating counts as "near" when the shallowest TDEP of a frame lies within
# COATING_PROXIMITY of the coating depth; the frame then keeps only the rows at
# or below coating depth + COATING_MARGIN. Both constants are expressed in the
# same units as TDEP and the coating depths.
COATING_PROXIMITY = 20
COATING_MARGIN = 5

for well, w_dict in csv_data.items():
    for logical_file, lf_dict in w_dict.items():
        for frame, df in lf_dict.items():
            # As before, a well or column missing from coating_data raises
            # KeyError here: these lookups were never inside the try block.
            coating_depths = (
                coating_data[well]['Rev. Superficie'],
                coating_data[well]['Rev. Intermed.'],
            )

            # Largest cut-off requested by any nearby coating. Previously the
            # intermediary trim was computed from the untrimmed frame and
            # overwrote the surface trim; taking the maximum applies both.
            cutoff = None
            try:
                log_top = df['TDEP'].min()
                for coating_depth in coating_depths:
                    if abs(log_top - coating_depth) < COATING_PROXIMITY:
                        candidate = coating_depth + COATING_MARGIN
                        cutoff = candidate if cutoff is None else max(cutoff, candidate)
            except (KeyError, TypeError, ValueError) as err:
                # Best-effort as before (a cut-off found before the failure is
                # still applied below), but the problem is now reported
                # instead of being silently swallowed.
                print(
                    f"Coating check incomplete for "
                    f"{well}/{logical_file}/{frame}: {err!r}"
                )

            if cutoff is not None:
                # Re-assigning an existing key while iterating is safe: the
                # dictionary does not change size.
                lf_dict[frame] = df.loc[df['TDEP'] >= cutoff].reset_index(drop=True)

### Export the data to CSV files

In [8]:
# Hand the processed csv_data to dfs_to_csv together with the output folder
# ../data/dlis_preprocessed. dfs_to_csv is not defined in this notebook;
# presumably it is provided by the star import from utils.data_preprocessing
# -- TODO confirm how it lays out the files.
dfs_to_csv(csv_data, "../data/dlis_preprocessed")