In [1]:
from utils.data_preprocessing import *
import os

In [2]:
# Directory holding the raw CSV exports, relative to the notebook location.
raw_csv_dir_parts = ('..', 'data', 'csv_from_dlis_raw')
base_path = os.path.join(*raw_csv_dir_parts)

# Later cells walk csv_data as well -> logical file -> frame -> DataFrame.
csv_data = load_csv_files(base_path)

### Drop FRAMENO column

In [3]:
# Drop the FRAMENO column from every frame of every logical file of every well.
for well, w_dict in csv_data.items():
    for logical_file, lf_dict in w_dict.items():
        for frame, df in lf_dict.items():
            # errors='ignore' keeps this cell re-runnable: a second execution (or
            # a frame that never had FRAMENO) is a no-op instead of a KeyError.
            # Rebinding an existing key while iterating is safe because the
            # dict's size does not change.
            lf_dict[frame] = df.drop(columns='FRAMENO', errors='ignore')

### Rename the INDEX columns to TDEP

In [4]:
# Rename every column whose name starts with 'INDEX' to 'TDEP' in each frame.
for w_dict in csv_data.values():
    for lf_dict in w_dict.values():
        for frame in lf_dict:
            current = lf_dict[frame]
            # Explicit old-name -> new-name mapping; columns not listed keep
            # their original name.
            index_to_tdep = {
                col: 'TDEP'
                for col in current.columns
                if col.startswith('INDEX')
            }
            lf_dict[frame] = current.rename(columns=index_to_tdep)

### Round TDEP to one decimal place

In [5]:
# Round the TDEP column of every frame to one decimal place, in place.
for w_dict in csv_data.values():
    for lf_dict in w_dict.values():
        for df in lf_dict.values():
            df['TDEP'] = df['TDEP'].round(1)

### Replace -999.25 values with missing values

In [6]:
# Value that marks "no measurement" in the raw frames (presumably the usual
# well-log null sentinel -- confirm against the data source).
NULL_SENTINEL = -999.25

# Replace the sentinel with NaN in every frame.
for well, w_dict in csv_data.items():
    for logical_file, lf_dict in w_dict.items():
        for frame, df in lf_dict.items():
            try:
                # NaN rather than None: replacing with None can turn numeric
                # columns into object dtype, which breaks the numeric
                # comparisons and binning done in later cells.
                lf_dict[frame] = df.replace(NULL_SENTINEL, float('nan'))
            except (TypeError, ValueError) as e:
                # Still best-effort (the frame is left untouched), but the
                # failure is reported instead of being silently swallowed.
                print(
                    f"Exception replacing {NULL_SENTINEL} in "
                    f"{well} / {logical_file} / {frame}: {e}"
                )

### Remove rows near the coating depths

In [7]:
# Directory read by extract_coating_location, relative to the notebook.
agp_dir_parts = ('..', 'data', 'agp')
base_path = os.path.join(*agp_dir_parts)

# Indexed below as coating_locations[well]['Surface Coating'] and
# coating_locations[well]['Intermediary Coating'].
coating_locations = extract_coating_location(base_path)

In [8]:
# Thresholds, in the same depth units as TDEP.
# A frame counts as "near" a coating when its shallowest TDEP lies within
# COATING_PROXIMITY of the coating depth; rows shallower than
# coating depth + COATING_MARGIN are then dropped.
COATING_PROXIMITY = 20
COATING_MARGIN = 5

for well, w_dict in csv_data.items():
    # Coating depths for this well; an entry may be None when the well has no
    # such coating.
    well_coatings = coating_locations[well]
    coating_depths = [
        well_coatings['Surface Coating'],
        well_coatings['Intermediary Coating'],
    ]

    for logical_file, lf_dict in w_dict.items():
        for frame, df in lf_dict.items():
            try:
                # Shallowest depth of the unfiltered frame, compared against
                # every coating so the checks do not influence each other.
                top_depth = df['TDEP'].min()

                # One cut-off per coating the frame starts close to. Missing
                # coatings are skipped explicitly, so a missing surface
                # coating no longer suppresses the intermediary check.
                margins = [
                    depth + COATING_MARGIN
                    for depth in coating_depths
                    if depth is not None
                    and abs(top_depth - depth) < COATING_PROXIMITY
                ]

                if margins:
                    # Applying the deepest cut-off satisfies every triggered
                    # condition at once.
                    lf_dict[frame] = (
                        df.loc[df['TDEP'] >= max(margins)]
                        .reset_index(drop=True)
                    )
            except (KeyError, TypeError, ValueError) as e:
                # Still best-effort (the frame is left untouched), but the
                # failure is reported instead of being silently swallowed.
                print(
                    f"Exception trimming {well} / {logical_file} / {frame} "
                    f"near the coating: {e}"
                )

### Add BS (drill diameter for each depth interval)

In [9]:
# Coating diameter -> diameter of the drill used for that section, passed to
# calculate_drill_diameters below (values look like inches -- confirm).
COATING_DRILL_DIAMETERS_MAPPING = {
    "30": "36",
    "20": "26",
    "13 3/8": "17 1/2",
    "9 5/8": "12 1/4"
}

# Build the AGP directory explicitly instead of reusing `base_path`: that name
# is also assigned to the raw-CSV directory in an earlier cell, so rerunning
# cells out of order would silently point this lookup at the wrong folder.
agp_path = os.path.join('..', 'data', 'agp')
coating_diameters = extract_coating_diameter(agp_path)
drill_diameters = calculate_drill_diameters(coating_diameters, COATING_DRILL_DIAMETERS_MAPPING)

Exception with the intermediary coating of the well 1IDA 0001  SE: Expected a string, but got NoneType
Exception with the intermediary coating of the well 1MO  0001  SE: Expected a string, but got NoneType


In [10]:
# Add a BS column to every frame: the drill diameter of the section each TDEP
# value falls in, with sections delimited by the well's coating depths.
for well, w_dict in csv_data.items():
    # Coating locations
    surface_coating_location = coating_locations[well]['Surface Coating']
    intermediary_coating_location = coating_locations[well]['Intermediary Coating']

    # Drill diameters
    surface_drill_diameter = drill_diameters[well]['Surface Drill']
    try:
        intermediary_drill_diameter = drill_diameters[well]['Intermediary Drill']
    except (LookupError, TypeError):
        # Reset explicitly so the value from the previous well cannot leak
        # into this iteration.
        intermediary_drill_diameter = None
        print(f"Well {well} does not have intermediary coating")

    if intermediary_coating_location is None:
        # Only a surface section: everything from the surface coating down.
        bins = [surface_coating_location, float('inf')]
        labels = [surface_drill_diameter]
    elif intermediary_drill_diameter is None:
        # Intermediary depth is known but its drill diameter is not: label
        # only the surface section and leave deeper rows as NaN rather than
        # assigning a diameter that belongs to another well.
        print(f"Well {well} has no intermediary drill diameter; BS left empty below the intermediary coating")
        bins = [surface_coating_location, intermediary_coating_location]
        labels = [surface_drill_diameter]
    else:
        bins = [surface_coating_location, intermediary_coating_location, float('inf')]
        labels = [surface_drill_diameter, intermediary_drill_diameter]

    for logical_file, lf_dict in w_dict.items():
        for frame, df in lf_dict.items():
            try:
                # right=False makes each interval [start, end): a row exactly
                # at a coating depth belongs to the section that starts there.
                df['BS'] = pd.cut(df['TDEP'], bins=bins, labels=labels, right=False)
            except Exception as e:
                print(f"Exception with the well {well}: {e}")

Well 1IDA 0001  SE does not have intermediary coating
Well 1MO  0001  SE does not have intermediary coating


### Export the data to CSV files

In [11]:
# Destination directory for the preprocessed frames.
output_dir = "../data/dlis_preprocessed"
dfs_to_csv(csv_data, output_dir)