<a href="https://colab.research.google.com/github/parthpranav2/ROP-Optimization/blob/main/Synthetic_drilling_params_inducer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

READ-ME

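Upload "initial_well_logs.zip" to the notebook's working directory before running any cell, then run the cells in order from top to bottom: Block 1 deletes and recreates the output folder, Blocks 2 and 3 add the test-well files to it, and Block 4 zips the folder and removes it.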

Process Training Wells (All except S1, S2, S3)

In [2]:
import pandas as pd
import numpy as np
import os
import zipfile
import io
import shutil

# --- Configuration ---
# Archive holding the raw well-log CSVs; it must sit in the working directory.
INPUT_ZIP_NAME = "initial_well_logs.zip"
# Root folder for everything this notebook writes.
OUTPUT_FOLDER_NAME = "training_and_testing_wells"
# Sub-folder that receives the processed training wells.
TRAINING_PATH = os.path.join(OUTPUT_FOLDER_NAME, "training_wells")
# Wells held back for testing; this block EXCLUDES them.
TEST_WELLS = ["S1", "S2", "S3"]

# Seed NumPy's global generator so the synthetic noise is reproducible.
np.random.seed(42)

# --- Define the 'Training' processing function (V_SH + RHOB logic) ---
def generate_synthetic_data_train(df):
    """Add synthetic drilling parameters to one training well log.

    A rock-strength proxy is derived from the ``RHOB`` and ``V_SH`` columns and
    scaled to [0, 1]. ``wt_on_bit`` and ``surface_rpm`` are then generated from
    that proxy plus Gaussian noise, and ``rop_avg`` from the standardised
    ``wt_on_bit`` / ``surface_rpm``, the proxy and more noise.

    Noise comes from NumPy's global generator (``np.random``), drawn in the
    order WOB, RPM, ROP, so results depend on the current seed/state.

    Args:
        df: Well-log DataFrame. Must contain ``V_SH`` and ``RHOB``. Missing
            values in those two columns are filled with the column mean.

    Returns:
        A new DataFrame (the input is not modified) with the filled ``V_SH`` /
        ``RHOB`` columns plus ``synthetic_rock_strength``, ``wt_on_bit``,
        ``surface_rpm`` and ``rop_avg``; or ``None`` when ``df`` is empty or a
        required column is absent.
    """
    num_rows = len(df)
    if num_rows == 0:
        return None

    # Check for required columns
    if 'V_SH' not in df.columns or 'RHOB' not in df.columns:
        print("      -> ERROR: V_SH or RHOB missing. Cannot use this logic. Skipping.")
        return None

    def _min_max(series):
        # Scale to [0, 1]. A zero-width range (constant input) maps to 0
        # instead of 0/0 = NaN, which would poison every generated column.
        shifted = series - series.min()
        span = shifted.max()
        return shifted / span if span > 0 else shifted

    def _zscore(series):
        # Standardise. With zero spread, or a single row (sample std is NaN),
        # return the centred values (all 0) instead of dividing.
        centered = series - series.mean()
        spread = series.std()
        return centered / spread if spread > 0 else centered

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    # Handle potential missing values (fill with the column mean)
    df['V_SH'] = df['V_SH'].fillna(df['V_SH'].mean())
    df['RHOB'] = df['RHOB'].fillna(df['RHOB'].mean())

    # Higher RHOB and lower V_SH both raise the strength proxy.
    strength_factor = (df['RHOB'] - df['RHOB'].min()) + (1 - df['V_SH'])
    df['synthetic_rock_strength'] = _min_max(strength_factor)

    # --- Generate Operator-Controlled Parameters (WOB, RPM) ---
    wob_base = 20.0
    wob_strength_effect = 30.0
    wob_noise = np.random.normal(0, 4, num_rows)
    wt_on_bit = wob_base + (df['synthetic_rock_strength'] * wob_strength_effect) + wob_noise
    df['wt_on_bit'] = np.clip(wt_on_bit, 5, 70)

    rpm_base = 160.0
    rpm_strength_effect = -80.0  # RPM falls as the strength proxy rises
    rpm_noise = np.random.normal(0, 10, num_rows)
    surface_rpm = rpm_base + (df['synthetic_rock_strength'] * rpm_strength_effect) + rpm_noise
    df['surface_rpm'] = np.clip(surface_rpm, 50, 220)

    # --- Generate Resulting Parameter (ROP) ---
    wob_norm = _zscore(df['wt_on_bit'])
    rpm_norm = _zscore(df['surface_rpm'])

    rop_base = 80.0
    rop_wob_effect = 30.0
    rop_rpm_effect = 25.0
    rop_strength_effect = 50.0
    rop_noise = np.random.normal(0, 15, num_rows)

    rop_average = (
        rop_base +
        (rop_wob_effect * wob_norm) +
        (rop_rpm_effect * rpm_norm) -
        (rop_strength_effect * df['synthetic_rock_strength']) +
        rop_noise
    )

    df['rop_avg'] = np.clip(rop_average, 5, 350)
    return df

# --- Main Script ---
print("--- Starting Block 1: Processing Training Wells ---")

# Start from a clean slate: drop whatever a previous run left behind.
if os.path.exists(OUTPUT_FOLDER_NAME):
    print(f"Cleaning up old folder: '{OUTPUT_FOLDER_NAME}'")
    shutil.rmtree(OUTPUT_FOLDER_NAME)

# Recreate the output tree for the training wells.
os.makedirs(TRAINING_PATH, exist_ok=True)
print(f"Created directory: {TRAINING_PATH}")

try:
    with zipfile.ZipFile(INPUT_ZIP_NAME, 'r') as zip_in:
        print(f"Successfully opened '{INPUT_ZIP_NAME}'.")

        for file_name in zip_in.namelist():
            # Well key is the bare file name: "initial_well_logs/A2.csv" -> "A2"
            well_key = os.path.splitext(os.path.basename(file_name))[0]

            # Skip anything that is not a CSV, junk entries whose path starts
            # with "__", the held-out test wells, and the root-folder name.
            if (not file_name.endswith('.csv')
                    or file_name.startswith('__')
                    or well_key in TEST_WELLS
                    or well_key == 'initial_well_logs'):
                continue

            print(f"  Processing TRAINING well: {well_key}...")

            # Read the CSV straight out of the archive, decoded as UTF-8.
            with zip_in.open(file_name) as f:
                df = pd.read_csv(io.TextIOWrapper(f, encoding='utf-8'))

            df_processed = generate_synthetic_data_train(df)
            if df_processed is None:
                print(f"    -> Skipped {well_key} (processing error or empty).")
                continue

            output_file_name = f"{well_key}_usable.csv"
            output_file_path = os.path.join(TRAINING_PATH, output_file_name)
            df_processed.to_csv(output_file_path, index=False)
            print(f"    -> Saved to {output_file_path}")

    print("\n--- Block 1 Complete ---")

except FileNotFoundError:
    print(f"--- ERROR: '{INPUT_ZIP_NAME}' not found! Please upload it. ---")
except Exception as e:
    # Any other failure is reported, not raised, so the cell itself never errors.
    print(f"An error occurred: {e}")

--- Starting Block 1: Processing Training Wells ---
Cleaning up old folder: 'training_and_testing_wells'
Created directory: training_and_testing_wells/training_wells
Successfully opened 'initial_well_logs.zip'.
  Processing TRAINING well: D4...
    -> Saved to training_and_testing_wells/training_wells/D4_usable.csv
  Processing TRAINING well: D29...
    -> Saved to training_and_testing_wells/training_wells/D29_usable.csv
  Processing TRAINING well: D15...
    -> Saved to training_and_testing_wells/training_wells/D15_usable.csv
  Processing TRAINING well: D14...
    -> Saved to training_and_testing_wells/training_wells/D14_usable.csv
  Processing TRAINING well: D5...
    -> Saved to training_and_testing_wells/training_wells/D5_usable.csv
  Processing TRAINING well: D7...
    -> Saved to training_and_testing_wells/training_wells/D7_usable.csv
  Processing TRAINING well: D16...
    -> Saved to training_and_testing_wells/training_wells/D16_usable.csv
  Processing TRAINING well: D17...
    

Process S1, S2, S3 for "Expected Results"

In [3]:
import pandas as pd
import numpy as np
import os
import zipfile
import io

# --- Configuration ---
# Archive holding the raw well-log CSVs; it must sit in the working directory.
INPUT_ZIP_NAME = "initial_well_logs.zip"
# Root folder for everything this notebook writes.
OUTPUT_FOLDER_NAME = "training_and_testing_wells"
# Sub-folder that receives the test wells WITH their synthetic ROP.
EXPECTED_PATH = os.path.join(OUTPUT_FOLDER_NAME, "testing_wells", "expected_result")
# Wells held back for testing; this block INCLUDES only these.
TEST_WELLS = ["S1", "S2", "S3"]

# Seed NumPy's global generator so the synthetic noise is reproducible.
np.random.seed(42)

# --- Define the 'Expected' processing function (GR + RHOB + ROP) ---
def generate_synthetic_data_expected(df):
    """Add synthetic drilling parameters, including ROP, to one test well log.

    A rock-strength proxy is built as the standardised ``RHOB`` minus the
    standardised ``GR`` and scaled to [0, 1]. ``wt_on_bit`` and ``surface_rpm``
    are then generated from that proxy plus Gaussian noise, and ``rop_avg``
    from the standardised ``wt_on_bit`` / ``surface_rpm``, the proxy and more
    noise.

    Noise comes from NumPy's global generator (``np.random``), drawn in the
    order WOB, RPM, ROP, so results depend on the current seed/state.

    Args:
        df: Well-log DataFrame. Must contain ``GR`` and ``RHOB``. Missing
            values in those two columns are filled with the column mean.

    Returns:
        A new DataFrame (the input is not modified) with the filled ``GR`` /
        ``RHOB`` columns plus ``synthetic_rock_strength``, ``wt_on_bit``,
        ``surface_rpm`` and ``rop_avg``; or ``None`` when ``df`` is empty or a
        required column is absent.
    """
    num_rows = len(df)
    if num_rows == 0:
        return None

    # Check for required columns
    if 'GR' not in df.columns or 'RHOB' not in df.columns:
        print("      -> ERROR: GR or RHOB missing. Cannot use this logic. Skipping.")
        return None

    def _zscore(series):
        # Standardise. With zero spread, or a single row (sample std is NaN),
        # return the centred values (all 0) instead of dividing.
        centered = series - series.mean()
        spread = series.std()
        return centered / spread if spread > 0 else centered

    def _min_max(series):
        # Scale to [0, 1]. A zero-width range (constant input) maps to 0
        # instead of 0/0 = NaN, which would poison every generated column.
        shifted = series - series.min()
        span = shifted.max()
        return shifted / span if span > 0 else shifted

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    df['GR'] = df['GR'].fillna(df['GR'].mean())
    df['RHOB'] = df['RHOB'].fillna(df['RHOB'].mean())

    # Higher RHOB and lower GR both raise the strength proxy.
    strength_proxy = _zscore(df['RHOB']) - _zscore(df['GR'])
    df['synthetic_rock_strength'] = _min_max(strength_proxy)

    # --- Generate Operator-Controlled Parameters (WOB, RPM) ---
    wob_base = 20.0
    wob_strength_effect = 30.0
    wob_noise = np.random.normal(0, 4, num_rows)
    wt_on_bit = wob_base + (df['synthetic_rock_strength'] * wob_strength_effect) + wob_noise
    df['wt_on_bit'] = np.clip(wt_on_bit, 5, 70)

    rpm_base = 160.0
    rpm_strength_effect = -80.0  # RPM falls as the strength proxy rises
    rpm_noise = np.random.normal(0, 10, num_rows)
    surface_rpm = rpm_base + (df['synthetic_rock_strength'] * rpm_strength_effect) + rpm_noise
    df['surface_rpm'] = np.clip(surface_rpm, 50, 220)

    # --- Generate Resulting Parameter (ROP) ---
    wob_norm = _zscore(df['wt_on_bit'])
    rpm_norm = _zscore(df['surface_rpm'])

    rop_base = 80.0
    rop_wob_effect = 30.0
    rop_rpm_effect = 25.0
    rop_strength_effect = 50.0
    rop_noise = np.random.normal(0, 15, num_rows)

    rop_average = (
        rop_base +
        (rop_wob_effect * wob_norm) +
        (rop_rpm_effect * rpm_norm) -
        (rop_strength_effect * df['synthetic_rock_strength']) +
        rop_noise
    )

    df['rop_avg'] = np.clip(rop_average, 5, 350)
    return df

# --- Main Script ---
print("--- Starting Block 2: Processing 'Expected Result' Wells ---")

# Make sure the destination folder exists before any file is written.
os.makedirs(EXPECTED_PATH, exist_ok=True)
print(f"Created directory: {EXPECTED_PATH}")

try:
    with zipfile.ZipFile(INPUT_ZIP_NAME, 'r') as zip_in:
        print(f"Successfully opened '{INPUT_ZIP_NAME}'.")

        for file_name in zip_in.namelist():
            # Well key is the bare file name: "initial_well_logs/S1.csv" -> "S1"
            well_key = os.path.splitext(os.path.basename(file_name))[0]

            # Only CSV members that belong to the test wells are handled here.
            if not file_name.endswith('.csv') or well_key not in TEST_WELLS:
                continue

            print(f"  Processing EXPECTED well: {well_key}...")

            # Read the CSV straight out of the archive, decoded as UTF-8.
            with zip_in.open(file_name) as f:
                df = pd.read_csv(io.TextIOWrapper(f, encoding='utf-8'))

            df_processed = generate_synthetic_data_expected(df)
            if df_processed is None:
                print(f"    -> Skipped {well_key} (processing error or empty).")
                continue

            output_file_name = f"{well_key}_testing_expected_result.csv"
            output_file_path = os.path.join(EXPECTED_PATH, output_file_name)
            df_processed.to_csv(output_file_path, index=False)
            print(f"    -> Saved to {output_file_path}")

    print("\n--- Block 2 Complete ---")

except FileNotFoundError:
    print(f"--- ERROR: '{INPUT_ZIP_NAME}' not found! Please upload it. ---")
except Exception as e:
    # Any other failure is reported, not raised, so the cell itself never errors.
    print(f"An error occurred: {e}")

--- Starting Block 2: Processing 'Expected Result' Wells ---
Created directory: training_and_testing_wells/testing_wells/expected_result
Successfully opened 'initial_well_logs.zip'.
  Processing EXPECTED well: S2...
    -> Saved to training_and_testing_wells/testing_wells/expected_result/S2_testing_expected_result.csv
  Processing EXPECTED well: S3...
    -> Saved to training_and_testing_wells/testing_wells/expected_result/S3_testing_expected_result.csv
  Processing EXPECTED well: S1...
    -> Saved to training_and_testing_wells/testing_wells/expected_result/S1_testing_expected_result.csv

--- Block 2 Complete ---


Process S1, S2, S3 for "Test Dataset"

In [4]:
import pandas as pd
import numpy as np
import os
import zipfile
import io

# --- Configuration ---
# Archive holding the raw well-log CSVs; it must sit in the working directory.
INPUT_ZIP_NAME = "initial_well_logs.zip"
# Root folder for everything this notebook writes.
OUTPUT_FOLDER_NAME = "training_and_testing_wells"
# Sub-folder that receives the test wells WITHOUT the ROP column.
TEST_DATASET_PATH = os.path.join(OUTPUT_FOLDER_NAME, "testing_wells", "test_dataset")
# Wells held back for testing; this block INCLUDES only these.
TEST_WELLS = ["S1", "S2", "S3"]

# Seed NumPy's global generator so the synthetic noise is reproducible.
np.random.seed(42)

# --- Define the 'Test' processing function (GR + RHOB, NO ROP) ---
def generate_synthetic_data_test(df):
    """Add synthetic WOB and RPM (but no ROP) to one test well log.

    A rock-strength proxy is built as the standardised ``RHOB`` minus the
    standardised ``GR`` and scaled to [0, 1]. ``wt_on_bit`` and ``surface_rpm``
    are then generated from that proxy plus Gaussian noise. ``rop_avg`` is
    deliberately NOT produced.

    Noise comes from NumPy's global generator (``np.random``). After the WOB
    and RPM draws, one further vector of ``len(df)`` normals is drawn and
    discarded. ``generate_synthetic_data_expected`` consumes that vector as its
    ROP noise, so discarding it here keeps both functions consuming the same
    amount of the random stream per well. Without it, when several wells are
    processed after a single seed, every well after the first would get WOB/RPM
    values that differ from those in the matching expected-result file.

    Args:
        df: Well-log DataFrame. Must contain ``GR`` and ``RHOB``. Missing
            values in those two columns are filled with the column mean.

    Returns:
        A new DataFrame (the input is not modified) with the filled ``GR`` /
        ``RHOB`` columns plus ``synthetic_rock_strength``, ``wt_on_bit`` and
        ``surface_rpm``; or ``None`` when ``df`` is empty or a required column
        is absent.
    """
    num_rows = len(df)
    if num_rows == 0:
        return None

    # Check for required columns
    if 'GR' not in df.columns or 'RHOB' not in df.columns:
        print("      -> ERROR: GR or RHOB missing. Cannot use this logic. Skipping.")
        return None

    def _zscore(series):
        # Standardise. With zero spread, or a single row (sample std is NaN),
        # return the centred values (all 0) instead of dividing.
        centered = series - series.mean()
        spread = series.std()
        return centered / spread if spread > 0 else centered

    def _min_max(series):
        # Scale to [0, 1]. A zero-width range (constant input) maps to 0
        # instead of 0/0 = NaN, which would poison every generated column.
        shifted = series - series.min()
        span = shifted.max()
        return shifted / span if span > 0 else shifted

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    df['GR'] = df['GR'].fillna(df['GR'].mean())
    df['RHOB'] = df['RHOB'].fillna(df['RHOB'].mean())

    # Higher RHOB and lower GR both raise the strength proxy.
    strength_proxy = _zscore(df['RHOB']) - _zscore(df['GR'])
    df['synthetic_rock_strength'] = _min_max(strength_proxy)

    # --- Generate Operator-Controlled Parameters (WOB, RPM) ---
    wob_base = 20.0
    wob_strength_effect = 30.0
    wob_noise = np.random.normal(0, 4, num_rows)
    wt_on_bit = wob_base + (df['synthetic_rock_strength'] * wob_strength_effect) + wob_noise
    df['wt_on_bit'] = np.clip(wt_on_bit, 5, 70)

    rpm_base = 160.0
    rpm_strength_effect = -80.0  # RPM falls as the strength proxy rises
    rpm_noise = np.random.normal(0, 10, num_rows)
    surface_rpm = rpm_base + (df['synthetic_rock_strength'] * rpm_strength_effect) + rpm_noise
    df['surface_rpm'] = np.clip(surface_rpm, 50, 220)

    # --- Note: We do NOT generate rop_avg here ---
    # Draw and discard the ROP noise anyway, so the random stream advances
    # exactly as it does in the expected-result generator (see docstring).
    np.random.normal(0, 15, num_rows)

    return df

# --- Main Script ---
print("--- Starting Block 3: Processing 'Test Dataset' Wells ---")

# Make sure the destination folder exists before any file is written.
os.makedirs(TEST_DATASET_PATH, exist_ok=True)
print(f"Created directory: {TEST_DATASET_PATH}")

try:
    with zipfile.ZipFile(INPUT_ZIP_NAME, 'r') as zip_in:
        print(f"Successfully opened '{INPUT_ZIP_NAME}'.")

        for file_name in zip_in.namelist():
            # Well key is the bare file name: "initial_well_logs/S1.csv" -> "S1"
            well_key = os.path.splitext(os.path.basename(file_name))[0]

            # Only CSV members that belong to the test wells are handled here.
            if not file_name.endswith('.csv') or well_key not in TEST_WELLS:
                continue

            print(f"  Processing TEST well: {well_key}...")

            # Read the CSV straight out of the archive, decoded as UTF-8.
            with zip_in.open(file_name) as f:
                df = pd.read_csv(io.TextIOWrapper(f, encoding='utf-8'))

            df_processed = generate_synthetic_data_test(df)
            if df_processed is None:
                print(f"    -> Skipped {well_key} (processing error or empty).")
                continue

            output_file_name = f"{well_key}_usable_fortesting.csv"
            output_file_path = os.path.join(TEST_DATASET_PATH, output_file_name)
            df_processed.to_csv(output_file_path, index=False)
            print(f"    -> Saved to {output_file_path}")

    print("\n--- Block 3 Complete ---")

except FileNotFoundError:
    print(f"--- ERROR: '{INPUT_ZIP_NAME}' not found! Please upload it. ---")
except Exception as e:
    # Any other failure is reported, not raised, so the cell itself never errors.
    print(f"An error occurred: {e}")

--- Starting Block 3: Processing 'Test Dataset' Wells ---
Created directory: training_and_testing_wells/testing_wells/test_dataset
Successfully opened 'initial_well_logs.zip'.
  Processing TEST well: S2...
    -> Saved to training_and_testing_wells/testing_wells/test_dataset/S2_usable_fortesting.csv
  Processing TEST well: S3...
    -> Saved to training_and_testing_wells/testing_wells/test_dataset/S3_usable_fortesting.csv
  Processing TEST well: S1...
    -> Saved to training_and_testing_wells/testing_wells/test_dataset/S1_usable_fortesting.csv

--- Block 3 Complete ---


Zip the Final Directory

In [5]:
import os
import shutil

# --- Configuration ---
# Folder produced by Blocks 1-3 that is about to be archived.
OUTPUT_FOLDER_NAME = 'training_and_testing_wells'
# Archive base name; make_archive appends the ".zip" extension itself.
OUTPUT_ZIP_NAME = 'training_and_testing_wells'

print("--- Starting Block 4: Zipping Final Folder ---")

try:
    if not os.path.exists(OUTPUT_FOLDER_NAME):
        # Nothing to archive: the earlier blocks have not been run.
        print(f"--- ERROR: Folder '{OUTPUT_FOLDER_NAME}' not found. ---")
        print("Please run Blocks 1, 2, and 3 first.")
    else:
        # Archive the contents of the output folder into <OUTPUT_ZIP_NAME>.zip.
        print(f"Zipping folder '{OUTPUT_FOLDER_NAME}'...")
        shutil.make_archive(
            base_name=OUTPUT_ZIP_NAME,
            format='zip',
            root_dir=OUTPUT_FOLDER_NAME,
        )

        print(f"Successfully created '{OUTPUT_ZIP_NAME}.zip'")

        # The folder is only removed after the archive call has returned.
        print(f"Cleaning up temporary folder '{OUTPUT_FOLDER_NAME}'...")
        shutil.rmtree(OUTPUT_FOLDER_NAME)

        print("\n--- Block 4 Complete ---")
        print("All tasks finished. You can now download your zip file.")

except Exception as e:
    # Failures are reported, not raised, so the cell itself never errors.
    print(f"An error occurred: {e}")

--- Starting Block 4: Zipping Final Folder ---
Zipping folder 'training_and_testing_wells'...
Successfully created 'training_and_testing_wells.zip'
Cleaning up temporary folder 'training_and_testing_wells'...

--- Block 4 Complete ---
All tasks finished. You can now download your zip file.
