<a href="https://colab.research.google.com/github/parthpranav2/ROP-Optimization/blob/main/Synthetic_drilling_params_inducer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import sys

# --- Configuration ---
# Name of the CSV to read; it must already be uploaded to the Colab session.
INPUT_FILE_NAME = "A2.csv"
# Name of the CSV that this notebook writes.
OUTPUT_FILE_NAME = "A2_with_realistic_synthetic_data.csv"

# Pin NumPy's global random state so repeated runs draw identical noise.
np.random.seed(seed=42)

# --- 1. Load Data ---
# Notebook cells share one namespace, so a `df` left over from an earlier run
# would survive a failed load and be picked up by the `'df' in locals()` check
# further down. Drop it first so a failure can never process stale data.
globals().pop('df', None)

try:
    # In Colab, this reads from the "Files" section
    df = pd.read_csv(INPUT_FILE_NAME)
except FileNotFoundError:
    # No sys.exit() here: leaving `df` undefined is what makes the rest of
    # the cell skip its work.
    print("--- ERROR ---")
    print(f"Input file '{INPUT_FILE_NAME}' not found.")
    print("Please make sure you have uploaded the file to the Colab files section (left-hand sidebar).")
except Exception as e:
    # Any other failure (parse error, permissions, ...) is reported the same
    # way; `df` stays undefined so nothing downstream runs.
    print(f"An error occurred loading the file: {e}")

def _min_max_scale(values, fallback=0.5):
    """Rescale *values* linearly onto [0, 1]; constant input maps to *fallback*."""
    low = values.min()
    span = values.max() - low
    if not span > 0:
        # Zero range (constant data, a single row) or NaN range (empty data):
        # dividing by it would turn every value into NaN.
        return pd.Series(fallback, index=values.index, dtype=float)
    return (values - low) / span


def _standardize(values):
    """Return the z-scores of *values*; input with no spread maps to zeros."""
    spread = values.std()
    if not spread > 0:
        # std() is NaN for a single row and 0 for constant data; either would
        # poison the result with NaN/inf, so treat every row as "average".
        return pd.Series(0.0, index=values.index, dtype=float)
    return (values - values.mean()) / spread


def add_synthetic_drilling_params(frame):
    """Add synthetic rock-strength, WOB, RPM and ROP columns to *frame* in place.

    Missing values in the ``V_SH`` and ``RHOB`` columns are replaced by the
    column mean, then four columns are added:

    * ``synthetic_rock_strength`` - proxy in [0, 1] (0 softest, 1 hardest)
      built from ``RHOB`` (raises strength) and ``V_SH`` (lowers strength).
    * ``wt_on_bit`` - weight on bit, higher in stronger rock, clipped to [5, 70].
    * ``surface_rpm`` - rotary speed, lower in stronger rock, clipped to [50, 220].
    * ``rop_avg`` - linear model of the three columns above plus noise,
      clipped to [5, 350].

    Noise is drawn from NumPy's global random state (WOB, then RPM, then ROP),
    so seed it with ``np.random.seed`` beforehand for reproducible output.

    Args:
        frame: DataFrame containing numeric ``V_SH`` and ``RHOB`` columns.

    Returns:
        The same DataFrame object, modified in place.

    Raises:
        KeyError: if ``V_SH`` or ``RHOB`` is absent.
        ValueError: if either column of a non-empty frame holds only NaN, so
            there is no mean to fill the gaps with.
    """
    required = ('V_SH', 'RHOB')
    num_rows = len(frame)

    # Validate everything before touching the frame so a failure leaves it unchanged.
    missing = [name for name in required if name not in frame.columns]
    if missing:
        raise KeyError(f"Input data is missing required column(s): {missing}")
    for name in required:
        if num_rows and frame[name].isna().all():
            raise ValueError(
                f"Column '{name}' contains no valid values; cannot fill gaps with its mean."
            )

    # --- Rock-strength proxy ---
    # Assumption: shale (high V_SH) is softer, dense rock (high RHOB) is harder.
    for name in required:
        frame[name] = frame[name].fillna(frame[name].mean())
    strength_factor = (frame['RHOB'] - frame['RHOB'].min()) + (1 - frame['V_SH'])
    strength = _min_max_scale(strength_factor)
    frame['synthetic_rock_strength'] = strength

    # --- Operator-controlled parameters (WOB, RPM) ---
    # wt_on_bit: the driller applies *more* weight in harder rock.
    wob_base = 20.0             # Base weight (k-lbs)
    wob_strength_effect = 30.0  # Max extra weight for hard rock
    wob_noise = np.random.normal(0, 4, num_rows)
    frame['wt_on_bit'] = np.clip(
        wob_base + strength * wob_strength_effect + wob_noise, 5, 70
    )

    # surface_rpm: the driller often uses *lower* RPM in harder rock.
    rpm_base = 160.0             # Base RPM in soft rock
    rpm_strength_effect = -80.0  # RPM reduction for hard rock
    rpm_noise = np.random.normal(0, 10, num_rows)
    frame['surface_rpm'] = np.clip(
        rpm_base + strength * rpm_strength_effect + rpm_noise, 50, 220
    )

    # --- Resulting parameter (ROP) ---
    # ROP = base + (effect of WOB) + (effect of RPM) - (effect of strength) + noise,
    # with WOB and RPM standardized so their effects are per standard deviation.
    wob_norm = _standardize(frame['wt_on_bit'])
    rpm_norm = _standardize(frame['surface_rpm'])

    rop_base = 80.0             # Base ROP in "average" rock (ft/hr)
    rop_wob_effect = 30.0       # ROP increases with WOB
    rop_rpm_effect = 25.0       # ROP increases with RPM
    rop_strength_effect = 50.0  # ROP *decreases* with rock strength
    rop_noise = np.random.normal(0, 15, num_rows)

    rop_average = (
        rop_base
        + rop_wob_effect * wob_norm
        + rop_rpm_effect * rpm_norm
        - rop_strength_effect * strength
        + rop_noise
    )
    # Keep ROP strictly positive and within a plausible ceiling.
    frame['rop_avg'] = np.clip(rop_average, 5, 350)
    return frame


# Only run when the load step above actually produced `df`.
if 'df' in locals():
    num_rows = len(df)
    print(f"Successfully loaded '{INPUT_FILE_NAME}' with {num_rows} rows.")

    # --- 2-4. Generate the synthetic columns ---
    add_synthetic_drilling_params(df)
    print("Created 'synthetic_rock_strength' proxy from V_SH and RHOB.")

    # --- 5. Save the New CSV ---
    # This saves the file to the Colab "Files" section
    df.to_csv(OUTPUT_FILE_NAME, index=False)

    print("\n--- SUCCESS ---")
    print("Successfully generated synthetic data based on rock properties.")
    print(f"New file saved as: '{OUTPUT_FILE_NAME}'")
    print("You can find it in the 'Files' sidebar on the left and download it.")

    print("\nHere's a preview of the new data:")
    # Show columns that demonstrate the relationships. 'D' is not required by
    # the generator, so skip any preview column the input does not have
    # instead of failing after the file has already been written.
    preview_cols = [
        col
        for col in ['D', 'V_SH', 'RHOB', 'synthetic_rock_strength',
                    'wt_on_bit', 'surface_rpm', 'rop_avg']
        if col in df.columns
    ]
    print(df[preview_cols].head())

Successfully loaded 'A2.csv' with 1167 rows.
Created 'Synthetic_Rock_Strength' proxy from V_SH and RHOB.

--- SUCCESS ---
Successfully generated synthetic data based on rock properties.
New file saved as: 'A2_with_realistic_synthetic_data.csv'
You can find it in the 'Files' sidebar on the left and download it.

Here's a preview of the new data:
        D     V_SH     RHOB  Synthetic_Rock_Strength  Wt_on_bit  surface_RPM  \
0  1358.0  0.53997  2.57017                 0.603415  40.089294   117.365926   
1  1358.2  0.42351  2.50413                 0.643490  38.751631   114.468376   
2  1358.4  0.80831  2.49585                 0.331060  32.522558   142.049345   
3  1358.6  0.77501  2.52327                 0.379322  37.471775   137.243537   
4  1358.8  0.56370  2.54378                 0.563578  35.970726   117.725676   

   ROP_average  
0    79.064744  
1    62.428103  
2    87.404430  
3    93.481378  
4    50.098340  
