<a href="https://colab.research.google.com/github/SAHIL9581/w2w/blob/main/W2W_WNB_INFERENCE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
#@title 1. Setup Environment & Install Libraries

# --- 1. Install Required Libraries ---
print("--> Installing libraries for inference...")
!pip install wandb torch lasio scikit-learn pandas matplotlib joblib pyyaml -q
print("✅ Installation complete.")

# --- 2. Create Temporary Workspace ---
import os
os.makedirs("/content/inference_data", exist_ok=True)
os.makedirs("/content/inference_artifacts", exist_ok=True)
os.chdir("/content/inference_data")
print(f"✅ Workspace created at {os.getcwd()}")

--> Installing libraries for inference...
✅ Installation complete.
✅ Workspace created at /content/inference_data


In [25]:
#@title 2. Login to Weights & Biases
import wandb

print("--> ACTION REQUIRED: Please log in to your Weights & Biases account.")
!wandb login

--> ACTION REQUIRED: Please log in to your Weights & Biases account.
[34m[1mwandb[0m: Currently logged in as: [33msahilpareek203[0m ([33msahilpareek203-amrita-vishwa-vidyapeetham[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [26]:
#@title 3. Configure Inference Run

# --- ACTION REQUIRED: Fill in your W&B project details ---
# You can find these details in the URL of your previous W&B runs.
# Example URL: https://wandb.ai/your-entity/your-project/runs/...

WANDB_ENTITY = "sahilpareek203-amrita-vishwa-vidyapeetham"  # Your W&B username or team name
WANDB_PROJECT = "W2W_Matcher_Pipeline_Notebook"       # The project name where your model was saved
MODEL_VERSION = "latest"  # Use "latest" to get the most recent model, or a specific version like "v0"

# --- Leave the rest of the config as is ---
config = {
    "paths": {
        "processed_csv_path": "full_well_data.csv",
        "label_encoder_path": "inference_artifacts/label_encoder.json",
        "std_scaler_path": "inference_artifacts/StandardScaler.bin",
        "final_model_path": "inference_artifacts/final_model.pt"
    },
    "finetuning": {
        "model_params": {
            "patch_height": 700, "act_name": "prelu",
            "hidden_dim": 256, "num_queries": 100,
            "num_heads": 8, "dropout": 0.1,
            "num_transformers": 6, "output_size": 3
        }
    },
    "inference": {
        "correlation_threshold": 0.7
    }
}

print("✅ Inference configuration set.")

✅ Inference configuration set.


In [27]:
#@title 4. Download Production Artifacts from W&B (Final Corrected Version)
import wandb
import os

print("--> Connecting to W&B to download production artifacts...")
# Initialize the API to access the project
api = wandb.Api()
entity_project_path = f"{WANDB_ENTITY}/{WANDB_PROJECT}"

try:
    # Download the main trained model
    model_artifact_name = f'boundary-detector-model:{MODEL_VERSION}'
    print(f"--> Downloading model: {model_artifact_name}")
    model_artifact = api.artifact(f'{entity_project_path}/{model_artifact_name}', type='model')
    model_artifact.download(root=os.path.dirname(config['paths']['final_model_path']))
    print("✅ Model downloaded successfully.")

    # Download the StandardScaler
    scaler_artifact_name = 'StandardScaler:latest'
    print(f"--> Downloading StandardScaler: {scaler_artifact_name}")
    scaler_artifact = api.artifact(f'{entity_project_path}/{scaler_artifact_name}', type='preprocessor')
    scaler_artifact.download(root=os.path.dirname(config['paths']['std_scaler_path']))
    print("✅ StandardScaler downloaded successfully.")

    # Download the Label Encoder
    encoder_artifact_name = 'LabelEncoder:latest'
    print(f"--> Downloading Label Encoder: {encoder_artifact_name}")
    encoder_artifact = api.artifact(f'{entity_project_path}/{encoder_artifact_name}', type='preprocessor')
    encoder_artifact.download(root=os.path.dirname(config['paths']['label_encoder_path']))
    print("✅ Label Encoder downloaded successfully.")

except Exception as e:
    print("\n" + "="*50)
    print("🚨 ERROR DOWNLOADING ARTIFACTS 🚨")
    print(f"An error occurred: {e}")
    print("\nPLEASE CHECK THE FOLLOWING:")
    print("1. Did you successfully re-run the TRAINING notebook with the corrected Cell 13?")
    print(f"2. Do you see 'StandardScaler' and 'LabelEncoder' in the 'Artifacts' tab of your W&B project: https://wandb.ai/{entity_project_path}/artifacts")
    print(f"3. Are the WANDB_ENTITY and WANDB_PROJECT names in Cell 3 of THIS notebook spelled correctly?")
    print("="*50)

print("\n--- All necessary artifacts should now be available locally. ---")

--> Connecting to W&B to download production artifacts...
--> Downloading model: boundary-detector-model:latest


[34m[1mwandb[0m:   1 of 1 files downloaded.  


✅ Model downloaded successfully.
--> Downloading StandardScaler: StandardScaler:latest


[34m[1mwandb[0m:   1 of 1 files downloaded.  


✅ StandardScaler downloaded successfully.
--> Downloading Label Encoder: LabelEncoder:latest


[34m[1mwandb[0m:   1 of 1 files downloaded.  


✅ Label Encoder downloaded successfully.

--- All necessary artifacts should now be available locally. ---


In [28]:
#@title 5. Upload Full LAS Dataset (ZIP file)
from google.colab import files
import os
import pandas as pd
import lasio
from joblib import load
import json

print(">>> ACTION REQUIRED: Please upload the same ZIP file with all .las files used for training.")
uploaded = files.upload()

if uploaded:
    zip_filename = list(uploaded.keys())[0]
    print(f"\n✅ '{zip_filename}' uploaded. Processing into a single CSV for lookup...")

    # Unzip and process the files
    os.makedirs("raw_las", exist_ok=True)
    !unzip -q -o "{zip_filename}" -d raw_las/

    all_wells_df, las_files_found = [], []
    for root, dirs, files in os.walk("raw_las"):
        for file in files:
            if file.lower().endswith('.las'):
                las_files_found.append(os.path.join(root, file))

    for filepath in las_files_found:
        try:
            las = lasio.read(filepath)
            df = las.df().reset_index()
            df['WELL'] = las.well.WELL.value or os.path.splitext(os.path.basename(filepath))[0]
            df['GROUP'] = 'UNKNOWN'
            for param in las.params:
                if 'GROUP' in param.mnemonic.upper(): df['GROUP'] = param.value
            all_wells_df.append(df)
        except Exception as e: print(f"    - Could not read {filepath}: {e}")

    master_df = pd.concat(all_wells_df, ignore_index=True)
    if 'DEPT' in master_df.columns: master_df.rename(columns={'DEPT': 'DEPTH_MD'}, inplace=True)
    master_df.to_csv(config['paths']['processed_csv_path'], index=False, sep=';')

    print(f"\n✅ Successfully processed {len(las_files_found)} files into '{config['paths']['processed_csv_path']}'.")

    # Print available wells for convenience
    print("\n--- Available Well Names for Correlation ---")
    for well in sorted(master_df['WELL'].unique()):
        print(f"- {well}")
    print("------------------------------------------")

    # Clean up
    os.remove(zip_filename)
    !rm -rf raw_las
else:
    print("\n⚠️ Upload cancelled.")

>>> ACTION REQUIRED: Please upload the same ZIP file with all .las files used for training.


Saving train.zip to train.zip

✅ 'train.zip' uploaded. Processing into a single CSV for lookup...

✅ Successfully processed 118 files into 'full_well_data.csv'.

--- Available Well Names for Correlation ---
- 15/9-13 Sleipner East Appr
- 15/9-14
- 15/9-15 Gungne
- 15/9-17
- 15/9-23 Skardkollen
- 16/1-2  Ivar Aasen Appr
- 16/1-6 A Verdandi Appr
- 16/10-1 Alpha
- 16/10-2 Delta
- 16/10-3 Tyr Central
- 16/10-5 Isbjoern
- 16/11-1S T3
- 16/2-11 A Johan Sverdrup Appr
- 16/2-16 Johan Sverdrup Appr
- 16/2-6 Johan Sverdrup
- 16/2-7 Johan Sverdrup Appr
- 16/4-1
- 16/5-3 Johan Sverdrup Appr
- 16/7-4 Sigyn
- 16/7-5
- 16/7-6
- 16/8-1
- 17/11-1
- 17/4-1
- 25/10-10  Balder Triassic
- 25/10-9 Aegis
- 25/11-15  Grane
- 25/11-19 S  Balder Appr
- 25/11-24 Jakob South
- 25/11-5 Balder Appr
- 25/2-13 T4
- 25/2-14 Froey Appr
- 25/2-7
- 25/3-1
- 25/4-5
- 25/5-1 Froey
- 25/5-3  Skirne
- 25/5-4  Byggve
- 25/6-1
- 25/6-2  Delta-Beta
- 25/6-3
- 25/7-2
- 25/8-5 S  Jotun
- 25/8-7  Krap 1
- 25/9-1  Rummel
- 26/4-1
-

In [29]:
#@title 6. Define Model Architectures
import torch
import torch.nn as nn

def get_activation(name): return nn.PReLU() if name == 'prelu' else nn.ReLU() if name == 'relu' else nn.GELU()

class Block1D(nn.Module):
    def __init__(self, in_channels, out_channels, stride=2, kernel_size=3, activation='prelu'):
        super().__init__(); self.b = nn.Sequential(nn.Conv1d(in_channels,out_channels,kernel_size,stride,padding=kernel_size//2), nn.BatchNorm1d(out_channels), get_activation(activation), nn.Conv1d(out_channels,out_channels,kernel_size,1,padding=kernel_size//2), nn.BatchNorm1d(out_channels), get_activation(activation))
    def forward(self, x): return self.b(x)

class UNetEncoder1D(nn.Module):
    def __init__(self, in_channels, activation='prelu'):
        super().__init__(); self.start=Block1D(in_channels,32,stride=1,activation=activation); self.e1=Block1D(32,64,stride=2,activation=activation); self.e2=Block1D(64,128,stride=2,activation=activation); self.e3=Block1D(128,256,stride=2,activation=activation); self.mid=Block1D(256,512,stride=2,activation=activation)
    def forward(self, x): x=x.squeeze(1).permute(0,2,1); s1=self.start(x); s2=self.e1(s1); s3=self.e2(s2); s4=self.e3(s3); return self.mid(s4)

class Project(nn.Module):
    def __init__(self,i,o): super().__init__(); self.l=nn.Linear(i,o)
    def forward(self,x): return self.l(x.flatten(1))

class Query(nn.Module):
    def __init__(self,s,d): super().__init__(); self.q=nn.Parameter(torch.randn(1,s,d))
    def forward(self,x): return self.q.repeat(x.shape[0],1,1)

class Transformer(nn.Module):
    def __init__(self,i,n,d): super().__init__(); self.t=nn.TransformerEncoderLayer(d_model=i,nhead=n,dropout=d,batch_first=True,dim_feedforward=i*4)
    def forward(self,q,c): return self.t(q)

class W2WTransformerModel(nn.Module):
    def __init__(self,c):
        super().__init__()
        p=c['finetuning']['model_params']
        # Dynamically calculate the flattened feature size
        with torch.no_grad():
            dummy_encoder = UNetEncoder1D(p['in_channels'], p['act_name'])
            dummy_output = dummy_encoder(torch.randn(1, 1, p['patch_height'], p['in_channels']))
            p_in = dummy_output.flatten(1).shape[1]

        self.encoder = UNetEncoder1D(p['in_channels'], p['act_name'])
        self.project = Project(p_in, p['hidden_dim'])
        self.query = Query(p['num_queries'], p['hidden_dim'])
        self.transformers = nn.ModuleList([Transformer(p['hidden_dim'], p['num_heads'], p['dropout']) for _ in range(p['num_transformers'])])
        self.finalize = nn.Sequential(nn.Linear(p['hidden_dim'], p['output_size']), get_activation(p['act_name']), nn.LayerNorm(p['output_size']))

    def forward(self,img):
        seq = self.project(self.encoder(img)).unsqueeze(1)
        q = self.query(seq)
        for t in self.transformers: q = t(q, seq)
        return self.finalize(q)

print("✅ Model architectures defined.")

✅ Model architectures defined.


In [30]:
#@title 7. Define Plotting Logic
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

def plot_well_correlation(well1,well2,layers1,layers2,matrix,threshold,path):
    plt.style.use('ggplot'); fig,ax=plt.subplots(figsize=(10,12))
    if not layers1 or not layers2: print(f'Warning: Plotting skipped. Well 1 layers:{len(layers1)}, Well 2:{len(layers2)}'); return
    max_depth=max(layers1[-1]['bottom'],layers2[-1]['bottom']) if layers1 and layers2 else 1000
    ax.set_ylim(max_depth+50,-50); ax.set_xlim(-0.5,2.5)
    n1=len(set(l['Group'] for l in layers1)); n2=len(set(l['Group'] for l in layers2))
    for l in layers1: ax.add_patch(patches.Rectangle((0,l['Top']),1,l['Height'],ec='k',fc=plt.cm.viridis(l['Group']/(n1 if n1>0 else 1)),alpha=0.6))
    for l in layers2: ax.add_patch(patches.Rectangle((1.5,l['Top']),1,l['Height'],ec='k',fc=plt.cm.viridis(l['Group']/(n2 if n2>0 else 1)),alpha=0.6))
    for i,row in enumerate(matrix):
        for j,sim in enumerate(row):
            if sim>=threshold: ax.add_patch(patches.Polygon([[1,layers1[i]['Top']],[1,layers1[i]['bottom']],[1.5,layers2[j]['bottom']],[1.5,layers2[j]['Top']]],fc=plt.cm.Greens(sim),alpha=0.5))
    ax.set_xticks([0.5,2]); ax.set_xticklabels([well1,well2],fontsize=14); ax.set_ylabel('Depth',fontsize=12); ax.set_title('Well to Well Correlation',fontsize=16); plt.savefig(path); plt.close()
    print(f'--> Correlation plot saved to {path}')

def generate_single_correlation_plot(config,full_data,ref_name,woi_name,out_path):
    inf,p=config['inference'],config['paths']; ref_df,woi_df=full_data[full_data['WELL']==ref_name],full_data[full_data['WELL']==woi_name]
    if ref_df.empty or woi_df.empty: print(f"Error: Could not find '{ref_name}' or '{woi_name}'. Please check names."); return False
    with open(p['label_encoder_path']) as f: le=json.load(f)
    def get_layers(df):
        df=df.copy().reset_index(drop=True); df['gid']=df['GROUP'].astype(str).map(le).fillna(-1).astype(int); b=np.where(df['gid'].iloc[:-1].values!=df['gid'].iloc[1:].values)[0]+1
        indices=np.concatenate(([0],b,[len(df)])); layers=[]
        for i in range(len(indices)-1):
            s,e=indices[i],indices[i+1]
            if s<e: layers.append({'Top':df['DEPTH_MD'].iloc[s],'bottom':df['DEPTH_MD'].iloc[e-1],'Height':df['DEPTH_MD'].iloc[e-1]-df['DEPTH_MD'].iloc[s],'Group':df['gid'].iloc[s]})
        return layers
    ref_l,woi_l=get_layers(ref_df),get_layers(woi_df); sim=np.zeros((len(ref_l),len(woi_l)))

    # This is the MOCK INFERENCE part. A real implementation would use the model here.
    print('--> MOCK INFERENCE: Using ground truth layers for visualization.')
    for i,l1 in enumerate(ref_l):
        for j,l2 in enumerate(woi_l): sim[i,j]=np.random.uniform(0.8,0.95) if l1['Group']==l2['Group'] and l1['Group']!=-1 else np.random.uniform(0.1,0.4)

    plot_well_correlation(ref_name,woi_name,ref_l,woi_l,sim,inf['correlation_threshold'],out_path); return True

print("✅ Plotting logic defined.")

✅ Plotting logic defined.


In [31]:
#@title 8. 🚀 Inference Dashboard: Generate and Log Plots

import torch
from joblib import load
import pandas as pd

# --- ACTION REQUIRED: Define the well pairs you want to plot ---
# Copy and paste valid well names from the output of Cell 5.
well_pairs_to_plot = [
    ("15_9-13 Sleipner East Appr", "16/1-2  Ivar Aasen Appr"),
    ("16/2-6 Johan Sverdrup", "16/5-3 Johan Sverdrup Appr"),
    ("35/11-1", "35/11-6"),
    # Add more pairs here...
]
# -----------------------------------------------------------------

print("--- Initializing Inference Run ---")
# 1. Dynamically get the number of input features from the saved scaler
scaler = load(config['paths']['std_scaler_path'])
config['finetuning']['model_params']['in_channels'] = scaler.n_features_in_
print(f"Loaded scaler with {scaler.n_features_in_} features.")

# 2. Load the trained model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = W2WTransformerModel(config).to(device)
model.load_state_dict(torch.load(config['paths']['final_model_path'], map_location=device))
model.eval()
print(f"✅ Model '{config['paths']['final_model_path']}' loaded successfully onto {device}.")

# 3. Load the full dataset for lookups
full_data = pd.read_csv(config['paths']['processed_csv_path'], delimiter=';')
print(f"✅ Full dataset with {len(full_data.WELL.unique())} wells loaded.")

# 4. Generate plots and log to W&B
with wandb.init(project=WANDB_PROJECT, entity=WANDB_ENTITY, job_type='inference-dashboard') as run:
    print(f"\n--> W&B Run for multiple plots started. View at: {run.url}")
    plots_to_log = {}
    for i, (well1, well2) in enumerate(well_pairs_to_plot):
        print(f"\n--- Generating plot for: {well1} vs {well2} ---")
        # Create a filesystem-safe filename
        safe_well1 = well1.replace('/','-').replace(' ','_')
        safe_well2 = well2.replace('/','-').replace(' ','_')
        output_filename = f"correlation_{safe_well1}_vs_{safe_well2}.png"

        # NOTE: This still uses the MOCK inference logic.
        # To use the real model, you would pass data patches through `model(patches)`
        # and interpret the output to create the similarity matrix.
        success = generate_single_correlation_plot(config, full_data, well1, well2, output_filename)

        if success:
            plots_to_log[f"Plot_{i+1}_{well1}_vs_{well2}"] = wandb.Image(output_filename)

    if plots_to_log:
        print("\n--> Logging all plots to Weights & Biases...")
        wandb.log(plots_to_log)
        print("✅ All plots logged successfully.")
    else:
        print("\n--> No plots were generated to log.")

--- Initializing Inference Run ---
Loaded scaler with 25 features.
✅ Model 'inference_artifacts/final_model.pt' loaded successfully onto cpu.
✅ Full dataset with 118 wells loaded.



--> W&B Run for multiple plots started. View at: https://wandb.ai/sahilpareek203-amrita-vishwa-vidyapeetham/W2W_Matcher_Pipeline_Notebook/runs/hql4nox7

--- Generating plot for: 15_9-13 Sleipner East Appr vs 16/1-2  Ivar Aasen Appr ---
Error: Could not find '15_9-13 Sleipner East Appr' or '16/1-2  Ivar Aasen Appr'. Please check names.

--- Generating plot for: 16/2-6 Johan Sverdrup vs 16/5-3 Johan Sverdrup Appr ---
--> MOCK INFERENCE: Using ground truth layers for visualization.
--> Correlation plot saved to correlation_16-2-6_Johan_Sverdrup_vs_16-5-3_Johan_Sverdrup_Appr.png

--- Generating plot for: 35/11-1 vs 35/11-6 ---
--> MOCK INFERENCE: Using ground truth layers for visualization.
--> Correlation plot saved to correlation_35-11-1_vs_35-11-6.png

--> Logging all plots to Weights & Biases...
✅ All plots logged successfully.
