# Well Level Produced Gas

README

## Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [None]:
def load_and_preprocess_data(filepath='well_event_pg_values.csv'):
    """Load the well-event CSV, tag each row with its pad, and keep target pads.

    Args:
        filepath: Path to the CSV file. It must provide a 'timestamp' column
            (parsed as dates) and a 'well' column.

    Returns:
        The DataFrame produced by ``filter_target_pads`` (called with its
        default pad list) after a 'pad' column has been derived from 'well'
        via ``extract_pad``.
    """
    events = pd.read_csv(filepath, parse_dates=['timestamp'])
    # Pad is derived row by row from the well name.
    events['pad'] = events['well'].apply(extract_pad)
    return filter_target_pads(events)

In [None]:

def extract_pad(well):
    """Derive the pad identifier string from a well name.

    The rules are tried in order; the first one that matches wins:

    1. Two leading digits followed by a letter (e.g. '15W'):
       pad is '1' + the two digits (-> '115').
    2. One leading digit followed by two letters (e.g. '7WP'):
       pad is '10' + the digit (-> '107').
    3. Three leading digits (e.g. '106W13'):
       pad is those three digits (-> '106').
    4. Anything else: the first three characters of the name, unchanged.

    Args:
        well: Well name as a string.

    Returns:
        The pad identifier as a string.
    """
    # NOTE(review): a name with one digit and a single letter (e.g. '5W1')
    # matches none of the patterns below and ends up in the raw-prefix
    # fallback -- confirm that this is intended.
    two_digit = re.match(r'^(\d{2})[A-Za-z]', well)
    if two_digit:
        return '1' + two_digit.group(1)

    one_digit = re.match(r'^(\d)[A-Za-z]{2}', well)
    if one_digit:
        return '10' + one_digit.group(1)

    three_digit = re.match(r'^(\d{3})', well)
    if three_digit:
        return three_digit.group(1)

    # No recognised naming scheme: fall back to the first three characters.
    return well[:3]

In [None]:

def filter_target_pads(df, target_pads=None):
    """
    Keep only the rows of ``df`` whose 'pad' value is one of ``target_pads``.

    Pad values are compared as strings. When ``target_pads`` is None the
    default list ['105', '106', '107', '108', '116'] is used. The returned
    DataFrame has a fresh 0..n-1 index.
    """
    wanted = ['105', '106', '107', '108', '116'] if target_pads is None else target_pads
    pad_text = df['pad'].astype(str)
    keep_mask = pad_text.isin(wanted)
    selected = df[keep_mask]
    return selected.reset_index(drop=True)
