# Processing and analyzing wells data

### Load python tools

In [1]:
import pandas as pd
import geopandas as gpd
import altair as alt

### Make stuff pretty

In [2]:
%load_ext lab_black

In [3]:
import altair as alt
import altair_latimes as lat

In [4]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.set_option("display.max_colwidth", None)

---

### Let's get the latest version of the data

In [5]:
# Download CSV from this url: http://aogweb.state.ak.us/DataMiner3/Forms/Production.aspx

In [6]:
src = pd.read_csv("input/Production 20210305 131038.csv")

### How many records are here? 

In [7]:
len(src)

1153194

### Clean up the headers

In [8]:
src.columns = (
    src.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_")
    .str.replace("(", "")
    .str.replace(")", "")
    .str.replace("/", "_")
)

### Parse dates

In [9]:
src["date"] = pd.to_datetime(src["date"], format="%m/%Y")
src["month"] = src["date"].dt.strftime("%m")
src["year"] = src["date"].dt.strftime("%Y")

### Remove thousand separator characters from production details

In [10]:
src["gas"] = src["gas"].str.replace(",", "", regex=False)
src["oil"] = src["oil"].str.replace(",", "", regex=False)
src["water"] = src["water"].str.replace(",", "", regex=False)

### Convert production totals and days to numbers

In [11]:
src[["gas", "oil", "water", "days"]] = (
    src[["gas", "oil", "water", "days"]].fillna("0").astype(int)
)

In [12]:
src.head()

Unnamed: 0,permit,api,well_name,operator,well_status,area,field,pool,pad,date,production_type,production_method,oil,gas,water,days,month,year
0,208098,50883201270000,N COOK INLET UNIT A-16,"Hilcorp Alaska, LLC","Gas well, single completion",COOK INLET BASIN,NORTH COOK INLET,TERTIARY GAS,NO_COOK_INLET,2021-01-01,Gas Production,Gas Lift,0,73076,763,31,1,2021
1,208097,50883201260000,N COOK INLET UNIT A-15,"Hilcorp Alaska, LLC","Gas well, single completion",COOK INLET BASIN,NORTH COOK INLET,TERTIARY GAS,NO_COOK_INLET,2021-01-01,Gas Production,Gas Lift,0,44246,1608,31,1,2021
2,208096,50883201250000,N COOK INLET UNIT A-14,"Hilcorp Alaska, LLC","Gas well, single completion",COOK INLET BASIN,NORTH COOK INLET,TERTIARY GAS,NO_COOK_INLET,2021-01-01,Gas Production,Gas Lift,0,11467,1994,31,1,2021
3,198059,50883200950000,N COOK INLET UNIT B-03,"Hilcorp Alaska, LLC","Gas well, single completion",COOK INLET BASIN,NORTH COOK INLET,TERTIARY GAS,NO_COOK_INLET,2021-01-01,Gas Production,Gas Lift,0,32784,4917,31,1,2021
4,198002,50883200930100,N COOK INLET UNIT B-01A,"Hilcorp Alaska, LLC","Gas well & Disposal well, dual comp",COOK INLET BASIN,NORTH COOK INLET,TERTIARY GAS,NO_COOK_INLET,2021-01-01,Gas Production,Shut-In,0,0,0,0,1,2021


### Focus on the area of interest

In [13]:
# Hilcorp North Slope only produces in Prudhoe Bay, but to narrow down the BP search to those wells, I put "Arctic Slope" in Area and "Prudhoe Bay" in Field.

In [14]:
src_limit = src[(src["area"] == "ARCTIC SLOPE") & (src["field"] == "PRUDHOE BAY")]

### How many records now?

In [15]:
len(src_limit)

526765

In [16]:
# I'm specifically interested in production by Hilcorp North Slope from June 2020 through January 2021

In [17]:
months = ["06", "07", "08", "09", "10", "11", "12", "01"]

In [18]:
src_limit_dates = src_limit[src_limit["month"].isin(months)]

### How many records now?

In [19]:
len(src_limit_dates)

353047

In [20]:
src_limit_dates.operator.value_counts()

BP Exploration (Alaska) Inc.          323816
ARCO Alaska Inc.                       18729
Hilcorp North Slope, LLC               10117
Standard Alaska Production Company       220
ConocoPhillips Alaska, Inc.              136
Sohio Alaska Petroleum Company            29
Name: operator, dtype: int64

---

### OK, let's get to work

In [21]:
df = src_limit_dates.copy()

### Limit well analysis to Hilcorp North Slope, LLC

In [22]:
df_hilcorp = df[df["operator"] == "Hilcorp North Slope, LLC"]

In [23]:
df_hilcorp.head()

Unnamed: 0,permit,api,well_name,operator,well_status,area,field,pool,pad,date,production_type,production_method,oil,gas,water,days,month,year
975,219187,50029236610000,PRUDHOE BAY UNIT J-31,"Hilcorp North Slope, LLC","Oil well, single completion",ARCTIC SLOPE,PRUDHOE BAY,PRUDHOE OIL,PBU_J,2021-01-01,Oil Production,Flowing,23793,602981,37092,31,1,2021
981,219120,50029236470000,PRUDHOE BAY UN POL S-202,"Hilcorp North Slope, LLC","Oil well, single completion",ARCTIC SLOPE,PRUDHOE BAY,POLARIS OIL,PBU_S,2021-01-01,Oil Production,Gas Lift,56517,73097,0,31,1,2021
986,219102,50029236420000,PRUDHOE BAY UN LIS L5-07,"Hilcorp North Slope, LLC","Oil well, single completion",ARCTIC SLOPE,PRUDHOE BAY,LISBURNE OIL,PBU_LIS_L5,2021-01-01,Oil Production,Flowing,25336,331416,14473,31,1,2021
989,219073,50029236330000,PRUDHOE BAY UNIT 01-37,"Hilcorp North Slope, LLC","Oil well, single completion",ARCTIC SLOPE,PRUDHOE BAY,PRUDHOE OIL,PBU_DS1,2021-01-01,Oil Production,Gas Lift,13642,510738,55114,31,1,2021
992,219050,50029236280000,PRUDHOE BAY UN LIS L3-06,"Hilcorp North Slope, LLC","Oil well, single completion",ARCTIC SLOPE,PRUDHOE BAY,LISBURNE OIL,PBU_LIS_L3,2021-01-01,Oil Production,Flowing,8684,228494,3646,31,1,2021


### Make a list of all the Hilcorp wells 

In [24]:
df_hilcorp_wells = df_hilcorp.api.to_list()

In [25]:
len(df_hilcorp_wells)

10117

### What's the production by Hilcorp in 2020 & 2021? 

In [26]:
df_hilcorp[(df_hilcorp["year"] == "2020") | (df_hilcorp["year"] == "2021")].groupby(
    ["operator", "month"]
).agg({"oil": "sum", "gas": "sum", "water": "sum", "days": "size"}).reset_index()

Unnamed: 0,operator,month,oil,gas,water,days
0,"Hilcorp North Slope, LLC",1,6914369,270080714,45783845,1262
1,"Hilcorp North Slope, LLC",6,6186756,223959355,39011101,1266
2,"Hilcorp North Slope, LLC",7,6360604,225845283,43159682,1266
3,"Hilcorp North Slope, LLC",8,6229256,212702759,42815726,1267
4,"Hilcorp North Slope, LLC",9,6366968,226840297,42800112,1266
5,"Hilcorp North Slope, LLC",10,6719477,248089904,44142567,1263
6,"Hilcorp North Slope, LLC",11,6659646,245993639,44901708,1263
7,"Hilcorp North Slope, LLC",12,7039345,277644756,45874099,1264


### Get the production on those wells in past years

In [27]:
df_past = df[df["api"].isin(df_hilcorp_wells)]

### And limit it to after 1985?

In [28]:
df_past_wells = df_past[df_past["year"] > "1985"]

In [29]:
past_annual = (
    df_past_wells.groupby(["year"])
    .agg({"oil": "sum", "gas": "sum", "water": "sum", "days": "size"})
    .reset_index()
)

In [30]:
past_annual["oil_per_day"] = round(past_annual["oil"] / past_annual["days"], 0)
past_annual["gas_per_day"] = round(past_annual["gas"] / past_annual["days"], 0)
past_annual["water_per_day"] = round(past_annual["water"] / past_annual["days"], 0)

In [31]:
past_annual.head(10)

Unnamed: 0,year,oil,gas,water,days,oil_per_day,gas_per_day,water_per_day
0,1986,98390030,127845617,28598176,1347,73044.0,94911.0,21231.0
1,1987,103552721,183830549,52234134,1558,66465.0,117991.0,33526.0
2,1988,102372540,250511183,67370560,1664,61522.0,150548.0,40487.0
3,1989,97366305,285464342,74696358,1782,54639.0,160193.0,41917.0
4,1990,88291950,287618350,77465279,1929,45771.0,149102.0,40158.0
5,1991,96350406,348596918,87096947,2098,45925.0,166157.0,41514.0
6,1992,96331309,383970462,107345481,2267,42493.0,169374.0,47351.0
7,1993,92183598,406345940,129500097,2526,36494.0,160865.0,51267.0
8,1994,97586268,465522105,132469613,2818,34630.0,165196.0,47008.0
9,1995,98096291,532658766,147358425,2986,32852.0,178385.0,49350.0


In [32]:
past_annual.tail(10)

Unnamed: 0,year,oil,gas,water,days,oil_per_day,gas_per_day,water_per_day
26,2012,56950949,1354671017,206838894,7829,7274.0,173032.0,26420.0
27,2013,58120937,1498913195,202774650,8161,7122.0,183668.0,24847.0
28,2014,52149306,1485807942,205431642,8480,6150.0,175213.0,24225.0
29,2015,53926425,1553262054,214804156,8872,6078.0,175075.0,24211.0
30,2016,55021235,1665197732,230847466,9286,5925.0,179323.0,24860.0
31,2017,53486914,1656225517,244916406,9538,5608.0,173645.0,25678.0
32,2018,51947354,1719666762,262853956,9751,5327.0,176358.0,26957.0
33,2019,49506709,1769556671,281914860,9973,4964.0,177435.0,28268.0
34,2020,52457975,1923985210,343282268,10113,5187.0,190249.0,33945.0
35,2021,6914369,270080714,45783845,1262,5479.0,214010.0,36279.0


### Chart oil per-day rate by year

In [33]:
alt.Chart(past_annual).mark_line().encode(
    x=alt.X("year:T", axis=alt.Axis(format=("%y"))), y="oil_per_day"
).properties(width=1000)

### Chart more recent production

In [38]:
alt.Chart(past_annual.query("year > '2010'")).mark_line().encode(
    x=alt.X("year:T", axis=alt.Axis(format=("%m-%y"))), y="oil_per_day"
).properties(width=1000)