In [2]:
import pandas as pd
import plotly.express as px


# Read Line 1 Transit time data

In [3]:
# NOTE(review): absolute, user-specific OneDrive path - this cell only runs on
# the original author's machine. Consider a configurable data directory.
LINE1_TRANSIT_XLSX = r"C:\Users\siwz@equinor.com\OneDrive - Equinor\INTERN Workfiles\Python\Colonial Data Extraction\ColonialTransitTimes_Line1.xlsx"

# Raw Line 1 transit-time table, exactly as stored in the workbook.
df = pd.read_excel(LINE1_TRANSIT_XLSX)


# Read pipeline EIA data

In [4]:
# NOTE(review): absolute, user-specific OneDrive path - this cell only runs on
# the original author's machine. Consider a configurable data directory.
EIA_GASOLINE_CSV = r'C:\Users\siwz@equinor.com\OneDrive - Equinor\personal\projects\Colonial Pipeline Forecast\gasoline_pipeline_EIA.csv'

# EIA series; the displayed output shows the columns "period" and "value".
EIA_df = pd.read_csv(EIA_GASOLINE_CSV)
EIA_df

Unnamed: 0,period,value
0,2018-01,1528.900000
1,2018-02,1422.333333
2,2018-03,1501.033333
3,2018-04,1459.100000
4,2018-05,1566.933333
...,...,...
83,2024-12,1580.500000
84,2025-01,1520.400000
85,2025-02,1563.100000
86,2025-03,1558.333333


# Further cleaning of the Line 1 transit times

## Keep only the latest row for each cycle (per calendar year)


## TODO: consider averaging the transit time over each cycle instead (not implemented)

In [5]:
def keep_latest_cycle_per_year(df, date_col="Date", cycle_col="Cycle"):
    """Keep, for every (calendar year, cycle) pair, the row with the latest date.

    A "Gas Transit Days" column is added as ``Gas Days + Gas Hours / 24``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date_col``, ``cycle_col``, "Gas Days" and "Gas Hours".
        The caller's frame is not modified.
    date_col : str
        Column parsed with ``pd.to_datetime(..., errors="coerce")``.
    cycle_col : str
        Column holding the cycle identifier.

    Returns
    -------
    pandas.DataFrame
        One row per (year, cycle), sorted by ``date_col`` with a fresh
        0..n-1 index. Rows with an unparseable date or a missing cycle fall
        outside every group (groupby drops NaN keys) and are left out.
    """
    # Work on a copy with a unique positional index: idxmax() returns index
    # *labels*, and .loc on a frame with repeated labels (e.g. after a concat
    # of several files) would return every row sharing the winning label.
    df = df.copy().reset_index(drop=True)
    df["Gas Transit Days"] = df["Gas Days"] + df["Gas Hours"] / 24

    # ensure datetime
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")

    # group by (year, cycle) and take the label of the latest date in each group
    year = df[date_col].dt.year
    idx = df.groupby([year, df[cycle_col]])[date_col].idxmax()

    return df.loc[idx].sort_values(date_col).reset_index(drop=True)

# Reduce the raw Line 1 table to one row per (year, cycle) - the row with the
# latest date - and display the result.
line_1_df = keep_latest_cycle_per_year(df)
line_1_df

Unnamed: 0,Date,From,To,Cycle,Gas Days,Gas Hours,Distillates Days,Distillates Hours,Gas Transit Days
0,2024-08-22,HTN,GBJ,46,8,7,7.0,7.0,8.291667
1,2024-08-26,HTN,GBJ,47,7,6,7.0,10.0,7.250000
2,2024-08-29,HTN,GBJ,48,8,18,9.0,10.0,8.750000
3,2024-09-04,HTN,GBJ,49,7,6,9.0,10.0,7.250000
4,2024-09-08,HTN,GBJ,50,8,23,9.0,18.0,8.958333
...,...,...,...,...,...,...,...,...,...
69,2025-07-27,HTN,GBJ,42,9,8,10.0,5.0,9.333333
70,2025-08-03,HTN,GBJ,43,8,19,7.0,11.0,8.791667
71,2025-08-06,HTN,GBJ,44,9,3,7.0,12.0,9.125000
72,2025-08-10,HTN,GBJ,45,9,3,10.0,7.0,9.125000


## Calculate the monthly avg gas transit time

In [52]:
def monthly_avg_gas_transit(df, date_col="Date", transit_col="Gas Transit Days"):
    """Average the transit time per calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date_col`` and ``transit_col``. Not modified.
    date_col : str
        Column parsed with ``pd.to_datetime(..., errors="coerce")``.
    transit_col : str
        Column parsed with ``pd.to_numeric(..., errors="coerce")``.

    Returns
    -------
    pandas.DataFrame
        Columns "Month" (string such as '2025-05') and
        "Gas Transit Days Avg" (mean of ``transit_col`` in that month), one
        row per month, in chronological order. Rows whose date or transit
        value cannot be parsed are ignored.
    """
    d = df[[date_col, transit_col]].copy()
    d[date_col] = pd.to_datetime(d[date_col], errors="coerce")
    d[transit_col] = pd.to_numeric(d[transit_col], errors="coerce")

    # Drop unusable rows first and derive the month key from the *cleaned*
    # frame so key and data share one index. Building the key from the
    # un-filtered frame forces pandas to reindex it onto the filtered rows,
    # which raises on a non-unique index as soon as any row is dropped.
    clean = d.dropna()
    month = clean[date_col].dt.to_period("M")

    out = (
        clean.groupby(month)[transit_col]
             .mean()
             .reset_index()
             .rename(columns={date_col: "Month", transit_col: "Gas Transit Days Avg"})
    )
    out["Month"] = out["Month"].astype(str)  # e.g., '2025-05'
    return out


# Monthly mean of "Gas Transit Days" over the per-cycle rows kept above;
# the result has the columns "Month" and "Gas Transit Days Avg".
ma_line1_df = monthly_avg_gas_transit(line_1_df, date_col="Date", transit_col="Gas Transit Days")
ma_line1_df
 

Unnamed: 0,Month,Gas Transit Days Avg
0,2024-08,8.097222
1,2024-09,8.173611
2,2024-10,8.013889
3,2024-11,8.333333
4,2024-12,7.614583
5,2025-01,7.775
6,2025-02,7.732143
7,2025-03,8.479167
8,2025-04,8.798611
9,2025-05,9.944444


# Plot the actual pipeline data against the implied pipeline range

In [53]:
def bounds_from_monthly_transit(ma_df, lower_L=7.0, upper_L=8.61):
    """Turn monthly average transit times into a lower/upper band.

    Parameters
    ----------
    ma_df : pandas.DataFrame
        Needs the columns "Month" and "Gas Transit Days Avg".
    lower_L, upper_L : float
        Numerators divided by the transit time T to form the band.
        NOTE(review): presumably line-fill volumes, so that L / T is a flow
        rate - confirm the units with the author.

    Returns
    -------
    pandas.DataFrame
        Columns "date" (``pd.to_datetime`` of "Month"), "Q_low_mbd"
        (``lower_L / T``) and "Q_high_mbd" (``upper_L / T``), sorted by
        "date". The input's index labels are carried through.
    """
    month_start = pd.to_datetime(ma_df["Month"])
    transit_days = pd.to_numeric(ma_df["Gas Transit Days Avg"])

    band = pd.DataFrame(
        {
            "date": month_start,
            "Q_low_mbd": lower_L / transit_days,
            "Q_high_mbd": upper_L / transit_days,
        }
    )
    return band.sort_values("date")

def plot_bounds_vs_eia(ma_df, eia_df, lower_L=7.0, upper_L=8.61, eia_factor=0.65):
    """Plot the transit-implied band against the scaled EIA series.

    Parameters
    ----------
    ma_df : pandas.DataFrame
        Monthly transit averages with the columns "Month" and
        "Gas Transit Days Avg" (passed to ``bounds_from_monthly_transit``).
    eia_df : pandas.DataFrame
        Needs the columns "period" and "value".
    lower_L, upper_L : float
        Numerators of the lower / upper bound (bound = L / T). They also
        appear in the legend labels.
    eia_factor : float
        Multiplier applied to ``value / 1000``. The default 0.65 reproduces
        the previously hard-coded factor.
        NOTE(review): the meaning of 0.65 is not documented in the notebook -
        presumably the share of the EIA series attributed to Line 1; confirm.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Build bounds from monthly transit averages, keyed on the month start.
    b = bounds_from_monthly_transit(ma_df, lower_L, upper_L)
    b = b.assign(date=b["date"].dt.to_period("M").dt.to_timestamp())

    # Prepare EIA P3→P1 gasoline receipts (kbd -> Mb/d, per the original note)
    a = eia_df[["period", "value"]].copy()
    a["date"] = pd.to_datetime(a["period"]).dt.to_period("M").dt.to_timestamp()
    a["actual"] = pd.to_numeric(a["value"]) / 1000.0 * eia_factor
    a = (
        a[["date", "actual"]]
        .dropna(subset=["date"])
        .sort_values("date")
        .drop_duplicates("date", keep="last")
    )

    # Exact month-to-month join. Snapping both sides to the month start still
    # absorbs day-level mismatches, but - unlike an unbounded backward
    # merge_asof - months with no EIA observation stay NaN instead of
    # silently repeating the last published value.
    m = b.merge(a, on="date", how="left")

    # Legend labels follow the parameters actually used for the bounds.
    labels = {
        "Q_low_mbd":  f"{lower_L:g} / T (Mb/d)",
        "Q_high_mbd": f"{upper_L:g} / T (Mb/d)",
        "actual":     "P3→P1 Gasoline (Mb/d)",
    }

    # Long form for px.line; relabel after melting so identical labels
    # (lower_L == upper_L) cannot produce duplicate column names.
    long = m.melt(id_vars="date", value_vars=list(labels),
                  var_name="Series", value_name="Mb/d")
    long["Series"] = long["Series"].map(labels)

    fig = px.line(long, x="date", y="Mb/d", color="Series",
                  title="Line 1 Bounds from Transit vs P3→P1 Gasoline Pipeline Receipts")
    fig.update_layout(xaxis_title="Month", yaxis_title="Mb/d", legend_title_text="")
    fig.show()



In [54]:
# Draw the chart with the default bound numerators (lower_L=7.0, upper_L=8.61).
plot_bounds_vs_eia(ma_line1_df, EIA_df)

In [55]:
# Re-display the EIA frame loaded earlier; note its last period (2025-03)
# ends before the last month of transit data.
EIA_df

Unnamed: 0,period,value
0,2018-01,1528.900000
1,2018-02,1422.333333
2,2018-03,1501.033333
3,2018-04,1459.100000
4,2018-05,1566.933333
...,...,...
83,2024-12,1580.500000
84,2025-01,1520.400000
85,2025-02,1563.100000
86,2025-03,1558.333333
