### Sampling from modeled and annonymized data

In [22]:
import os
import geopandas as gpd
import pandas as pd
import numpy as np

In [None]:
# re-load all_stations GeoDataFrame to a file (e.g., GeoJSON format)
output_file_path = './prep_data/all_stations.geojson'
all_stations = gpd.read_file(output_file_path)
print(f"all_stations GeoDataFrame reloaded from {output_file_path}")

# re-load all_lines GeoDataFrame to a file (e.g., GeoJSON format)
output_file_path = './prep_data/all_lines.geojson'
all_lines = gpd.read_file(output_file_path)
print(f"all_lines GeoDataFrame reloaded from {output_file_path}")

# re-load modeling_cost DataFrame to a CSV file
output_file_path = './prep_data/modeling_cost_df.csv'
modeling_cost = pd.read_csv(output_file_path)
print(f"modeling_cost DataFrame reloaded from {output_file_path}")

all_stations GeoDataFrame reloaded from ./prep_data/all_stations.geojson
all_lines GeoDataFrame reloaded from ./prep_data/all_lines.geojson
modeling_cost DataFrame reloaded from ./prep_data/modeling_cost_df.csv


### Generate samples from model by line, all stations

In [33]:
all_stations

Unnamed: 0,Station,egrid_subregion,geometry
0,PZW,NWPP,POINT (-117.88145 47.00954)
1,WLO,NWPP,POINT (-116.09137 46.48743)
2,FWC,NWPP,POINT (-114.30129 45.96532)
3,UZG,NWPP,POINT (-112.5112 45.44321)
4,YKY,NWPP,POINT (-110.72112 44.9211)
...,...,...,...
118,EUU,SPSO,POINT (-90.73866 35.72965)
119,WYE,AZNM,POINT (-116.24833 33.30604)
120,KWM,RFCW,POINT (-83.34092 41.29167)
121,RLP,RFCE,POINT (-78.58103 41.61639)


In [24]:
all_lines

Unnamed: 0,Line,geometry
0,35,"LINESTRING (-117.88145 47.00954, -116.09137 46..."
1,194,"LINESTRING (-86.26374 36.39645, -90.41705 36.6..."
2,162,"LINESTRING (-98.55367 29.05979, -91.88391 29.5..."
3,36C,"LINESTRING (-110.72112 44.9211, -108.93104 44...."
4,176D,"LINESTRING (-98.55367 29.05979, -91.88391 29.5..."
5,18,"LINESTRING (-97.48228 38.81758, -99.70578 38.2..."
6,102,"LINESTRING (-86.26374 36.39645, -88.16269 37.8..."
7,21,"LINESTRING (-110.72112 44.9211, -110.32504 35...."
8,2A,"LINESTRING (-98.55367 29.05979, -91.88391 29.5..."
9,113,"LINESTRING (-86.26374 36.39645, -90.41705 36.6..."


In [25]:
# Filter modeling_cost for line '35'
line_35_data = modeling_cost[modeling_cost['Line'] == '35']
line_35_data

Unnamed: 0,Line,Station,min_FlowRate,max_FlowRate,min_PowerConsumption,max_PowerConsumption,min_DRACost,max_DRACost,PolyCoeffPowerConsumption,R2PowerConsumption,PolyCoeffDRACost,R2DRACost
36,35,SEN,600,1350,283.23585,1131.44689,0.0,0.0,"[1.133066324149232, -484.3356655947144]",0.944628,0,0.0
37,35,JUL,600,1350,531.527392,1038.224818,0.0,0.0,"[0.671767992731595, 46.54970840063145]",0.860912,0,0.0
38,35,GMC,600,1350,605.129668,2475.624288,0.0,0.0,"[2.495966832166364, -974.2071688404991]",0.989618,0,0.0
39,35,MMP,600,1350,98.970078,1038.133562,0.0,0.0,"[1.2549999520087418, -769.2499639320953]",0.923862,0,0.0
40,35,EUX,600,1350,662.062273,1570.385132,0.0,0.0,"[1.2137293394382282, -175.19201266766638]",0.926887,0,0.0
41,35,LBB,600,1350,219.902667,1171.726791,0.0,0.0,"[1.2106630119150352, -639.3563226241188]",0.822364,0,0.0
42,35,OKT,600,1350,395.96055,1058.957315,0.0,0.0,"[0.6003035485708452, 42.89932349028878]",0.456587,0,0.0
43,35,PZW,600,1350,969.964777,1809.271949,0.0,0.0,"[0.7082046327330357, 575.277516923043]",0.390356,0,0.0
44,35,XON,600,1350,694.840505,1240.094918,0.0,0.0,"[0.7276243305734861, 232.65192002275808]",0.988029,0,0.0
45,35,LJQ,600,1350,302.728426,1031.721251,0.0,0.0,"[0.972209917288466, -289.68784610725044]",0.999146,0,0.0


In [26]:
from numpy.polynomial.polynomial import Polynomial
import ast  # Import ast to safely evaluate the string representation of the list

# Iterate over each station in the filtered data
for _, row in line_35_data.iterrows():
    station = row['Station']
    min_flow = row['min_FlowRate']
    max_flow = row['max_FlowRate']
    
    # Convert the string representation of the polynomial coefficients to a list
    poly_coeffs_power = ast.literal_eval(row['PolyCoeffPowerConsumption']) 
    poly_coeffs_dra = ast.literal_eval(row['PolyCoeffDRACost'])
    
    # Sample 5 integers at random within the min/max flow rate range
    samples = np.random.randint(min_flow, max_flow, size=5)
    samples = np.sort(samples)  # Sort the samples for better readability

    # Generate PowerConsumptionkW using the polynomial coefficients
    power_consumption = poly_coeffs_power[0]*samples + poly_coeffs_power[1]
    # Ensure poly_coeffs_dra is a list
    if isinstance(poly_coeffs_dra, int):
        poly_coeffs_dra = [poly_coeffs_dra]
    if len(poly_coeffs_dra) > 1:
        dra_cost = poly_coeffs_dra[0]*samples + poly_coeffs_dra[1]
    else:
        dra_cost = 0

    # format as a pd.DataFrame
    power_consumption_df = pd.DataFrame({
        'Line': '35',
        'Station': station,
        'Flow Rate': samples,
        'PowerConsumptionkW': power_consumption,
        'DRACost': dra_cost
    })

    # Check 1: Ensure PowerConsumptionkW is not negative
    if np.any(power_consumption_df['PowerConsumptionkW'] < 0):
        display(power_consumption_df)
        print(f"Negative PowerConsumptionkW for station {station} with flow rates {samples}")
        # attemp 1: filter out negative values
        power_consumption_df = power_consumption_df[power_consumption_df['PowerConsumptionkW'] >= 0]
        # attemp 2: set negative values to zero
        if np.any(power_consumption_df['PowerConsumptionkW'] < 0):
            # to avoid negative PowerConsumptionkW we can set it to the sample values
            power_consumption_df = pd.DataFrame({
                'Line': '35',
                'Station': station,
                'Flow Rate': [min_flow, max_flow],
                'PowerConsumptionkW': [row['min_PowerConsumption'], row['max_PowerConsumption']],
                'DRACost': [row['min_DRACost'], row['max_DRACost']]
            })
        display(power_consumption_df)

    # Check 2: Ensure DRA cost is not negative
    if np.any(power_consumption_df['DRACost'] < 0):
        print(f"Negative DRA cost for station {station} with flow rates {samples}")
        display(power_consumption_df)
        # to avoid negative DRACost we can set it to the sample values
        power_consumption_df = pd.DataFrame({
            'Line': '35',
            'Station': station,
            'Flow Rate': [min_flow, max_flow],
            'PowerConsumptionkW': [row['min_PowerConsumption'], row['max_PowerConsumption']],
            'DRACost': [row['min_DRACost'], row['max_DRACost']]
        })
        display(power_consumption_df)
    
    # Check 3: Ensure PowerConsumptionkW is not larger than 110% max_PowerConsumption
    if np.any(power_consumption_df['PowerConsumptionkW'] > 1.1*row['max_PowerConsumption']): 
        display(power_consumption_df)
        print(f"PowerConsumptionkWW exceeds maximum for station {station} with flow rates {samples}")
        # to avoid exceeding max PowerConsumptionkW we can set it to the sample values
        power_consumption_df = pd.DataFrame({
            'Line': '35',
            'Station': station,
            'Flow Rate': [min_flow, max_flow],
            'PowerConsumptionkW': [row['min_PowerConsumption'], row['max_PowerConsumption']],
            'DRACost': [row['min_DRACost'], row['max_DRACost']]
        })
        display(power_consumption_df)

    # Check 4: Ensure DRACost is not larger than 110% max_DRACost
    if np.any(power_consumption_df['DRACost'] > 1.1*row['max_DRACost']): 
        display(power_consumption_df)
        print(f"DRACost exceeds maximum for station {station} with flow rates {samples}")
        # to avoid exceeding max DRACost we can set it to the sample values
        power_consumption_df = pd.DataFrame({
            'Line': '187C',
            'Station': station,
            'Flow Rate': [min_flow, max_flow],
            'PowerConsumptionkW': [row['min_PowerConsumption'], row['max_PowerConsumption']],
            'DRACost': [row['min_DRACost'], row['max_DRACost']]
        })
        display(power_consumption_df)

## Full sampling for all stations/Lines

In [27]:
samples_power_consumption_df = pd.DataFrame()

# Iterate over each station in the filtered data
for _, rowline in all_lines.iterrows():
    print(f"Processing line: {rowline['Line']}")
    # Filter modeling_cost for the current line
    this_line = rowline['Line']
    this_line_data = modeling_cost[modeling_cost['Line'] == this_line]
    # Iterate over each station in the filtered data
    for _, row in this_line_data.iterrows():
        print(f"\tProcessing Station: {row['Station']}")
        station = row['Station']
        min_flow = row['min_FlowRate']
        max_flow = row['max_FlowRate']
        
        # Convert the string representation of the polynomial coefficients to a list
        poly_coeffs_power = ast.literal_eval(row['PolyCoeffPowerConsumption']) 
        poly_coeffs_dra = ast.literal_eval(row['PolyCoeffDRACost'])
        
        # Sample 5 integers at random within the min/max flow rate range
        samples = np.random.randint(min_flow, max_flow, size=5)
        samples = np.sort(samples)  # Sort the samples for better readability

        # Generate PowerConsumptionkW using the polynomial coefficients
        power_consumption = poly_coeffs_power[0]*samples + poly_coeffs_power[1]
        # Ensure poly_coeffs_dra is a list
        if isinstance(poly_coeffs_dra, int):
            poly_coeffs_dra = [poly_coeffs_dra]
        if len(poly_coeffs_dra) > 1:
            dra_cost = poly_coeffs_dra[0]*samples + poly_coeffs_dra[1]
        else:
            dra_cost = 0

        # format as a pd.DataFrame
        power_consumption_df = pd.DataFrame({
            'Line': this_line,
            'Station': station,
            'FlowRate': samples,
            'PowerConsumptionkW': power_consumption,
            'DRACost': dra_cost
        })

        ## Quality Checks
        # Check 1: Ensure PowerConsumptionkW is not negative
        if np.any(power_consumption_df['PowerConsumptionkW'] < 0):
            print(f"Negative PowerConsumptionkW for station {station} with flow rates {samples}")
            # attemp 1: filter out negative values
            power_consumption_df = power_consumption_df[power_consumption_df['PowerConsumptionkW'] >= 0]
            # attemp 2: set negative values to zero
            if np.any(power_consumption_df['PowerConsumptionkW'] < 0):
                # to avoid negative PowerConsumptionkW we can set it to the sample values
                power_consumption_df = pd.DataFrame({
                    'Line': this_line,
                    'Station': station,
                    'Flow Rate': [min_flow, max_flow],
                    'PowerConsumptionkW': [row['min_PowerConsumption'], row['max_PowerConsumption']],
                    'DRACost': [row['min_DRACost'], row['max_DRACost']]
                })
            display(power_consumption_df)

        # Check 2: Ensure DRA cost is not negative
        if np.any(power_consumption_df['DRACost'] < 0):
            print(f"Negative DRA cost for station {station} with flow rates {samples}")
            # attemp 1: filter out negative values
            power_consumption_df = power_consumption_df[power_consumption_df['DRACost'] >= 0]
            # attemp 2: set negative values to zero
            if np.any(power_consumption_df['DRACost'] < 0):
                # to avoid negative DRACost we can set it to the sample values
                power_consumption_df = pd.DataFrame({
                    'Line': this_line,
                    'Station': station,
                    'Flow Rate': [min_flow, max_flow],
                    'PowerConsumptionkW': [row['min_PowerConsumption'], row['max_PowerConsumption']],
                    'DRACost': [row['min_DRACost'], row['max_DRACost']]
                })
            display(power_consumption_df)
        
        # Check 3: Ensure PowerConsumptionkW is not larger than 110% max_PowerConsumption
        if np.any(power_consumption_df['PowerConsumptionkW'] > 1.1*row['max_PowerConsumption']): 
            print(f"PowerConsumptionkW exceeds maximum for station {station} with flow rates {samples}")
            # to avoid exceeding max PowerConsumptionkW we can set it to the sample values
            power_consumption_df = pd.DataFrame({
                'Line': this_line,
                'Station': station,
                'Flow Rate': [min_flow, max_flow],
                'PowerConsumptionkW': [row['min_PowerConsumption'], row['max_PowerConsumption']],
                'DRACost': [row['min_DRACost'], row['max_DRACost']]
            })
            display(power_consumption_df)

        # Check 4: Ensure DRACost is not larger than 110% max_DRACost
        if np.any(power_consumption_df['DRACost'] > 1.1*row['max_DRACost']): 
            print(f"DRACost exceeds maximum for station {station} with flow rates {samples}")
            # to avoid exceeding max DRACost we can set it to the sample values
            power_consumption_df = pd.DataFrame({
                'Line': this_line,
                'Station': station,
                'Flow Rate': [min_flow, max_flow],
                'PowerConsumptionkW': [row['min_PowerConsumption'], row['max_PowerConsumption']],
                'DRACost': [row['min_DRACost'], row['max_DRACost']]
            })
            display(power_consumption_df)

        # Append the DataFrame to the samples_power_consumption_df
        samples_power_consumption_df = pd.concat([samples_power_consumption_df, power_consumption_df], ignore_index=True)

Processing line: 35
	Processing Station: SEN
	Processing Station: JUL
	Processing Station: GMC
	Processing Station: MMP
	Processing Station: EUX
	Processing Station: LBB
	Processing Station: OKT
	Processing Station: PZW
	Processing Station: XON
	Processing Station: LJQ
	Processing Station: BNR
	Processing Station: CVY
	Processing Station: ETM
	Processing Station: UZG
	Processing Station: ILF
	Processing Station: SRO
	Processing Station: WLO
	Processing Station: RID
	Processing Station: MUR
	Processing Station: FWC
	Processing Station: YKY
	Processing Station: SRE
	Processing Station: PWN
	Processing Station: ZGF
	Processing Station: WQL
	Processing Station: VPV
Processing line: 194
	Processing Station: YZI
	Processing Station: SYA
	Processing Station: BFR
	Processing Station: AUH
	Processing Station: HDE
Negative DRA cost for station HDE with flow rates [1474 1848 2320 2376 2418]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,194,HDE,1848,1384.669593,427.361161
2,194,HDE,2320,2019.321926,1057.508002
3,194,HDE,2376,2094.619661,1132.271187
4,194,HDE,2418,2151.092962,1188.343575


	Processing Station: YMQ
	Processing Station: YAG
Negative DRA cost for station YAG with flow rates [1463 2124 2134 2293 2294]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,194,YAG,2124,1653.578297,979.610055
2,194,YAG,2134,1671.58419,995.690134
3,194,YAG,2293,1957.87789,1251.363386
4,194,YAG,2294,1959.678479,1252.971393


	Processing Station: XOL
Negative DRA cost for station XOL with flow rates [1472 1493 1568 1776 1950]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
3,194,XOL,1776,1374.263151,221.599267
4,194,XOL,1950,1698.322002,461.627059


	Processing Station: IIC
	Processing Station: SPM
	Processing Station: SRC
	Processing Station: HKA
Negative DRA cost for station HKA with flow rates [1382 2029 2048 2111 2332]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,194,HKA,2029,1899.6445,510.975749
2,194,HKA,2048,1941.908498,536.11999
3,194,HKA,2111,2082.047017,619.492999
4,194,HKA,2332,2573.644045,911.960221


Processing line: 162
	Processing Station: SEN
	Processing Station: SRH
Negative DRA cost for station SRH with flow rates [1902 1972 2053 2493 2726]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,162,SRH,1972,1612.268427,58.775219
2,162,SRH,2053,1801.698133,185.91006
3,162,SRH,2493,2830.699002,876.519076
4,162,SRH,2726,3375.601736,1242.227941


	Processing Station: SRO
Negative DRA cost for station SRO with flow rates [1550 2193 2420 2722 2768]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,162,SRO,2193,2691.527879,786.284543
2,162,SRO,2420,3189.397596,1472.89077
3,162,SRO,2722,3851.761713,2386.349274
4,162,SRO,2768,3952.651612,2525.485338


	Processing Station: CVY
Negative DRA cost for station CVY with flow rates [1514 1634 1800 2528 2884]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
3,162,CVY,2528,2581.200484,699.429826
4,162,CVY,2884,3232.09736,1105.225397


	Processing Station: NMJ
Negative DRA cost for station NMJ with flow rates [1735 1859 2123 2555 2787]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,162,NMJ,1859,388.618551,39.766911
2,162,NMJ,2123,960.335508,821.665033
3,162,NMJ,2555,1895.872348,2101.134687
4,162,NMJ,2787,2398.290281,2788.257279


	Processing Station: MMP
Negative DRA cost for station MMP with flow rates [1558 1596 1809 2050 2536]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
3,162,MMP,2050,1215.622307,290.220595
4,162,MMP,2536,2395.797832,1107.267606


	Processing Station: SRE
Negative DRA cost for station SRE with flow rates [1836 2152 2460 2580 2585]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,162,SRE,2152,1048.653532,857.94613
2,162,SRE,2460,1529.988498,1708.031814
3,162,SRE,2580,1717.521601,2039.234029
4,162,SRE,2585,1725.335481,2053.034121


	Processing Station: UZG
Negative DRA cost for station UZG with flow rates [1764 1949 2666 2757 2793]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,162,UZG,1949,1406.433335,39.714244
2,162,UZG,2666,3138.322187,1001.528079
3,162,UZG,2757,3358.129559,1123.599291
4,162,UZG,2793,3445.086322,1171.891199


	Processing Station: FWC
Negative DRA cost for station FWC with flow rates [1716 1784 2429 2600 2716]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,162,FWC,2429,2693.047089,312.60028
3,162,FWC,2600,3106.719652,425.494902
4,162,FWC,2716,3387.339637,502.078389


	Processing Station: RID
Negative DRA cost for station RID with flow rates [1877 1882 2354 2779 2809]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,162,RID,2354,2361.896568,477.274532
3,162,RID,2779,3291.364218,944.15711
4,162,RID,2809,3356.973699,977.113527


	Processing Station: QCT
Negative DRA cost for station QCT with flow rates [1832 1849 2103 2516 2702]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,162,QCT,2103,1568.803227,251.319766
3,162,QCT,2516,2549.550036,806.259543
4,162,QCT,2702,2991.242304,1056.183995


	Processing Station: GMC
	Processing Station: LJQ
Negative DRA cost for station LJQ with flow rates [1774 1774 2137 2179 2190]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,162,LJQ,2137,1794.888146,534.413435
3,162,LJQ,2179,1875.878628,651.59379
4,162,LJQ,2190,1897.09042,682.283882


	Processing Station: RJY
Negative DRA cost for station RJY with flow rates [1610 2049 2193 2446 2451]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,162,RJY,2049,1444.305429,476.320423
2,162,RJY,2193,1761.500186,702.832026
3,162,RJY,2446,2318.793752,1100.800328
4,162,RJY,2451,2329.807459,1108.665314


	Processing Station: ILF
	Processing Station: ZGF
	Processing Station: PWN
Negative DRA cost for station PWN with flow rates [1687 2104 2107 2501 2667]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,162,PWN,2104,1900.140097,630.156947
2,162,PWN,2107,1906.554304,635.877209
3,162,PWN,2501,2748.953434,1387.138329
4,162,PWN,2667,3103.872865,1703.659511


Processing line: 36C
	Processing Station: ZDB
	Processing Station: NHY
Negative DRA cost for station NHY with flow rates [1634 2235 2373 2841 2866]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,36C,NHY,2235,1802.104319,654.461714
2,36C,NHY,2373,2055.218158,889.55528
3,36C,NHY,2841,2913.604221,1686.829114
4,36C,NHY,2866,2959.458178,1729.418528


	Processing Station: LBB
Negative DRA cost for station LBB with flow rates [1553 1603 1727 1866 2572]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
4,36C,LBB,2572,2171.255427,1903.367253


	Processing Station: XON
Negative DRA cost for station XON with flow rates [1503 1822 1837 2414 2804]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,36C,XON,1822,1148.045598,2.26604
2,36C,XON,1837,1172.209246,36.297341
3,36C,XON,2414,2101.704218,1345.36803
4,36C,XON,2804,2729.959052,2230.18184


	Processing Station: SQO
Negative DRA cost for station SQO with flow rates [1620 2346 2363 2808 2932]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,36C,SQO,2346,1286.759203,2160.244308
2,36C,SQO,2363,1313.082848,2217.816355
3,36C,SQO,2808,2002.142952,3724.849346
4,36C,SQO,2932,2194.150711,4144.786629


	Processing Station: OKT
	Processing Station: YKY
Negative DRA cost for station YKY with flow rates [1609 1646 1736 2395 2534]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,36C,YKY,1736,903.662132,47.217796
3,36C,YKY,2395,2250.463195,2364.237194
4,36C,YKY,2534,2534.538078,2852.956005


	Processing Station: DUY
	Processing Station: EUX
Negative DRA cost for station EUX with flow rates [1528 1576 2182 2447 2719]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,36C,EUX,2182,2112.392063,1459.72693
3,36C,EUX,2447,2549.840959,2433.602953
4,36C,EUX,2719,2998.845109,3433.204003


	Processing Station: JUL
	Processing Station: VPV
Negative DRA cost for station VPV with flow rates [1556 1627 2057 2141 2431]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,36C,VPV,2057,1450.402696,351.225085
3,36C,VPV,2141,1531.725939,494.325516
4,36C,VPV,2431,1812.484757,988.362721


	Processing Station: WQL
Negative DRA cost for station WQL with flow rates [1783 1876 2161 2298 2884]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,36C,WQL,2161,1151.273942,650.443369
3,36C,WQL,2298,1376.64986,983.361043
4,36C,WQL,2884,2340.666562,2407.373866


Processing line: 176D
	Processing Station: FWC
	Processing Station: UZG
	Processing Station: YKY
	Processing Station: WQL
	Processing Station: JUL
	Processing Station: RJY
	Processing Station: DVZ
	Processing Station: EUX
	Processing Station: DUY
Negative DRA cost for station DUY with flow rates [3811 3869 3987 5346 5349]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
3,176D,DUY,5346,6436.500009,3023.728986
4,176D,DUY,5349,6443.61056,3031.556458


	Processing Station: PWN
	Processing Station: SRO
	Processing Station: GMC
	Processing Station: NHY
	Processing Station: ILF
	Processing Station: SEN
	Processing Station: MMP
	Processing Station: SQO
	Processing Station: CVY
	Processing Station: LBB
	Processing Station: VPV
	Processing Station: SRH
	Processing Station: NMJ
	Processing Station: ZDB
	Processing Station: LJQ
	Processing Station: QHS
	Processing Station: SRE
	Processing Station: QCT
Negative DRA cost for station QCT with flow rates [4192 4211 4903 5253 5377]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,176D,QCT,4903,5244.633827,415.156169
3,176D,QCT,5253,6079.888369,651.15219
4,176D,QCT,5377,6375.807122,734.762209


	Processing Station: RID
Processing line: 18
	Processing Station: IIC
	Processing Station: VBM
	Processing Station: YSX
	Processing Station: EYK
	Processing Station: UBY
	Processing Station: BEK
	Processing Station: VHD
	Processing Station: CAQ
Negative DRA cost for station CAQ with flow rates [2209 2447 2554 2925 3232]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,18,CAQ,2447,648.76926,134.206405
2,18,CAQ,2554,772.091494,505.128552
3,18,CAQ,2925,1199.685406,1791.2231
4,18,CAQ,3232,1553.516488,2855.457672


	Processing Station: BAK
	Processing Station: NOO
	Processing Station: RLF
Negative DRA cost for station RLF with flow rates [2179 2994 3186 3440 3541]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,18,RLF,2994,1934.039052,1509.163181
2,18,RLF,3186,2214.545552,2014.324418
3,18,RLF,3440,2585.632276,2682.610637
4,18,RLF,3541,2733.190382,2948.346496


	Processing Station: RJC
	Processing Station: SCQ
	Processing Station: CMW
Processing line: 102
	Processing Station: GYK
	Processing Station: UOU
	Processing Station: QAV
Negative DRA cost for station QAV with flow rates [4949 5634 6378 6924 8047]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,102,QAV,6378,6132.252023,276.025467
3,102,QAV,6924,7163.173968,488.023848
4,102,QAV,8047,9283.550057,924.057147


	Processing Station: IIC
	Processing Station: FKJ
	Processing Station: SYA
Negative DRA cost for station SYA with flow rates [4042 5353 5961 6724 8690]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,102,SYA,5961,6153.459394,76.076411
3,102,SYA,6724,8017.233162,273.579175
4,102,SYA,8690,12819.565046,782.478829


	Processing Station: IGZ
Negative PowerConsumptionkW for station IGZ with flow rates [3774 5443 7046 7740 8643]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,102,IGZ,5443,1429.438065,0
2,102,IGZ,7046,3988.237384,0
3,102,IGZ,7740,5096.03946,0
4,102,IGZ,8643,6537.459165,0


	Processing Station: XWT
Negative DRA cost for station XWT with flow rates [4896 5033 6786 6991 8758]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,102,XWT,6786,7476.480295,217.220971
3,102,XWT,6991,7984.485169,257.019211
4,102,XWT,8758,12363.239376,600.060627


	Processing Station: FNO
Negative PowerConsumptionkW for station FNO with flow rates [3752 4808 5051 5659 8021]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,102,FNO,4808,989.994157,0
2,102,FNO,5051,1450.055562,0
3,102,FNO,5659,2601.155704,0
4,102,FNO,8021,7073.028295,0


	Processing Station: QFE
Negative PowerConsumptionkW for station QFE with flow rates [3793 4812 6230 7572 8747]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,102,QFE,4812,1964.768937,0
2,102,QFE,6230,4888.526775,0
3,102,QFE,7572,7655.58109,0
4,102,QFE,8747,10078.300031,0


	Processing Station: PQQ
	Processing Station: YMQ
Negative PowerConsumptionkW for station YMQ with flow rates [4063 4104 4116 4505 7473]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
3,102,YMQ,4505,283.722129,0
4,102,YMQ,7473,5941.22791,0


	Processing Station: SLO
	Processing Station: MEO
	Processing Station: IJO
Negative DRA cost for station IJO with flow rates [4151 6855 6953 8669 9041]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,102,IJO,6855,8721.456702,307.488562
2,102,IJO,6953,8983.371424,332.85589
3,102,IJO,8669,13569.551648,777.042973
4,102,IJO,9041,14563.75855,873.335277


	Processing Station: YAG
Negative DRA cost for station YAG with flow rates [4855 6862 6972 7116 9099]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,102,YAG,6862,10316.17472,231.97544
2,102,YAG,6972,10595.197004,253.330594
3,102,YAG,7116,10960.46254,281.286431
4,102,YAG,9099,15990.473353,666.261602


Processing line: 21
	Processing Station: YKY
	Processing Station: EUX
	Processing Station: NHY
	Processing Station: ZDB
	Processing Station: OKT
Processing line: 2A
	Processing Station: SRE
	Processing Station: APX
	Processing Station: ILF
	Processing Station: FWC
	Processing Station: SRH
	Processing Station: RID
	Processing Station: SRO
	Processing Station: YKY
	Processing Station: DUY
Negative DRA cost for station DUY with flow rates [3672 4111 4851 4956 5722]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,2A,DUY,4111,2779.368041,107.780531
2,2A,DUY,4851,3840.418037,757.503317
3,2A,DUY,4956,3990.972428,849.693713
4,2A,DUY,5722,5089.302559,1522.244597


	Processing Station: BNR
	Processing Station: MMP
Negative DRA cost for station MMP with flow rates [3355 3417 3509 3629 5435]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
4,2A,MMP,5435,9164.498288,1099.477284


	Processing Station: RJY
	Processing Station: NQH
	Processing Station: LBB
	Processing Station: NMJ
	Processing Station: CVY
	Processing Station: VPV
	Processing Station: GHC
	Processing Station: UZG
	Processing Station: WQL
	Processing Station: DVZ
	Processing Station: PWN
	Processing Station: GMC
	Processing Station: SEN
	Processing Station: QHS
	Processing Station: JUL
	Processing Station: QCT
	Processing Station: EUX
Processing line: 113
	Processing Station: BFR
	Processing Station: AUH
Negative DRA cost for station AUH with flow rates [3200 3764 4318 4354 4355]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,113,AUH,3764,3095.296132,1391.850512
2,113,AUH,4318,4065.775304,3032.178809
3,113,AUH,4354,4128.838933,3138.77054
4,113,AUH,4355,4130.5907,3141.731421


	Processing Station: SYA
Negative DRA cost for station SYA with flow rates [3217 3262 3277 3536 4387]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
3,113,SYA,3536,2787.971607,625.191017
4,113,SYA,4387,4517.520059,2895.485185


	Processing Station: YMQ
	Processing Station: OJJ
	Processing Station: RYB
	Processing Station: RQT
	Processing Station: IJO
Negative DRA cost for station IJO with flow rates [3032 3214 3365 3849 4297]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,113,IJO,3365,2039.862442,330.134032
3,113,IJO,3849,2358.751218,2408.336651
4,113,IJO,4297,2653.920994,4331.962216


	Processing Station: SYY
	Processing Station: IIC
	Processing Station: TDD
	Processing Station: SPM
Negative DRA cost for station SPM with flow rates [3035 3060 3488 3801 3981]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,113,SPM,3488,2336.758068,464.318097
3,113,SPM,3801,2874.288539,1326.572446
4,113,SPM,3981,3183.411493,1822.437567


	Processing Station: HDE
Negative DRA cost for station HDE with flow rates [3106 3140 4170 4221 4501]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
2,113,HDE,4170,4061.409731,3135.52941
3,113,HDE,4221,4153.800577,3318.889296
4,113,HDE,4501,4661.044439,4325.571023


	Processing Station: YAG
	Processing Station: SLO
	Processing Station: YZI
	Processing Station: JJL
	Processing Station: MEO
	Processing Station: PQQ
	Processing Station: UFY
	Processing Station: XOL
	Processing Station: CSI
	Processing Station: SCS
	Processing Station: DVA
Processing line: 91
	Processing Station: TCT
Negative PowerConsumptionkW for station TCT with flow rates [2074 2681 2718 3441 3476]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,91,TCT,2681,991.386633,0
2,91,TCT,2718,1089.622193,0
3,91,TCT,3441,3009.198124,0
4,91,TCT,3476,3102.123653,0


	Processing Station: UOF
	Processing Station: CFZ
	Processing Station: VAD
Processing line: 157B
	Processing Station: RXP
	Processing Station: TGE
	Processing Station: XJD
	Processing Station: OWT
Processing line: 161E
	Processing Station: GMC
	Processing Station: KWM
	Processing Station: CVY
	Processing Station: MMP
	Processing Station: JUL
	Processing Station: EUU
	Processing Station: EUX
	Processing Station: SRH
	Processing Station: NHY
Negative DRA cost for station NHY with flow rates [3553 4166 4502 4957 5142]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,161E,NHY,4166,3671.018671,13.993225
2,161E,NHY,4502,4567.109291,38.991976
3,161E,NHY,4957,5780.56534,72.844451
4,161E,NHY,5142,6273.948568,86.608644


	Processing Station: NQH
Negative DRA cost for station NQH with flow rates [3704 5026 5101 5258 5431]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,161E,NQH,5026,6612.879672,77.978123
2,161E,NQH,5101,6798.540308,83.558201
3,161E,NQH,5258,7187.189906,95.239165
4,161E,NQH,5431,7615.447107,108.110546


	Processing Station: WYE
	Processing Station: RID
	Processing Station: SEN
	Processing Station: UZG
	Processing Station: VPV
	Processing Station: YKY
	Processing Station: QCT
	Processing Station: RJY
	Processing Station: BNR
	Processing Station: RLP
Negative DRA cost for station RLP with flow rates [3374 3881 4090 4840 5208]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
1,161E,RLP,3881,2552.330807,45.916933
2,161E,RLP,4090,3108.421944,91.370493
3,161E,RLP,4840,5103.964304,254.481352
4,161E,RLP,5208,6083.110421,334.514414


	Processing Station: QHS
	Processing Station: WQL
	Processing Station: QCZ
Negative DRA cost for station QCZ with flow rates [3014 3262 3460 4416 5187]


Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
3,161E,QCZ,4416,5481.740271,66.798137
4,161E,QCZ,5187,7354.084944,160.55494


	Processing Station: ILF
	Processing Station: FWC
	Processing Station: JWW


In [28]:
display(samples_power_consumption_df)

Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost
0,35,SEN,609,205.701726,0.0
1,35,SEN,699,307.677695,0.0
2,35,SEN,1010,660.061322,0.0
3,35,SEN,1127,792.630082,0.0
4,35,SEN,1183,856.081796,0.0
...,...,...,...,...,...
990,161E,JWW,3391,2874.148593,0.0
991,161E,JWW,3749,4182.970120,0.0
992,161E,JWW,4759,7875.455433,0.0
993,161E,JWW,4992,8727.286204,0.0


### Incorporate Power Consumption rates

In [None]:
# Generate synthetic power rates
synthetic_power_rates = samples_power_consumption_df[['Line', 'Station']].drop_duplicates().copy()
synthetic_power_rates['EnergyConsumptionRateCADperkWh'] = np.random.uniform(0.05, 0.5, size=len(synthetic_power_rates)).round(9)
display(synthetic_power_rates)

Unnamed: 0,Line,Station,EnergyConsumptionRateCADperkWh
0,35,SEN,0.336838
5,35,JUL,0.076273
10,35,GMC,0.143926
15,35,MMP,0.472360
20,35,EUX,0.418046
...,...,...,...
973,161E,WQL,0.344230
978,161E,QCZ,0.169526
980,161E,ILF,0.136939
985,161E,FWC,0.471016


### Derive final calculated columns
EnergyConsumptionCAD = EnergyConsumptionRateCADperkWh * PowerConsumptionkW

TotalPowerDRACostCAD = DRACost + EnergyConsumptionCAD

In [32]:
# Merge synthetic_power_rates with samples_power_consumption_df to include EnergyConsumptionRateCADperkWh
samples_power_consumption_df = samples_power_consumption_df.merge(
    synthetic_power_rates, on=['Line', 'Station'], how='left'
)

# Calculate EnergyConsumptionCAD
samples_power_consumption_df['EnergyConsumptionCAD'] = (
    samples_power_consumption_df['EnergyConsumptionRateCADperkWh'] * samples_power_consumption_df['PowerConsumptionkW']
)

# Calculate TotalPowerDRACostCAD
samples_power_consumption_df['TotalPowerDRACostCAD'] = (
    samples_power_consumption_df['DRACost'] + samples_power_consumption_df['EnergyConsumptionCAD']
)

# Display the updated DataFrame
display(samples_power_consumption_df)

Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost,EnergyConsumptionRateCADperkWh,EnergyConsumptionCAD,TotalPowerDRACostCAD
0,35,SEN,609,205.701726,0.0,0.336838,69.288090,69.288090
1,35,SEN,699,307.677695,0.0,0.336838,103.637438,103.637438
2,35,SEN,1010,660.061322,0.0,0.336838,222.333518,222.333518
3,35,SEN,1127,792.630082,0.0,0.336838,266.987671,266.987671
4,35,SEN,1183,856.081796,0.0,0.336838,288.360598,288.360598
...,...,...,...,...,...,...,...,...
990,161E,JWW,3391,2874.148593,0.0,0.065093,187.086819,187.086819
991,161E,JWW,3749,4182.970120,0.0,0.065093,272.281877,272.281877
992,161E,JWW,4759,7875.455433,0.0,0.065093,512.636650,512.636650
993,161E,JWW,4992,8727.286204,0.0,0.065093,568.084831,568.084831


In [None]:
# Merge egrid_subregion
samples_power_consumption_df = samples_power_consumption_df.merge(
    all_stations[['Station', 'egrid_subregion']], on='Station', how='left'
)
display(samples_power_consumption_df)

Unnamed: 0,Line,Station,FlowRate,PowerConsumptionkW,DRACost,EnergyConsumptionRateCADperkWh,EnergyConsumptionCAD,TotalPowerDRACostCAD,egrid_subregion
0,35,SEN,609,205.701726,0.0,0.336838,69.288090,69.288090,SRMW
1,35,SEN,699,307.677695,0.0,0.336838,103.637438,103.637438,SRMW
2,35,SEN,1010,660.061322,0.0,0.336838,222.333518,222.333518,SRMW
3,35,SEN,1127,792.630082,0.0,0.336838,266.987671,266.987671,SRMW
4,35,SEN,1183,856.081796,0.0,0.336838,288.360598,288.360598,SRMW
...,...,...,...,...,...,...,...,...,...
1180,161E,JWW,4759,7875.455433,0.0,0.065093,512.636650,512.636650,SRMV
1181,161E,JWW,4992,8727.286204,0.0,0.065093,568.084831,568.084831,SRSO
1182,161E,JWW,4992,8727.286204,0.0,0.065093,568.084831,568.084831,SRMV
1183,161E,JWW,5468,10467.507005,0.0,0.065093,681.360941,681.360941,SRSO


In [35]:
# save to csv
samples_power_consumption_df.to_csv (
    './prep_data/power_consumption_df.csv', 
    index = False, 
    header=True
)