In [None]:
# For each region, regress forecast carbon intensity (dependent variable) on net BESS output (discharge + charge) and tabulate slope, intercept, R² and a one-sided p-value for a positive slope.
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import t

# Read the input dataset into `df`.
# NOTE(review): this is an absolute, machine-specific location; the notebook
# will only run unchanged on a machine with the same directory layout.
csv_path = r"C:\Users\spice\Dropbox\Documents\Imperial 2024.2025\MECH70038 - Research Projects\_My Thesis\Data\Data_1Jul24_to_30Jun25.csv"
df = pd.read_csv(csv_path)

# Column-name suffix for each region -> human-readable region name.
region_descriptions = {
    '_A': 'Eastern',            '_B': 'East Midlands',
    '_C': 'London',             '_D': 'Merseyside & North Wales',
    '_E': 'Midlands',           '_F': 'Northern',
    '_G': 'North Western',      '_H': 'Southern',
    '_J': 'South Eastern',      '_K': 'South Wales',
    '_L': 'South Western',      '_M': 'Yorkshire',
    '_N': 'South of Scotland',  '_P': 'North of Scotland',
}

# Region suffixes in processing order; taken from the mapping above so the
# two can never drift apart (dicts preserve insertion order).
regions = list(region_descriptions)

# Per-region OLS fit of
#     carbon_intensity_forecast = const + slope * BESS_Net
# One summary dict per successfully fitted region is appended to `results`.
results = []

for region in regions:
    discharge_col = f'BESSDischarge{region}'
    charge_col = f'BESSCharge{region}'
    carbon_col = f'CarbonIntensityForecast{region}'

    # All three series must exist for this region, otherwise skip it.
    if not all(col in df.columns for col in [discharge_col, charge_col, carbon_col]):
        print(f"Skipping {region} - missing columns.")
        continue

    # Use only rows where discharge, charge and carbon intensity are all present.
    sub_df = df[[discharge_col, charge_col, carbon_col]].dropna()
    n_points = len(sub_df)

    if n_points < 3:
        print(f"Skipping {region} - not enough data points.")
        continue

    # Net BESS is the plain sum of the two columns.
    # NOTE(review): presumably charge is stored with the opposite sign to
    # discharge so that the sum is a net flow - confirm against the dataset.
    bess_net = sub_df[discharge_col] + sub_df[charge_col]
    bess_net.name = "BESS_Net"  # ensures consistent name for statsmodels
    carbon = sub_df[carbon_col]

    # Linear regression with statsmodels.
    # has_constant='add' forces the intercept column to be created even when
    # BESS_Net happens to be a non-zero constant; with the default ('skip')
    # no 'const' column is added in that case and params['const'] raises KeyError.
    X = sm.add_constant(bess_net, has_constant='add')
    model = sm.OLS(carbon, X).fit()

    slope = model.params['BESS_Net']
    intercept = model.params['const']
    r_squared = model.rsquared
    t_stat = model.tvalues['BESS_Net']

    # One-sided p-value for H1: slope > 0, i.e. P(T >= t_stat).
    #  * t.sf is the upper-tail probability; unlike `1 - t.cdf(...)` it does
    #    not cancel to exactly 0 for large t-statistics.
    #  * It is evaluated for every sign of the slope: a negative slope yields
    #    a p-value above 0.5 rather than a hard-coded 1.0.
    #  * model.df_resid is the residual degrees of freedom statsmodels itself
    #    used for the t-statistic.
    #  * If the regressor has no variation the t-statistic is NaN and the
    #    p-value is reported as NaN (test undefined).
    p_one_sided = t.sf(t_stat, df=model.df_resid)

    results.append({
        'Region': region,
        'Description': region_descriptions[region],
        'n': n_points,
        'Slope': round(slope, 4),
        'Intercept': round(intercept, 2),
        # "+ 0.0" turns a rounded negative zero (-0.0) into 0.0 for display.
        'R²': round(r_squared, 3) + 0.0,
        # Three significant figures instead of two decimals, so very small
        # p-values are not all collapsed to 0.0.
        'p-value (slope > 0)': float(f"{p_one_sided:.3g}")
    })

# Convert to DataFrame.
# The column list is given explicitly so that an empty `results` (every region
# skipped) still yields a frame with a 'Region' column; without it
# pd.DataFrame([]) has no columns and set_index('Region') raises KeyError.
result_columns = ['Region', 'Description', 'n', 'Slope', 'Intercept', 'R²', 'p-value (slope > 0)']
results_df = pd.DataFrame(results, columns=result_columns).set_index('Region')
print(results_df)

# Optional: export to CSV or LaTeX
#results_df.to_csv("regional_regression_with_pvalues.csv")
# results_df.to_latex("regional_regression_with_pvalues.tex", index=True, float_format="%.4f")



                     Description      n   Slope  Intercept     R²  \
Region                                                              
_A                       Eastern  16943  0.3039     119.29  0.025   
_B                 East Midlands  16943  1.2047     188.98  0.039   
_C                        London  16943  0.0000     138.66 -0.000   
_D      Merseyside & North Wales  16943  0.9848      87.47  0.021   
_E                      Midlands  16943  1.2165     126.03  0.025   
_F                      Northern  16943  0.1048      20.19  0.004   
_G                 North Western  16943  0.4174      51.00  0.026   
_H                      Southern  16943  0.4785     189.18  0.049   
_J                 South Eastern  16943  0.6859     154.17  0.017   
_K                   South Wales  16943  6.1729     259.13  0.014   
_L                 South Western  16943  1.8143     228.46  0.027   
_M                     Yorkshire  16943  0.4050     141.42  0.032   
_N             South of Scotland  

In [None]:
# For each region, regress forecast carbon intensity (dependent variable) on net BESS output (discharge + charge) with scikit-learn and tabulate slope, intercept and R².

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Read the input dataset into `df`.
# NOTE(review): this is an absolute, machine-specific location; the notebook
# will only run unchanged on a machine with the same directory layout.
csv_path = r"C:\Users\spice\Dropbox\Documents\Imperial 2024.2025\MECH70038 - Research Projects\_My Thesis\Data\Data_1Jul24_to_30Jun25.csv"
df = pd.read_csv(csv_path)

# Column-name suffix for each region -> human-readable region name.
region_descriptions = {
    '_A': 'Eastern',            '_B': 'East Midlands',
    '_C': 'London',             '_D': 'Merseyside & North Wales',
    '_E': 'Midlands',           '_F': 'Northern',
    '_G': 'North Western',      '_H': 'Southern',
    '_J': 'South Eastern',      '_K': 'South Wales',
    '_L': 'South Western',      '_M': 'Yorkshire',
    '_N': 'South of Scotland',  '_P': 'North of Scotland',
}

# Region suffixes in processing order; taken from the mapping above so the
# two can never drift apart (dicts preserve insertion order).
regions = list(region_descriptions)

# One summary dict per region that could be fitted; turned into a table later.
results = []

for region in regions:
    # Column names for this region's three input series.
    discharge_col = f'BESSDischarge{region}'
    charge_col = f'BESSCharge{region}'
    carbon_col = f'CarbonIntensityForecast{region}'
    needed_cols = [discharge_col, charge_col, carbon_col]

    # Guard: every required column must be present in the dataset.
    absent = [col for col in needed_cols if col not in df.columns]
    if absent:
        print(f"Skipping region {region} - missing columns")
        continue

    # Drop any row in which one of the three series is missing.
    sub_df = df[needed_cols].dropna()
    n_points = sub_df.shape[0]

    # Guard: a line needs at least two observations.
    if n_points < 2:
        print(f"Skipping region {region} - not enough data for regression")
        continue

    # Regressor: sum of the discharge and charge columns. Response: carbon intensity.
    bess_net = sub_df[discharge_col] + sub_df[charge_col]
    carbon = sub_df[carbon_col]

    # scikit-learn expects 2-D arrays, so both series become single-column matrices.
    x = bess_net.values[:, np.newaxis]
    y = carbon.values[:, np.newaxis]

    model = LinearRegression()
    model.fit(x, y)

    # With a 2-D target, coef_ has shape (1, 1) and intercept_ has shape (1,).
    slope = model.coef_[0, 0]
    intercept = model.intercept_[0]
    r_squared = model.score(x, y)

    row = {
        'Region': region,
        'Description': region_descriptions[region],
        'n': n_points,
        'Slope': round(slope, 4),
        'Intercept': round(intercept, 2),
        'R²': round(r_squared, 4),
    }
    results.append(row)

# Create and display/save results.
# The column list is given explicitly so that an empty `results` (every region
# skipped) still yields a frame with a 'Region' column; without it
# pd.DataFrame([]) has no columns and set_index('Region') raises KeyError.
result_columns = ['Region', 'Description', 'n', 'Slope', 'Intercept', 'R²']
results_df = pd.DataFrame(results, columns=result_columns).set_index('Region')
print(results_df)

# Optional: save to CSV
# results_df.to_csv("regional_regression_results.csv")

# Export to LaTeX
#latex_code = results_df.to_latex(index=True, caption="Regional Linear Regression Results", label="tab:regional_regression", float_format="%.4f")
#with open("regional_regression_results.tex", "w") as f:
#    f.write(latex_code)


                     Description      n   Slope  Intercept      R²
Region                                                            
_A                       Eastern  16943  0.3039     119.29  0.0248
_B                 East Midlands  16943  1.2047     188.98  0.0390
_C                        London  16943  0.0000     138.66  0.0000
_D      Merseyside & North Wales  16943  0.9848      87.47  0.0207
_E                      Midlands  16943  1.2165     126.03  0.0250
_F                      Northern  16943  0.1048      20.19  0.0044
_G                 North Western  16943  0.4174      51.00  0.0258
_H                      Southern  16943  0.4785     189.18  0.0487
_J                 South Eastern  16943  0.6859     154.17  0.0168
_K                   South Wales  16943  6.1729     259.13  0.0142
_L                 South Western  16943  1.8143     228.46  0.0271
_M                     Yorkshire  16943  0.4050     141.42  0.0316
_N             South of Scotland  16943  0.0622      24.54  0.