In [3]:
import pandas as pd
import datetime
import warnings
warnings.filterwarnings('ignore')

In [10]:
# Number format for all printed probabilities: thousands separator, four decimals.
formatString = "{:,.4f}"

# Load the prepared dataset: semicolon-separated, decimal comma,
# with the "datetime" column used as the row index.
fullData = pd.read_csv(
    "../00_data/data_with_calc_fields.csv",
    index_col="datetime",
    decimal=",",
    sep=";",
)

# Convert the index labels into pandas datetimes.
fullData.index = pd.to_datetime(fullData.index)

In [12]:
# Conditional probabilities of the sign of the price difference (PD), given the
# sign of the wind / solar / total forecast error (FE).
#
# Label encoding used for every indicator column:
#     value > 0 -> 1,   value < 0 -> 0,   otherwise -> 2


def _sign_label(value):
    """Encode the sign of ``value``: 1 if > 0, 0 if < 0, 2 otherwise.

    NOTE(review): NaN compares False against both bounds, so a missing value
    also receives label 2 (i.e. it is treated like an exact zero). Confirm that
    the input columns contain no missing values.
    """
    if value > 0:
        return 1
    if value < 0:
        return 0
    return 2


def _price_labels_where(indicator_column, label):
    """Return the price-diff labels of all rows whose ``indicator_column`` equals ``label``."""
    return fullData.loc[fullData[indicator_column] == label, "posNegIndicator_priceDiff"]


def _share_pos_or_zero(price_labels):
    """Return the fraction of ``price_labels`` that are positive (1) or zero (2).

    The rows are counted rather than summed: summing the label values would add
    2 for every zero price difference and overstate the probability.
    An empty selection yields NaN (0 / 0).
    """
    return (price_labels != 0).sum() / price_labels.count()


def _print_case(fe_label, fe_condition, results):
    """Print both conditional probabilities and the sample size per source.

    fe_label     -- text used inside P(... | <fe_label>), e.g. "FE_pos"
    fe_condition -- text used in the datapoint line, e.g. "> 0"
    results      -- iterable of (source name, P(PD >= 0 | FE case), price labels)
    """
    for position, (source, probability, price_labels) in enumerate(results):
        if position:
            print("\n")
        # Pad the bracketed source name to 7 characters so the colons line up.
        tag = "{:<7}".format("[" + source + "]")
        print("P(PD_posOrZero | " + fe_label + ") " + tag + ": " + formatString.format(probability))
        print("P(PD_negative  | " + fe_label + ") " + tag + ": " + formatString.format(1 - probability))
        print("Number of datapoints for FE " + fe_condition + " [" + source + "]: " + str(price_labels.count()))


# label data > 0 : 1; data < 0 : 0; data == 0 : 2
fullData["posNegIndicator_wind"] = fullData["wind_forecast_error"].map(_sign_label)
fullData["posNegIndicator_solar"] = fullData["solar_forecast_error"].map(_sign_label)
fullData["posNegIndicator_total"] = fullData["forecast_error"].map(_sign_label)
fullData["posNegIndicator_priceDiff"] = fullData["price_diff"].map(_sign_label)

# price-diff labels for wind/solar/total forecastError isPositive
data_isPositive_wind = _price_labels_where("posNegIndicator_wind", 1)
data_isPositive_solar = _price_labels_where("posNegIndicator_solar", 1)
data_isPositive_total = _price_labels_where("posNegIndicator_total", 1)

# price-diff labels for wind/solar/total forecastError isNegative
data_isNegative_wind = _price_labels_where("posNegIndicator_wind", 0)
data_isNegative_solar = _price_labels_where("posNegIndicator_solar", 0)
data_isNegative_total = _price_labels_where("posNegIndicator_total", 0)

# price-diff labels for wind/solar/total forecastError isZero
data_isZero_wind = _price_labels_where("posNegIndicator_wind", 2)
data_isZero_solar = _price_labels_where("posNegIndicator_solar", 2)
data_isZero_total = _price_labels_where("posNegIndicator_total", 2)

# compute conditional probabilities for forecastError isPositive
condProb_PD_isPositiveOrZero_on_FE_isPositive_wind = _share_pos_or_zero(data_isPositive_wind)
condProb_PD_isPositiveOrZero_on_FE_isPositive_solar = _share_pos_or_zero(data_isPositive_solar)
condProb_PD_isPositiveOrZero_on_FE_isPositive_total = _share_pos_or_zero(data_isPositive_total)

# compute conditional probabilities for forecastError isNegative
condProb_PD_isPositiveOrZero_on_FE_isNegative_wind = _share_pos_or_zero(data_isNegative_wind)
condProb_PD_isPositiveOrZero_on_FE_isNegative_solar = _share_pos_or_zero(data_isNegative_solar)
condProb_PD_isPositiveOrZero_on_FE_isNegative_total = _share_pos_or_zero(data_isNegative_total)

# compute conditional probabilities for forecastError isZero
condProb_PD_isPositiveOrZero_on_FE_isZero_wind = _share_pos_or_zero(data_isZero_wind)
condProb_PD_isPositiveOrZero_on_FE_isZero_solar = _share_pos_or_zero(data_isZero_solar)
condProb_PD_isPositiveOrZero_on_FE_isZero_total = _share_pos_or_zero(data_isZero_total)

# result output
_print_case("FE_pos", "> 0", [
    ("wind", condProb_PD_isPositiveOrZero_on_FE_isPositive_wind, data_isPositive_wind),
    ("solar", condProb_PD_isPositiveOrZero_on_FE_isPositive_solar, data_isPositive_solar),
    ("total", condProb_PD_isPositiveOrZero_on_FE_isPositive_total, data_isPositive_total),
])

print("\n------------------------------------\n")

_print_case("FE_negative", "< 0", [
    ("wind", condProb_PD_isPositiveOrZero_on_FE_isNegative_wind, data_isNegative_wind),
    ("solar", condProb_PD_isPositiveOrZero_on_FE_isNegative_solar, data_isNegative_solar),
    ("total", condProb_PD_isPositiveOrZero_on_FE_isNegative_total, data_isNegative_total),
])

print("\n------------------------------------\n")

_print_case("FE_zero", "= 0", [
    ("wind", condProb_PD_isPositiveOrZero_on_FE_isZero_wind, data_isZero_wind),
    ("solar", condProb_PD_isPositiveOrZero_on_FE_isZero_solar, data_isZero_solar),
    ("total", condProb_PD_isPositiveOrZero_on_FE_isZero_total, data_isZero_total),
])

P(PD_posOrZero | FE_pos) [wind] : 0.3609
P(PD_negative  | FE_pos) [wind] : 0.6391
Number of datapoints for FE > 0 [wind]: 9036


P(PD_posOrZero | FE_pos) [solar]: 0.3677
P(PD_negative  | FE_pos) [solar]: 0.6323
Number of datapoints for FE > 0 [solar]: 4161


P(PD_posOrZero | FE_pos) [total]: 0.3311
P(PD_negative  | FE_pos) [total]: 0.6689
Number of datapoints for FE > 0 [total]: 8868

------------------------------------

P(PD_posOrZero | FE_negative) [wind] : 0.6554
P(PD_negative  | FE_negative) [wind] : 0.3446
Number of datapoints for FE < 0 [wind]: 8498


P(PD_posOrZero | FE_negative) [solar]: 0.5838
P(PD_negative  | FE_negative) [solar]: 0.4162
Number of datapoints for FE < 0 [solar]: 5865


P(PD_posOrZero | FE_negative) [total]: 0.6800
P(PD_negative  | FE_negative) [total]: 0.3200
Number of datapoints for FE < 0 [total]: 8670

------------------------------------

P(PD_posOrZero | FE_zero) [wind] : 0.5000
P(PD_negative  | FE_zero) [wind] : 0.5000
Number of datapoints for FE = 0 [w

# Expectations

Expectation: P(PD_negative  | FE_pos) > P(PD_posOrZero  | FE_pos)
    - since increased supply should lead to lower prices in intraday
Expectation: P(PD_posOrZero | FE_negative) > P(PD_negative | FE_negative)
    - since a supply shortage (less generation than forecast) should lead to higher prices in intraday
Expectation: P(PD_posOrZero | FE_zero) ≈ P(PD_negative | FE_zero)
    - we expect them to be roughly equal (i.e. 50/50) since an FE of zero should not provide any price signals

# Conclusion

###### Case FE_pos or FE_neg:

All expectations are met:

Total
    - Both expectations are met with at least 16% deviation from the random distribution of 50% (up and down)
Wind:
    - Both expectations are met with at least 13% deviation from the random distribution of 50% (up and down)
Solar
    - Both expectations are met with at least 8% deviation from the random distribution of 50% (up and down)
    
###### Case: FE_zero:

Total
    - The expectation is not met with a deviation of 25% from the random distribution of 50%. But there are only 4 datapoints with FE = 0 so the validity of the conditional probabilities is questionable.
Wind:
    - The expectation is met precisely, but there are only 8 datapoints with FE = 0, so the validity of the conditional probabilities is questionable.
Solar
    - The expectation is almost met with a slight deviation of 1.6% from the random distribution of 50%. The high number of datapoints with FE = 0 (7516) suggests that the conditional probabilities are valid.
    

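###### ==> We conclude from the above analyses that all expectations for FE_pos and FE_neg are met. For FE_zero the expectation holds for wind and (approximately) for solar; it is not met for total, where only 4 datapoints are available.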
