In [1]:
import pandas as pd

In [2]:
# Load the input data into a DataFrame
specific_df = pd.read_csv('biourja-efzrr-y7i38ed9-input.csv')
specific_df.columns = ['Plant_Name', 'Forecast', 'Capacity']

In [3]:
specific_df.head(10)

Unnamed: 0,Plant_Name,Forecast,Capacity
0,E1,45.4,100
1,E2,31.2,150
2,E3,8.5,50
3,E4,88.2,100
4,E5,98.4,150
5,E6,238.5,300
6,E7,66.8,100
7,E8,115.4,150
8,E9,74.0,100
9,E10,156.8,200


In [4]:
# Define the updated zones level forecasts
zones_forecasts = {
    'East': 2800,
    'North': 1500,
    'West': 2000,
    'South': 6500
}

In [5]:
# zones_df is the dataframe that contains zone and their corresponding zonal forecast
zones_df = pd.DataFrame(list(zones_forecasts.items()), columns=['zone', 'forecast'])

In [6]:
zones_df.head(5)

Unnamed: 0,zone,forecast
0,East,2800
1,North,1500
2,West,2000
3,South,6500


In [7]:
# Calculate the sum of all the forecasts
total = zones_df['forecast'].sum()

# Modify the "forecast" column with the formula
zones_df['forecast'] = (zones_df['forecast'] * 12000) / total

zones_df.head()

Unnamed: 0,zone,forecast
0,East,2625.0
1,North,1406.25
2,West,1875.0
3,South,6093.75


In [8]:
# Separating the individual wind farm forecasts zone wise 
east_dataframe = specific_df[specific_df['Plant_Name'].str.startswith('E')]
north_dataframe = specific_df[specific_df['Plant_Name'].str.startswith('N')]
west_dataframe = specific_df[specific_df['Plant_Name'].str.startswith('W')]
south_dataframe = specific_df[specific_df['Plant_Name'].str.startswith('S')]


In [9]:
# Adding a new column "Weight" to each of the dataframes
east_dataframe['Weight'] = east_dataframe['Forecast'] / east_dataframe['Capacity']
north_dataframe['Weight'] = north_dataframe['Forecast'] / north_dataframe['Capacity']
west_dataframe['Weight'] = west_dataframe['Forecast'] / west_dataframe['Capacity']
south_dataframe['Weight'] = south_dataframe['Forecast'] / south_dataframe['Capacity']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  east_dataframe['Weight'] = east_dataframe['Forecast'] / east_dataframe['Capacity']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  north_dataframe['Weight'] = north_dataframe['Forecast'] / north_dataframe['Capacity']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  west_dataframe['Weight'] = west_data

In [10]:
east_dataframe.head(27)

Unnamed: 0,Plant_Name,Forecast,Capacity,Weight
0,E1,45.4,100,0.454
1,E2,31.2,150,0.208
2,E3,8.5,50,0.17
3,E4,88.2,100,0.882
4,E5,98.4,150,0.656
5,E6,238.5,300,0.795
6,E7,66.8,100,0.668
7,E8,115.4,150,0.769333
8,E9,74.0,100,0.74
9,E10,156.8,200,0.784


In [11]:
# Calculating the summation of weight for each dataframe
e_weight_sum = east_dataframe['Weight'].sum()
n_weight_sum = north_dataframe['Weight'].sum()
w_weight_sum = west_dataframe['Weight'].sum()
s_weight_sum = south_dataframe['Weight'].sum()

# Calculating the new_forecast for each dataframe
east_dataframe['new_forecast'] = east_dataframe['Weight'] * (zones_df['forecast'][0] / e_weight_sum)
north_dataframe['new_forecast'] = north_dataframe['Weight'] * (zones_df['forecast'][1] / n_weight_sum)
west_dataframe['new_forecast'] = west_dataframe['Weight'] * (zones_df['forecast'][2] / w_weight_sum)
south_dataframe['new_forecast'] = south_dataframe['Weight'] * (zones_df['forecast'][3] / s_weight_sum)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  east_dataframe['new_forecast'] = east_dataframe['Weight'] * (zones_df['forecast'][0] / e_weight_sum)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  north_dataframe['new_forecast'] = north_dataframe['Weight'] * (zones_df['forecast'][1] / n_weight_sum)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  w

In [12]:
east_dataframe.head(27)

Unnamed: 0,Plant_Name,Forecast,Capacity,Weight,new_forecast
0,E1,45.4,100,0.454,80.080859
1,E2,31.2,150,0.208,36.689028
2,E3,8.5,50,0.17,29.986225
3,E4,88.2,100,0.882,155.57559
4,E5,98.4,150,0.656,115.71155
5,E6,238.5,300,0.795,140.229698
6,E7,66.8,100,0.668,117.828225
7,E8,115.4,150,0.769333,135.702366
8,E9,74.0,100,0.74,130.528273
9,E10,156.8,200,0.784,138.289413


In [13]:
south_dataframe['Capacity'].sum()



7950

In [14]:
# Defining a function to calculate the difference between new_forecast and Capacity and replacing negatives with zero
def calculate_difference(row):
    diff = row['new_forecast'] - row['Capacity']
    return max(0, diff)

# Appling the function to each dataframe
east_dataframe['diff_between_new_forecast_and_capacity'] = east_dataframe.apply(calculate_difference, axis=1)
north_dataframe['diff_between_new_forecast_and_capacity'] = north_dataframe.apply(calculate_difference, axis=1)
west_dataframe['diff_between_new_forecast_and_capacity'] = west_dataframe.apply(calculate_difference, axis=1)
south_dataframe['diff_between_new_forecast_and_capacity'] = south_dataframe.apply(calculate_difference, axis=1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  east_dataframe['diff_between_new_forecast_and_capacity'] = east_dataframe.apply(calculate_difference, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  north_dataframe['diff_between_new_forecast_and_capacity'] = north_dataframe.apply(calculate_difference, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view

In [15]:
east_dataframe



Unnamed: 0,Plant_Name,Forecast,Capacity,Weight,new_forecast,diff_between_new_forecast_and_capacity
0,E1,45.4,100,0.454,80.080859,0.0
1,E2,31.2,150,0.208,36.689028,0.0
2,E3,8.5,50,0.17,29.986225,0.0
3,E4,88.2,100,0.882,155.57559,55.57559
4,E5,98.4,150,0.656,115.71155,0.0
5,E6,238.5,300,0.795,140.229698,0.0
6,E7,66.8,100,0.668,117.828225,17.828225
7,E8,115.4,150,0.769333,135.702366,0.0
8,E9,74.0,100,0.74,130.528273,30.528273
9,E10,156.8,200,0.784,138.289413,0.0


In [16]:
east_exceeded_forecast = east_dataframe['diff_between_new_forecast_and_capacity'].sum()
north_exceeded_forecast = north_dataframe['diff_between_new_forecast_and_capacity'].sum()
west_exceeded_forecast = west_dataframe['diff_between_new_forecast_and_capacity'].sum()
south_exceeded_forecast = south_dataframe['diff_between_new_forecast_and_capacity'].sum()


In [17]:
# For each of the zones we are calculating the sum of weights of wind farms being considered for distribution of exceeding new forecasts

# For 'East' dataframe
sum_weights_e_less_than_capacity = east_dataframe.loc[east_dataframe['new_forecast'] < east_dataframe['Capacity'], 'Weight'].sum()

# For 'North' dataframe
sum_weights_n_less_than_capacity = north_dataframe.loc[north_dataframe['new_forecast'] < north_dataframe['Capacity'], 'Weight'].sum()

# For 'West' dataframe
sum_weights_w_less_than_capacity = west_dataframe.loc[west_dataframe['new_forecast'] < west_dataframe['Capacity'], 'Weight'].sum()

# For 'South' dataframe
sum_weights_s_less_than_capacity = south_dataframe.loc[south_dataframe['new_forecast'] < south_dataframe['Capacity'], 'Weight'].sum()


In [19]:

east_dataframe['new_forecast'] = east_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * east_exceeded_forecast / sum_weights_e_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)

                                                                                                                

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  east_dataframe['new_forecast'] = east_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * east_exceeded_forecast / sum_weights_e_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)


In [20]:
east_dataframe.head(27)

Unnamed: 0,Plant_Name,Forecast,Capacity,Weight,new_forecast,diff_between_new_forecast_and_capacity
0,E1,45.4,100,0.454,101.223227,0.0
1,E2,31.2,150,0.208,46.375399,0.0
2,E3,8.5,50,0.17,37.90297,0.0
3,E4,88.2,100,0.882,100.0,55.57559
4,E5,98.4,150,0.656,146.260874,0.0
5,E6,238.5,300,0.795,177.252126,0.0
6,E7,66.8,100,0.668,100.0,17.828225
7,E8,115.4,150,0.769333,171.529521,0.0
8,E9,74.0,100,0.74,100.0,30.528273
9,E10,156.8,200,0.784,174.799581,0.0


In [21]:


north_dataframe['new_forecast'] = north_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * north_exceeded_forecast / sum_weights_n_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)
west_dataframe['new_forecast'] = west_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * west_exceeded_forecast / sum_weights_w_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)
south_dataframe['new_forecast'] = south_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * south_exceeded_forecast / sum_weights_s_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  north_dataframe['new_forecast'] = north_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * north_exceeded_forecast / sum_weights_n_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  west_dataframe['new_forecast'] = west_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * west_exceeded_forecast / sum_weights_w_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=

In [22]:

# If new_forecast is greater than the capacity, we will apply the same process as applied above to distribute the exceeding weights.
east_dataframe['diff_between_new_forecast_and_capacity'] = east_dataframe.apply(calculate_difference, axis=1)
north_dataframe['diff_between_new_forecast_and_capacity'] = north_dataframe.apply(calculate_difference, axis=1)
west_dataframe['diff_between_new_forecast_and_capacity'] = west_dataframe.apply(calculate_difference, axis=1)
south_dataframe['diff_between_new_forecast_and_capacity'] = south_dataframe.apply(calculate_difference, axis=1)

SyntaxError: invalid syntax (1359036020.py, line 1)

In [23]:
east_dataframe.head(27)

Unnamed: 0,Plant_Name,Forecast,Capacity,Weight,new_forecast,diff_between_new_forecast_and_capacity
0,E1,45.4,100,0.454,101.223227,0.0
1,E2,31.2,150,0.208,46.375399,0.0
2,E3,8.5,50,0.17,37.90297,0.0
3,E4,88.2,100,0.882,100.0,55.57559
4,E5,98.4,150,0.656,146.260874,0.0
5,E6,238.5,300,0.795,177.252126,0.0
6,E7,66.8,100,0.668,100.0,17.828225
7,E8,115.4,150,0.769333,171.529521,0.0
8,E9,74.0,100,0.74,100.0,30.528273
9,E10,156.8,200,0.784,174.799581,0.0


In [24]:
east_exceeded_forecast = east_dataframe['diff_between_new_forecast_and_capacity'].sum()
north_exceeded_forecast = north_dataframe['diff_between_new_forecast_and_capacity'].sum()
west_exceeded_forecast = west_dataframe['diff_between_new_forecast_and_capacity'].sum()
south_exceeded_forecast = south_dataframe['diff_between_new_forecast_and_capacity'].sum()

NameError: name 'east_individual_dataframe' is not defined

In [25]:
# For 'East' dataframe
sum_weights_e_less_than_capacity = east_dataframe.loc[east_dataframe['new_forecast'] < east_dataframe['Capacity'], 'Weight'].sum()

# For 'North' dataframe
sum_weights_n_less_than_capacity = north_dataframe.loc[north_dataframe['new_forecast'] < north_dataframe['Capacity'], 'Weight'].sum()

# For 'West' dataframe
sum_weights_w_less_than_capacity = west_dataframe.loc[west_dataframe['new_forecast'] < west_dataframe['Capacity'], 'Weight'].sum()

# For 'South' dataframe
sum_weights_s_less_than_capacity = south_dataframe.loc[south_dataframe['new_forecast'] < south_dataframe['Capacity'], 'Weight'].sum()

In [26]:
east_dataframe['new_forecast'] = east_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * east_exceeded_forecast / sum_weights_e_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)
north_dataframe['new_forecast'] = north_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * north_exceeded_forecast / sum_weights_n_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)
west_dataframe['new_forecast'] = west_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * west_exceeded_forecast / sum_weights_w_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)
south_dataframe['new_forecast'] = south_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * south_exceeded_forecast / sum_weights_s_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  east_dataframe['new_forecast'] = east_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * east_exceeded_forecast / sum_weights_e_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  north_dataframe['new_forecast'] = north_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * north_exceeded_forecast / sum_weights_n_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=

In [27]:
east_dataframe['diff_between_new_forecast_and_capacity'] = east_dataframe.apply(calculate_difference, axis=1)
north_dataframe['diff_between_new_forecast_and_capacity'] = north_dataframe.apply(calculate_difference, axis=1)
west_dataframe['diff_between_new_forecast_and_capacity'] = west_dataframe.apply(calculate_difference, axis=1)
south_dataframe['diff_between_new_forecast_and_capacity'] = south_dataframe.apply(calculate_difference, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  east_dataframe['diff_between_new_forecast_and_capacity'] = east_dataframe.apply(calculate_difference, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  north_dataframe['diff_between_new_forecast_and_capacity'] = north_dataframe.apply(calculate_difference, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view

In [28]:
west_exceeded_forecast = west_dataframe['diff_between_new_forecast_and_capacity'].sum()
sum_weights_w_less_than_capacity = west_dataframe.loc[west_dataframe['new_forecast'] < west_dataframe['Capacity'], 'Weight'].sum()
west_dataframe['new_forecast'] = west_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * west_exceeded_forecast / sum_weights_w_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  west_dataframe['new_forecast'] = west_dataframe.apply(lambda row: row['new_forecast'] + (row['Weight'] * west_exceeded_forecast / sum_weights_w_less_than_capacity) if row['new_forecast'] < row['Capacity'] else row['Capacity'], axis=1)


In [29]:
west_dataframe['diff_between_new_forecast_and_capacity'] = west_dataframe.apply(calculate_difference, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  west_dataframe['diff_between_new_forecast_and_capacity'] = west_dataframe.apply(calculate_difference, axis=1)


In [34]:
# Merge the four dataframes into one
merged_dataframe = pd.concat([east_dataframe, north_dataframe, west_dataframe, south_dataframe])

# Select only the 'Plant_Name' and 'new_forecast' columns
merged_dataframe = merged_dataframe[['Plant_Name', 'new_forecast']]

# Rename the 'new_forecast' column to 'forecast'
merged_dataframe.rename(columns={'new_forecast': 'Forecast'}, inplace=True)

# Save the merged dataframe to a CSV file
merged_dataframe.to_csv('merged_data.csv', index=False)

In [35]:
merged_dataframe.head(100)

Unnamed: 0,Plant_Name,Forecast
0,E1,100.000000
1,E2,58.171564
2,E3,47.544067
3,E4,100.000000
4,E5,183.464165
...,...,...
95,S31,281.550938
96,S32,150.000000
97,S33,200.000000
98,S34,50.000000
