Skip to content

Commit

Permalink
Merge pull request #141 from OnSSET/pop-calibration
Browse files Browse the repository at this point in the history
Pop calibration
  • Loading branch information
AndreasSahlberg committed Jan 5, 2022
2 parents ad84e6c + b51fbc5 commit 30acc5b
Show file tree
Hide file tree
Showing 6 changed files with 1,515 additions and 1,577 deletions.
93 changes: 19 additions & 74 deletions onsset/onsset.py
Expand Up @@ -967,40 +967,20 @@ def calibrate_current_pop_and_urban(self, pop_actual, urban_current):
# Use above ratio to calibrate the population in a new column
self.df[SET_POP_CALIB] = self.df.apply(lambda row: row[SET_POP] * pop_ratio, axis=1)
pop_modelled = self.df[SET_POP_CALIB].sum()
pop_diff = abs(pop_modelled - pop_actual)
print('The calibrated population differs by {:.2f}. '
'In case this is not acceptable please revise this part of the code'.format(pop_diff))

# TODO Why do we apply the ratio to elec_pop? Shouldn't the calibration take place before defining elec_pop?
self.df[SET_ELEC_POP_CALIB] = self.df[SET_ELEC_POP] * pop_ratio

logging.info('Urban/rural calibration process')
# TODO As indicated below, HRSL classifies in 0, 1 and 2; I don't get why if statement uses 3 here.
if max(self.df[SET_URBAN]) == 3: # THIS OPTION IS CURRENTLY DISABLED
calibrate = True if 'n' in input(
'Use urban definition from GIS layer <y/n> (n=model calibration):') else False
else:
calibrate = True
# RUN_PARAM: This is where manual calibration of urban/rural population takes place.
# The model uses 0, 1, 2 as GHS population layer does.
# As of this version, only settlements with a value equal to 2 are treated as urban
if calibrate:
urban_modelled = 2
factor = 1
while abs(urban_modelled - urban_current) > 0.01:
self.df[SET_URBAN] = 0
self.df.loc[(self.df[SET_POP_CALIB] > 5000 * factor) & (
self.df[SET_POP_CALIB] / self.df[SET_GRID_CELL_AREA] > 350 * factor), SET_URBAN] = 1
self.df.loc[(self.df[SET_POP_CALIB] > 50000 * factor) & (
self.df[SET_POP_CALIB] / self.df[SET_GRID_CELL_AREA] > 1500 * factor), SET_URBAN] = 2
pop_urb = self.df.loc[self.df[SET_URBAN] > 1, SET_POP_CALIB].sum()
urban_modelled = pop_urb / pop_actual
if urban_modelled > urban_current:
factor *= 1.1
else:
factor *= 0.9
# The model uses 0, 1, 2 as follows; 0 = rural, 1 = peri-urban, 2 = urban.
# The calibration built into the model only classifies into urban/rural

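# Rank settlements by calibrated population (largest first) and classify them as urban (2)
# while their cumulative population stays below the target urban share; the rest are rural (0)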
self.df.sort_values(by=[SET_POP_CALIB], inplace=True, ascending=False)
cumulative_urban_pop = self.df[SET_POP_CALIB].cumsum()
self.df[SET_URBAN] = np.where(cumulative_urban_pop < (urban_current * self.df[SET_POP_CALIB].sum()), 2, 0)
self.df.sort_index(inplace=True)

# Get the calculated urban ratio and compare to the actual ratio
pop_urb = self.df.loc[self.df[SET_URBAN] > 1, SET_POP_CALIB].sum()
urban_modelled = pop_urb / pop_actual

Expand All @@ -1010,58 +990,28 @@ def calibrate_current_pop_and_urban(self, pop_actual, urban_current):

return pop_modelled, urban_modelled

def project_pop_and_urban(self, pop_modelled, pop_future_high, pop_future_low, urban_modelled,
urban_future, start_year, end_year, intermediate_year):
def project_pop_and_urban(self, pop_future, urban_future, start_year, years_of_analysis):
"""
This function projects population and urban/rural ratio for the different years of the analysis
"""
project_life = end_year - start_year
project_life = years_of_analysis[-1] - start_year

# Project future population, with separate growth rates for urban and rural
logging.info('Population projection process')
start_year_pop = self.df[SET_POP_CALIB].sum()
start_year_urban_ratio = self.df.loc[self.df[SET_URBAN] > 1, SET_POP_CALIB].sum() / start_year_pop

# TODO this is a residual of the previous process;
# shall we delete? Is there any scenario where we don't apply projections?
calibrate = True

if calibrate:
urban_growth_high = (urban_future * pop_future_high) / (urban_modelled * pop_modelled)
rural_growth_high = ((1 - urban_future) * pop_future_high) / ((1 - urban_modelled) * pop_modelled)

yearly_urban_growth_rate_high = urban_growth_high ** (1 / project_life)
yearly_rural_growth_rate_high = rural_growth_high ** (1 / project_life)

urban_growth_low = (urban_future * pop_future_low) / (urban_modelled * pop_modelled)
rural_growth_low = ((1 - urban_future) * pop_future_low) / ((1 - urban_modelled) * pop_modelled)

yearly_urban_growth_rate_low = urban_growth_low ** (1 / project_life)
yearly_rural_growth_rate_low = rural_growth_low ** (1 / project_life)
else:
urban_growth_high = pop_future_high / pop_modelled
rural_growth_high = pop_future_high / pop_modelled
urban_growth = (urban_future * pop_future) / (start_year_urban_ratio * start_year_pop)
rural_growth = ((1 - urban_future) * pop_future) / ((1 - start_year_urban_ratio) * start_year_pop)

yearly_urban_growth_rate_high = urban_growth_high ** (1 / project_life)
yearly_rural_growth_rate_high = rural_growth_high ** (1 / project_life)

urban_growth_low = pop_future_low / pop_modelled
rural_growth_low = pop_future_low / pop_modelled

yearly_urban_growth_rate_low = urban_growth_low ** (1 / project_life)
yearly_rural_growth_rate_low = rural_growth_low ** (1 / project_life)

# RUN_PARAM: Define here the years for which results should be provided in the output file.
years_of_analysis = [intermediate_year, end_year]
yearly_urban_growth_rate = urban_growth ** (1 / project_life)
yearly_rural_growth_rate = rural_growth ** (1 / project_life)

for year in years_of_analysis:
self.df[SET_POP + "{}".format(year) + 'High'] = \
self.df.apply(lambda row: row[SET_POP_CALIB] * (yearly_urban_growth_rate_high ** (year - start_year))
self.df[SET_POP + "{}".format(year)] = \
self.df.apply(lambda row: row[SET_POP_CALIB] * (yearly_urban_growth_rate ** (year - start_year))
if row[SET_URBAN] > 1
else row[SET_POP_CALIB] * (yearly_rural_growth_rate_high ** (year - start_year)), axis=1)

self.df[SET_POP + "{}".format(year) + 'Low'] = \
self.df.apply(lambda row: row[SET_POP_CALIB] * (yearly_urban_growth_rate_low ** (year - start_year))
if row[SET_URBAN] > 1
else row[SET_POP_CALIB] * (yearly_rural_growth_rate_low ** (year - start_year)), axis=1)
else row[SET_POP_CALIB] * (yearly_rural_growth_rate ** (year - start_year)), axis=1)

self.df[SET_POP + "{}".format(start_year)] = self.df.apply(lambda row: row[SET_POP_CALIB], axis=1)

Expand Down Expand Up @@ -1740,11 +1690,6 @@ def set_scenario_variables(self, year, num_people_per_hh_rural, num_people_per_h
"""

if end_year_pop == 0:
self.df[SET_POP + "{}".format(year)] = self.df[SET_POP + "{}".format(year) + 'Low']
else:
self.df[SET_POP + "{}".format(year)] = self.df[SET_POP + "{}".format(year) + 'High']

self.calculate_new_connections(year, time_step, start_year)
self.set_residential_demand(rural_tier, urban_tier, num_people_per_hh_rural, num_people_per_hh_urban,
productive_demand)
Expand Down
17 changes: 4 additions & 13 deletions onsset/runner.py
Expand Up @@ -64,30 +64,17 @@ def calibration(specs_path, csv_path, specs_path_calib, calibrated_csv_path):
onsseter.df[SET_WINDCF] = onsseter.calc_wind_cfs()

pop_actual = specs_data.loc[0, SPE_POP]
pop_future_high = specs_data.loc[0, SPE_POP_FUTURE + 'High']
pop_future_low = specs_data.loc[0, SPE_POP_FUTURE + 'Low']
urban_current = specs_data.loc[0, SPE_URBAN]
urban_future = specs_data.loc[0, SPE_URBAN_FUTURE]
start_year = int(specs_data.loc[0, SPE_START_YEAR])
end_year = int(specs_data.loc[0, SPE_END_YEAR])

intermediate_year = 2025
elec_actual = specs_data.loc[0, SPE_ELEC]
elec_actual_urban = specs_data.loc[0, SPE_ELEC_URBAN]
elec_actual_rural = specs_data.loc[0, SPE_ELEC_RURAL]

pop_modelled, urban_modelled = onsseter.calibrate_current_pop_and_urban(pop_actual, urban_current)

onsseter.project_pop_and_urban(pop_modelled, pop_future_high, pop_future_low, urban_modelled,
urban_future, start_year, end_year, intermediate_year)

elec_modelled, rural_elec_ratio, urban_elec_ratio = \
onsseter.elec_current_and_future(elec_actual, elec_actual_urban, elec_actual_rural, start_year)

# In case there are limitations in the way grid expansion is moving in a country,
# this can be reflected through gridspeed.
# In this case the parameter is set to a very high value therefore is not taken into account.

specs_data.loc[0, SPE_URBAN_MODELLED] = urban_modelled
specs_data.loc[0, SPE_ELEC_MODELLED] = elec_modelled
specs_data.loc[0, 'rural_elec_ratio_modelled'] = rural_elec_ratio
Expand Down Expand Up @@ -127,6 +114,8 @@ def scenario(specs_path, calibrated_csv_path, results_folder, summary_folder):
for scenario in scenarios:
print('Scenario: ' + str(scenario + 1))
country_id = specs_data.iloc[0]['CountryCode']
pop_future = specs_data.loc[0, SPE_POP_FUTURE]
urban_future = specs_data.loc[0, SPE_URBAN_FUTURE]

pop_index = scenario_info.iloc[scenario]['Population_Growth']
tier_index = scenario_info.iloc[scenario]['Target_electricity_consumption_level']
Expand Down Expand Up @@ -271,6 +260,8 @@ def scenario(specs_path, calibrated_csv_path, results_folder, summary_folder):

onsseter.current_mv_line_dist()

onsseter.project_pop_and_urban(pop_future, urban_future, start_year, yearsofanalysis)

for year in yearsofanalysis:
eleclimit = eleclimits[year]
time_step = time_steps[year]
Expand Down
Binary file modified test/test_data/dj-specs-test.xlsx
Binary file not shown.

0 comments on commit 30acc5b

Please sign in to comment.