Merge pull request #141 from OnSSET/pop-calibration

Pop calibration
OnSSET · Jan 5, 2022 · 30acc5b · 30acc5b
2 parents ad84e6c + b51fbc5
commit 30acc5b
Show file tree

Hide file tree

Showing 6 changed files with 1,515 additions and 1,577 deletions.
diff --git a/onsset/onsset.py b/onsset/onsset.py
@@ -967,40 +967,20 @@ def calibrate_current_pop_and_urban(self, pop_actual, urban_current):
         # Use above ratio to calibrate the population in a new column
         self.df[SET_POP_CALIB] = self.df.apply(lambda row: row[SET_POP] * pop_ratio, axis=1)
         pop_modelled = self.df[SET_POP_CALIB].sum()
-        pop_diff = abs(pop_modelled - pop_actual)
-        print('The calibrated population differs by {:.2f}. '
-              'In case this is not acceptable please revise this part of the code'.format(pop_diff))
 
-        # TODO Why do we apply the ratio to elec_pop? Shouldn't the calibration take place before defining elec_pop?
         self.df[SET_ELEC_POP_CALIB] = self.df[SET_ELEC_POP] * pop_ratio
 
         logging.info('Urban/rural calibration process')
-        # TODO As indicated below, HRSL classifies in 0, 1 and 2; I don't get why if statement uses 3 here.
-        if max(self.df[SET_URBAN]) == 3:  # THIS OPTION IS CURRENTLY DISABLED
-            calibrate = True if 'n' in input(
-                'Use urban definition from GIS layer <y/n> (n=model calibration):') else False
-        else:
-            calibrate = True
         # RUN_PARAM: This is where manual calibration of urban/rural population takes place.
-        # The model uses 0, 1, 2 as GHS population layer does.
-        # As of this version, urban are only self.dfs with value equal to 2
-        if calibrate:
-            urban_modelled = 2
-            factor = 1
-            while abs(urban_modelled - urban_current) > 0.01:
-                self.df[SET_URBAN] = 0
-                self.df.loc[(self.df[SET_POP_CALIB] > 5000 * factor) & (
-                        self.df[SET_POP_CALIB] / self.df[SET_GRID_CELL_AREA] > 350 * factor), SET_URBAN] = 1
-                self.df.loc[(self.df[SET_POP_CALIB] > 50000 * factor) & (
-                        self.df[SET_POP_CALIB] / self.df[SET_GRID_CELL_AREA] > 1500 * factor), SET_URBAN] = 2
-                pop_urb = self.df.loc[self.df[SET_URBAN] > 1, SET_POP_CALIB].sum()
-                urban_modelled = pop_urb / pop_actual
-                if urban_modelled > urban_current:
-                    factor *= 1.1
-                else:
-                    factor *= 0.9
+        # The model uses 0, 1, 2 as follows; 0 = rural, 1 = peri-urban, 2 = urban.
+        # The calibration build into the model only classifies into urban/rural
 
-        # Get the calculated urban ratio, and limit it to within reasonable boundaries
+        self.df.sort_values(by=[SET_POP_CALIB], inplace=True, ascending=False)
+        cumulative_urban_pop = self.df[SET_POP_CALIB].cumsum()
+        self.df[SET_URBAN] = np.where(cumulative_urban_pop < (urban_current * self.df[SET_POP_CALIB].sum()), 2, 0)
+        self.df.sort_index(inplace=True)
+
+        # Get the calculated urban ratio and compare to the actual ratio
         pop_urb = self.df.loc[self.df[SET_URBAN] > 1, SET_POP_CALIB].sum()
         urban_modelled = pop_urb / pop_actual
 
@@ -1010,58 +990,28 @@ def calibrate_current_pop_and_urban(self, pop_actual, urban_current):
 
         return pop_modelled, urban_modelled
 
-    def project_pop_and_urban(self, pop_modelled, pop_future_high, pop_future_low, urban_modelled,
-                              urban_future, start_year, end_year, intermediate_year):
+    def project_pop_and_urban(self, pop_future, urban_future, start_year, years_of_analysis):
         """
         This function projects population and urban/rural ratio for the different years of the analysis
         """
-        project_life = end_year - start_year
+        project_life = years_of_analysis[-1] - start_year
 
         # Project future population, with separate growth rates for urban and rural
         logging.info('Population projection process')
+        start_year_pop = self.df[SET_POP_CALIB].sum()
+        start_year_urban_ratio = self.df.loc[self.df[SET_URBAN] > 1, SET_POP_CALIB].sum() / start_year_pop
 
-        # TODO this is a residual of the previous process;
-        # shall we delete? Is there any scenario where we don't apply projections?
-        calibrate = True
-
-        if calibrate:
-            urban_growth_high = (urban_future * pop_future_high) / (urban_modelled * pop_modelled)
-            rural_growth_high = ((1 - urban_future) * pop_future_high) / ((1 - urban_modelled) * pop_modelled)
-
-            yearly_urban_growth_rate_high = urban_growth_high ** (1 / project_life)
-            yearly_rural_growth_rate_high = rural_growth_high ** (1 / project_life)
-
-            urban_growth_low = (urban_future * pop_future_low) / (urban_modelled * pop_modelled)
-            rural_growth_low = ((1 - urban_future) * pop_future_low) / ((1 - urban_modelled) * pop_modelled)
-
-            yearly_urban_growth_rate_low = urban_growth_low ** (1 / project_life)
-            yearly_rural_growth_rate_low = rural_growth_low ** (1 / project_life)
-        else:
-            urban_growth_high = pop_future_high / pop_modelled
-            rural_growth_high = pop_future_high / pop_modelled
+        urban_growth = (urban_future * pop_future) / (start_year_urban_ratio * start_year_pop)
+        rural_growth = ((1 - urban_future) * pop_future) / ((1 - start_year_urban_ratio) * start_year_pop)
 
-            yearly_urban_growth_rate_high = urban_growth_high ** (1 / project_life)
-            yearly_rural_growth_rate_high = rural_growth_high ** (1 / project_life)
-
-            urban_growth_low = pop_future_low / pop_modelled
-            rural_growth_low = pop_future_low / pop_modelled
-
-            yearly_urban_growth_rate_low = urban_growth_low ** (1 / project_life)
-            yearly_rural_growth_rate_low = rural_growth_low ** (1 / project_life)
-
-        # RUN_PARAM: Define here the years for which results should be provided in the output file.
-        years_of_analysis = [intermediate_year, end_year]
+        yearly_urban_growth_rate = urban_growth ** (1 / project_life)
+        yearly_rural_growth_rate = rural_growth ** (1 / project_life)
 
         for year in years_of_analysis:
-            self.df[SET_POP + "{}".format(year) + 'High'] = \
-                self.df.apply(lambda row: row[SET_POP_CALIB] * (yearly_urban_growth_rate_high ** (year - start_year))
+            self.df[SET_POP + "{}".format(year)] = \
+                self.df.apply(lambda row: row[SET_POP_CALIB] * (yearly_urban_growth_rate ** (year - start_year))
                               if row[SET_URBAN] > 1
-                              else row[SET_POP_CALIB] * (yearly_rural_growth_rate_high ** (year - start_year)), axis=1)
-
-            self.df[SET_POP + "{}".format(year) + 'Low'] = \
-                self.df.apply(lambda row: row[SET_POP_CALIB] * (yearly_urban_growth_rate_low ** (year - start_year))
-                              if row[SET_URBAN] > 1
-                              else row[SET_POP_CALIB] * (yearly_rural_growth_rate_low ** (year - start_year)), axis=1)
+                              else row[SET_POP_CALIB] * (yearly_rural_growth_rate ** (year - start_year)), axis=1)
 
         self.df[SET_POP + "{}".format(start_year)] = self.df.apply(lambda row: row[SET_POP_CALIB], axis=1)
 
@@ -1740,11 +1690,6 @@ def set_scenario_variables(self, year, num_people_per_hh_rural, num_people_per_h
 
         """
 
-        if end_year_pop == 0:
-            self.df[SET_POP + "{}".format(year)] = self.df[SET_POP + "{}".format(year) + 'Low']
-        else:
-            self.df[SET_POP + "{}".format(year)] = self.df[SET_POP + "{}".format(year) + 'High']
-
         self.calculate_new_connections(year, time_step, start_year)
         self.set_residential_demand(rural_tier, urban_tier, num_people_per_hh_rural, num_people_per_hh_urban,
                                     productive_demand)

diff --git a/onsset/runner.py b/onsset/runner.py
@@ -64,30 +64,17 @@ def calibration(specs_path, csv_path, specs_path_calib, calibrated_csv_path):
     onsseter.df[SET_WINDCF] = onsseter.calc_wind_cfs()
 
     pop_actual = specs_data.loc[0, SPE_POP]
-    pop_future_high = specs_data.loc[0, SPE_POP_FUTURE + 'High']
-    pop_future_low = specs_data.loc[0, SPE_POP_FUTURE + 'Low']
     urban_current = specs_data.loc[0, SPE_URBAN]
-    urban_future = specs_data.loc[0, SPE_URBAN_FUTURE]
     start_year = int(specs_data.loc[0, SPE_START_YEAR])
-    end_year = int(specs_data.loc[0, SPE_END_YEAR])
-
-    intermediate_year = 2025
     elec_actual = specs_data.loc[0, SPE_ELEC]
     elec_actual_urban = specs_data.loc[0, SPE_ELEC_URBAN]
     elec_actual_rural = specs_data.loc[0, SPE_ELEC_RURAL]
 
     pop_modelled, urban_modelled = onsseter.calibrate_current_pop_and_urban(pop_actual, urban_current)
 
-    onsseter.project_pop_and_urban(pop_modelled, pop_future_high, pop_future_low, urban_modelled,
-                                   urban_future, start_year, end_year, intermediate_year)
-
     elec_modelled, rural_elec_ratio, urban_elec_ratio = \
         onsseter.elec_current_and_future(elec_actual, elec_actual_urban, elec_actual_rural, start_year)
 
-    # In case there are limitations in the way grid expansion is moving in a country, 
-    # this can be reflected through gridspeed.
-    # In this case the parameter is set to a very high value therefore is not taken into account.
-
     specs_data.loc[0, SPE_URBAN_MODELLED] = urban_modelled
     specs_data.loc[0, SPE_ELEC_MODELLED] = elec_modelled
     specs_data.loc[0, 'rural_elec_ratio_modelled'] = rural_elec_ratio
@@ -127,6 +114,8 @@ def scenario(specs_path, calibrated_csv_path, results_folder, summary_folder):
     for scenario in scenarios:
         print('Scenario: ' + str(scenario + 1))
         country_id = specs_data.iloc[0]['CountryCode']
+        pop_future = specs_data.loc[0, SPE_POP_FUTURE]
+        urban_future = specs_data.loc[0, SPE_URBAN_FUTURE]
 
         pop_index = scenario_info.iloc[scenario]['Population_Growth']
         tier_index = scenario_info.iloc[scenario]['Target_electricity_consumption_level']
@@ -271,6 +260,8 @@ def scenario(specs_path, calibrated_csv_path, results_folder, summary_folder):
 
         onsseter.current_mv_line_dist()
 
+        onsseter.project_pop_and_urban(pop_future, urban_future, start_year, yearsofanalysis)
+
         for year in yearsofanalysis:
             eleclimit = eleclimits[year]
             time_step = time_steps[year]

diff --git a/test/test_data/dj-specs-test.xlsx b/test/test_data/dj-specs-test.xlsx