updated s_post_installation script in order that correct scenario configurations are generated
nismod · Mar 6, 2019 · 13e330e · 13e330e
1 parent 008968b
commit 13e330e
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 145 deletions.
diff --git a/README.rst b/README.rst
@@ -70,7 +70,7 @@ In order to run HIRE, first the model needs to be set up (Section 1.1 or Section
 
 4.  Install HIRE from within the console with the command
 
-    ``energy_demand setup -d path/to/energy_data_folder -f path/to/config.ini``
+    ``energy_demand setup -f path/to/config.ini``
 
     The ``path/to/energy_data_folder`` is the path to the location with
     the necessary data to run the model.

diff --git a/energy_demand/scripts/s_post_installation.py b/energy_demand/scripts/s_post_installation.py
@@ -94,13 +94,19 @@ def post_install_setup(args):
     # Complete gva and pop data for every sector
     data_pop = os.path.join(local_data_path, "scenarios", "MISTRAL_pop_gva", "data")
     path_geography = os.path.join(local_data_path, "scenarios", "uk_pop_principal_2015_2050_MSOA_england.csv")
-    geography_name = "region"  # "lad_uk_2016"
+    geography_name = "lad_uk_2016"
+
+    # All MISTRAL scenarios to prepare with correct config
+    scenarios_to_generate = [
+        'pop-baseline16_econ-c16_fuel-c16'
+    ]
 
     script_data_preparation_MISTRAL_pop_gva.run(
         path_to_folder=data_pop,
         path_MSOA_baseline=path_geography,
         MSOA_calculations=False,
-        geography_name="region") # "lad_uk_2016"
+        geography_name=geography_name,
+        scenarios_to_generate=scenarios_to_generate)
 
     print("... successfully finished setup")
     return
diff --git a/energy_demand/scripts/smif_data_related/generate_data_version.py b/energy_demand/scripts/smif_data_related/generate_data_version.py
@@ -177,4 +177,4 @@ def package_data(
     """
     # Map command line arguments to function arguments.
     #package_data(*sys.argv[1:])
-    package_data('v_083', 'C:/Users/cenv0553/ED/data')
+    package_data('v0.8.9', 'C:/Users/cenv0553/ED/data')
diff --git a/energy_demand/scripts/smif_data_related/script_data_preparation_MISTRAL_pop_gva.py b/energy_demand/scripts/smif_data_related/script_data_preparation_MISTRAL_pop_gva.py
@@ -8,12 +8,12 @@
 from energy_demand.basic import lookup_tables
 from energy_demand.basic import basic_functions
 
-
 def run(
         path_to_folder,
         path_MSOA_baseline,
         MSOA_calculations=False,
-        geography_name='region'
+        geography_name='region',
+        scenarios_to_generate=[]
         ):
     """
     path_to_folder : str
@@ -202,7 +202,7 @@ def run(
     # ---------------------------------------------------------------------------------------------------
     # Add interval and create individual GVA data for selected sectors
     # ---------------------------------------------------------------------------------------------------
-    columns = ['timestep', 'sector', geography_name, 'value']
+    columns = ['timestep', 'sectors', geography_name]
 
     # Get all folders with scenario run results (name of folder is scenario)
     all_csv_folders_walk = os.walk(path_to_folder)
@@ -211,165 +211,163 @@ def run(
         break
 
     for folder_name in all_csv_folders:
-        print("folder name: " + str(folder_name), flush=True)
+
         all_files = os.listdir(os.path.join(path_to_folder, folder_name))
+        if folder_name in scenarios_to_generate:
+            print("folder name: " + str(folder_name), flush=True)
+            for file_name in all_files:
+                filename_split = file_name.split("__")
+                var_name = filename_split[0]
+                if (var_name == "gva_per_head" and filename_split[1] == 'lad_sector.csv') or (
+                    var_name == "population" and filename_split[1] == 'lad.csv') or (
+                        var_name == "gva_per_head" and filename_split[1] == 'lad.csv'):
+
+                    try:
 
-        for file_name in all_files:
-            filename_split = file_name.split("__")
+                        file_path = os.path.join(path_to_folder, folder_name, file_name)
+                        print("file_path " + str(file_path))
+                        gp_file = pd.read_csv(file_path)
 
-            if (filename_split[0] == "gva_per_head" and filename_split[1] == 'lad_sector.csv') or (
-                filename_split[0] == "population" and filename_split[1] == 'lad.csv') or (
-                    filename_split[0] == "gva_per_head" and filename_split[1] == 'lad.csv'):
-
-                try:
-                    file_path = os.path.join(path_to_folder, folder_name, file_name)
-                    gp_file = pd.read_csv(file_path)
+                        gp_file['year'] = gp_file['year'].astype(int)
 
-                    gp_file['year'] = gp_file['year'].astype(int)
+                        # Drop all rows whose 'value' entry is NaN or infinite
+                        gp_file = gp_file[np.isfinite(gp_file['value'])]
 
-                    # Drop all rows with alls NaN entries
-                    gp_file = gp_file[np.isfinite(gp_file['value'])]
+                        # Select all entries with matching years
+                        gp_file = gp_file.loc[gp_file['year'].isin(range(base_yr, end_yr + 1))]
 
-                    # Select all entries with matching years
-                    gp_file = gp_file.loc[gp_file['year'].isin(range(base_yr, end_yr + 1))]
+                        # Rename columns
+                        gp_file = gp_file.rename(index=str, columns={"year": "timestep"})
+                        gp_file = gp_file.rename(index=str, columns={"region": geography_name})
+                        gp_file = gp_file.rename(index=str, columns={"value": var_name})
 
-                    # Rename columns
-                    gp_file = gp_file.rename(index=str, columns={"year": "timestep"})
-                    ##gp_file = gp_file.rename(index=str, columns={"region": geography_name})
+                        gp_file.to_csv(file_path, index=False)
 
-                    gp_file.to_csv(file_path, index=False)
-
-                    # ---  
-                    # MSOA pop calculation
-                    # ----
-                    if MSOA_calculations:
-                        if (filename_split[0] == "population" and filename_split[1] == 'lad.csv'):
-
-                            # Calculate relative pop percentage of ONS scenarios
-                            msoa_principalDF = pd.read_csv(path_MSOA_baseline)
-                            msoa_principalDF_selection_2015 = msoa_principalDF.loc[msoa_principalDF['year'] == 2015]
-
-                            # LADs and calculate factor per MSOA
-                            factor_msoas = {}
-                            for lad, msoas in LAD_MSOA_lu.items():
-                                tot_pop_lad = 0
-                                for msoa in msoas:
-                                    tot_pop_lad += float(msoa_principalDF_selection_2015.loc[msoa_principalDF_selection_2015['region'] == msoa]['value'])
-                                for msoa in msoas:
-                                    pop_msoa = float(msoa_principalDF_selection_2015.loc[msoa_principalDF_selection_2015['region'] == msoa]['value'])
-                                    factor_msoas[msoa] = pop_msoa / tot_pop_lad #calculate fator
+                        # ---  
+                        # MSOA pop calculation
+                        # ----
+                        if MSOA_calculations:
+                            if (filename_split[0] == "population" and filename_split[1] == 'lad.csv'):
+
+                                # Calculate relative pop percentage of ONS scenarios
+                                msoa_principalDF = pd.read_csv(path_MSOA_baseline)
+                                msoa_principalDF_selection_2015 = msoa_principalDF.loc[msoa_principalDF['year'] == 2015]
+
+                                # LADs and calculate factor per MSOA
+                                factor_msoas = {}
+                                for lad, msoas in LAD_MSOA_lu.items():
+                                    tot_pop_lad = 0
+                                    for msoa in msoas:
+                                        tot_pop_lad += float(msoa_principalDF_selection_2015.loc[msoa_principalDF_selection_2015['region'] == msoa]['value'])
+                                    for msoa in msoas:
+                                        pop_msoa = float(msoa_principalDF_selection_2015.loc[msoa_principalDF_selection_2015['region'] == msoa]['value'])
+                                        factor_msoas[msoa] = pop_msoa / tot_pop_lad # calculate factor
+
+                                list_with_all_vals = []
+                                gp_file = pd.read_csv(file_path)
+
+                                for index, row_lad in gp_file.iterrows():
+                                    lad = row_lad['region']
+                                    try:
+                                        corresponding_msoas = LAD_MSOA_lu[lad]
+                                    except KeyError:
+                                        corresponding_msoas = [lad] # No match for northern ireland
+
+                                    # Calculate population according to ONS 2015
+                                    pop_LAD = row_lad['value']
+
+                                    for msoa_name in corresponding_msoas:
+                                        try:
+                                            pop_ONS_scale_factor = factor_msoas[msoa_name]
+                                        except:
+                                            pop_ONS_scale_factor = 1 # If not mapped
+
+                                        pop_MSOA_ONS_scaled = pop_LAD * pop_ONS_scale_factor
+
+                                        new_row = {
+                                            'region': msoa_name,
+                                            "timestep": row_lad['year'],
+                                            "value": pop_MSOA_ONS_scaled}
+
+                                        list_with_all_vals.append(new_row)
+
+                                msoaDF = pd.DataFrame(list_with_all_vals, columns=gp_file.columns)
+                                file_path_MSOA_out = os.path.join(path_to_folder, folder_name, "{}_{}.csv".format(file_name[:-4], "MSOA"))
+                                msoaDF.to_csv(file_path_MSOA_out, index=False)
+                    except:
+                        print("... error in preparing data")
+                        pass
+                else:
+                    pass
 
+                # ----------------------------------------------------------
+                # Script to generate sectors file
+                # ----------------------------------------------------------
+                if (filename_split[0] == "gva_per_head" and filename_split[1] == 'lad_sector.csv'):
+
+                    #try:
+                        file_path = os.path.join(path_to_folder, folder_name, file_name)
+                        df = pd.read_csv(file_path)
+
+                        df = df.rename(index=str, columns={"year": "timestep"})
+                        df = df.rename(index=str, columns={"economic_sector__gor": "sectors"})
+                        df = df.rename(index=str, columns={"gva_per_head": "gva_per_sector"})
+
+                        # Drop columns
+                        try:
+                            df = df.drop('interval', 1)
+                        except:
+                            pass
+                        # Select all entries with matching sectors
+                        df = df.loc[df['sectors'].isin(sectors_to_generate)]
+
+                        # Select all entries with matching years
+                        df = df.loc[df['timestep'].isin(range(base_yr, end_yr + 1))]
+
+                        # Write to csv
+                        file_path_sectors = os.path.join(path_to_folder, folder_name, "gva_per_head__lad_sectors.csv")
+                        df.to_csv(file_path_sectors, index=False)
+                    #except:
+                    #    pass #Error
+
+                # -----------------------------------------
+                # MSOA GVA calculations
+                # -----------------------------------------
+                if MSOA_calculations:
+                    if (filename_split[0] == "gva_per_head" and filename_split[1] == 'lad.csv'):
+
+                        try:
                             list_with_all_vals = []
-                            gp_file = pd.read_csv(file_path)
 
-                            for index, row_lad in gp_file.iterrows():
-                                lad = row_lad['region']
+                            file_path = os.path.join(path_to_folder, folder_name, file_name)
+                            lads = list(gp_file.loc[gp_file['year'] == 2015]['region'])
+
+                            for lad in lads:
                                 try:
                                     corresponding_msoas = LAD_MSOA_lu[lad]
                                 except KeyError:
-                                    corresponding_msoas = [lad] # No match for northern ireland
+                                    # No match for northern ireland
+                                    corresponding_msoas = [lad]
 
-                                # Calculate population according to ONS 2015
-                                pop_LAD = row_lad['value']
+                                rows_msoa = gp_file.loc[gp_file['region'] == lad]
 
-                                for msoa_name in corresponding_msoas:
-                                    try:
-                                        pop_ONS_scale_factor = factor_msoas[msoa_name]
-                                    except:
-                                        pop_ONS_scale_factor = 1 # If not mapped
-
-                                    pop_MSOA_ONS_scaled = pop_LAD * pop_ONS_scale_factor
+                                for index, row_msoa in rows_msoa.iterrows():
+                                    for msoa_name in corresponding_msoas:
+                                        new_row = {
+                                            "region": msoa_name,
+                                            "timestep": row_msoa['year'],
+                                            "value": row_msoa['value']}
+                                        list_with_all_vals.append(new_row)
 
-                                    new_row = {
-                                        'region': msoa_name,
-                                        "timestep": row_lad['year'],
-                                        "value": pop_MSOA_ONS_scaled}
+                            # Convert list to dataframe
+                            msoaDF = pd.DataFrame(list_with_all_vals, columns=gp_file.columns)
 
-                                    list_with_all_vals.append(new_row)
+                            msoaDF = msoaDF.rename(index=str, columns={"region": geography_name})
 
-                            msoaDF = pd.DataFrame(list_with_all_vals, columns=gp_file.columns)
                             file_path_MSOA_out = os.path.join(path_to_folder, folder_name, "{}_{}.csv".format(file_name[:-4], "MSOA"))
                             msoaDF.to_csv(file_path_MSOA_out, index=False)
-                except:
-                    #ERROR
-                    pass
-            else:
-                pass
-
-            # ----------------------------------------------------------
-            # Script to generate sectors file
-            # ----------------------------------------------------------
-            if (filename_split[0] == "gva_per_head" and filename_split[1] == 'lad_sector.csv'):
-
-                try:
-                    file_path = os.path.join(path_to_folder, folder_name, file_name)
-                    df = pd.read_csv(file_path)
-
-                    df = df.rename(
-                        index=str,
-                        columns={
-                            "year": "timestep",
-                            "economic_sector__gor": "sector"})
-
-                    # Drop columns
-                    try:
-                        df = df.drop('interval', 1)
-                    except:
-                        pass
-
-                    # Select all entries with matching sectors
-                    df = df.loc[df['sector'].isin(sectors_to_generate)]
-
-                    # Select all entries with matching years
-                    df = df.loc[df['timestep'].isin(range(base_yr, end_yr + 1))]
-
-                    # Reorder columns
-                    df = df[columns]
-
-                    # Write to csv
-                    file_path_sectors = os.path.join(path_to_folder, folder_name, "gva_per_head__lad_sectors.csv")
-                    df.to_csv(file_path_sectors, index=False)
-                except:
-                    pass #Error
-
-            # -----------------------------------------
-            # MSOA GVA calculations
-            # -----------------------------------------
-            if MSOA_calculations:
-                if (filename_split[0] == "gva_per_head" and filename_split[1] == 'lad.csv'):
-
-                    try:
-                        list_with_all_vals = []
-
-                        file_path = os.path.join(path_to_folder, folder_name, file_name)
-                        lads = list(gp_file.loc[gp_file['year'] == 2015]['region'])
-
-                        for lad in lads:
-                            try:
-                                corresponding_msoas = LAD_MSOA_lu[lad]
-                            except KeyError:
-                                # No match for northern ireland
-                                corresponding_msoas = [lad]
-
-                            rows_msoa = gp_file.loc[gp_file['region'] == lad]
-
-                            for index, row_msoa in rows_msoa.iterrows():
-                                for msoa_name in corresponding_msoas:
-                                    new_row = {
-                                        "region": msoa_name,
-                                        "timestep": row_msoa['year'],
-                                        "value": row_msoa['value']}
-                                    list_with_all_vals.append(new_row)
-
-                        # Convert list to dataframe
-                        msoaDF = pd.DataFrame(list_with_all_vals, columns=gp_file.columns)
-
-                        msoaDF = msoaDF.rename(index=str, columns={"region": geography_name})
-
-                        file_path_MSOA_out = os.path.join(path_to_folder, folder_name, "{}_{}.csv".format(file_name[:-4], "MSOA"))
-                        msoaDF.to_csv(file_path_MSOA_out, index=False)
-                    except:
-                        pass
+                        except:
+                            pass
 
     print("----------")
     print("Finished preparing NISMOD GVA and population files")