In [2]:
import pandas as pd
import numpy as np
from simpledbf import Dbf5
import os
import glob


# Root folder that holds one sub-folder per model; each model folder holds one sub-folder per scenario
base_dir = r'E:\Script\Reading_dbf_file_Trial\Bangladesh'

# Workbook that receives the rows of every model stacked together
combined_xlsx = r'E:\Script\Reading_dbf_file_Trial\Summary\All_Model_Combined.xlsx'

# Collecting the per-model dataframes in a list; they are concatenated once at the end
# instead of growing a dataframe inside the loops
model_frames = []

# Iterating through all the model folders
for folder in glob.glob(os.path.join(base_dir, '*')):

    # The wildcard also matches plain files, so anything that is not a folder is skipped
    if not os.path.isdir(folder):
        continue

    # The last component of the folder path is the model name
    model_name = os.path.basename(folder)

    # One single-row dataframe per dbf file of this model
    model_rows = []

    # Iterating through all the scenario folders of this model
    # (glob.escape keeps special characters in a folder name from being read as a pattern)
    for scenario_folder in glob.glob(os.path.join(glob.escape(folder), '*')):

        # Skipping files, e.g. the '<model>.xlsx' workbook saved into the model folder by a previous run
        if not os.path.isdir(scenario_folder):
            continue

        # The last component of the scenario folder path is the scenario name
        scenario = os.path.basename(scenario_folder)

        # Iterating through all the parameter files of this scenario
        for file in glob.glob(os.path.join(glob.escape(scenario_folder), '*.dbf')):

            # File name without extension, split on '_':
            # first part = parameter name, second to last = timespan, last = region
            name_parts = os.path.splitext(os.path.basename(file))[0].split('_')
            if len(name_parts) < 2:
                raise ValueError('Unexpected dbf file name (expected '
                                 '<parameter>_..._<timespan>_<region>.dbf): {}'.format(file))
            item_type = name_parts[0]
            timespan_type = name_parts[-2]
            region = name_parts[-1]

            # Reading the database file with 'Dbf5' of the 'simpledbf' package and converting it to a dataframe
            df = Dbf5(file).to_dataframe()

            # Averaging every column from the third one onwards over all rows (grid points) of the region
            # NOTE(review): assumes the first two columns are not data (presumably coordinates) - confirm
            avg = df.iloc[:, 2:].mean()

            # Turning the averages into a single row, with the parameter name stored in a 'Parameter' column
            avg_trans_df = (avg.to_frame(name=item_type)
                               .T
                               .reset_index()
                               .rename(columns={'index': 'Parameter'}))

            # Final column order: Scenario, Time-span, Parameter, Region, <averaged columns>
            avg_trans_df.insert(loc=0, column='Scenario', value=scenario)
            avg_trans_df.insert(loc=1, column='Time-span', value=timespan_type)
            avg_trans_df.insert(loc=3, column='Region', value=region)

            model_rows.append(avg_trans_df)

    # Stacking every row of this model; an empty dataframe is used when no dbf file was found
    m_df = pd.concat(model_rows) if model_rows else pd.DataFrame()

    # Saving the individual model dataframe inside the model folder
    m_df.to_excel(os.path.join(folder, '{}.xlsx'.format(model_name)), index=False,
                  float_format='%.2f')

    # Inserting a model name column at the start of the model dataframe
    m_df.insert(loc=0, column='Model', value=model_name)

    # Only models that produced rows take part in the combined dataframe
    if not m_df.empty:
        model_frames.append(m_df)

# Building the combined dataframe (com_df) from all the models in one go
com_df = pd.concat(model_frames) if model_frames else pd.DataFrame()

# Making sure the summary folder exists, then saving the combined dataframe
os.makedirs(os.path.dirname(combined_xlsx), exist_ok=True)
com_df.to_excel(combined_xlsx, index=False, float_format='%.2f')

# Output folders of the summary workbooks; created up front so that saving cannot fail on a missing folder
summary_dir = r'E:\Script\Reading_dbf_file_Trial\Summary'
zs_dir = os.path.join(summary_dir, 'ZS_wise_all_model')
zp_dir = os.path.join(summary_dir, 'ZP_wise_avg')
os.makedirs(zs_dir, exist_ok=True)
os.makedirs(zp_dir, exist_ok=True)

# Grouping the combined dataframe by ('Scenario','Parameter','Region') to get the summary information
item_grp = com_df.groupby(['Scenario', 'Parameter', 'Region'])

# Combinations that really occur in the data; get_group raises KeyError for any other combination
available_groups = item_grp.groups

# Holds the parameter wise averages of the latest (scenario, parameter) pair that had data
p_avg_df = pd.DataFrame()

# Iterating through each scenario, parameter and region to get the specific information
for snrio in com_df['Scenario'].unique():

    for parm in com_df['Parameter'].unique():

        # Region wise average dataframes of this scenario and parameter
        region_frames = []

        for rgn in com_df['Region'].unique():

            # Skipping combinations for which no file was read
            if (snrio, parm, rgn) not in available_groups:
                continue

            # Rows of every model for this scenario, parameter and region.
            # Descending alphabetical order of 'Time-span' yields short, mid, long.
            # sort_values returns a new dataframe, so the grouped data is not modified in place.
            item_df = item_grp.get_group((snrio, parm, rgn)).sort_values(by='Time-span', ascending=False)

            # Averaging over the models for each timespan, keeping the sorted timespan order.
            # numeric_only=True leaves the text columns (Model, Scenario, Parameter, Region) out of the mean;
            # pandas 2.0 and later raise a TypeError for them otherwise.
            time_grp_df = item_df.groupby('Time-span', sort=False).mean(numeric_only=True).reset_index()

            # Inserting the region column at index '0'
            time_grp_df.insert(loc=0, column='Region', value=rgn)
            region_frames.append(time_grp_df)

            # Saving the region wise individual model scenario data
            item_df.to_excel(os.path.join(zs_dir, '{}_{}_{}.xlsx'.format(snrio, parm, rgn)),
                             index=False, float_format='%.2f')

        # Nothing to save when the parameter does not occur in this scenario at all
        if not region_frames:
            continue

        # Stacking the region wise averages and saving the scenario specific zone-wise average parameter data
        p_avg_df = pd.concat(region_frames)
        p_avg_df.to_excel(os.path.join(zp_dir, '{}_{}_Avg.xlsx'.format(snrio, parm)),
                          index=False, float_format='%.2f')

In [3]:
# Displaying the zone-wise average table left in 'p_avg_df' by the loops of the previous cell,
# i.e. the averages of the last (scenario, parameter) pair that was processed
p_avg_df

Unnamed: 0,Region,Time-span,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,EH,short,14.985673,16.779214,20.552962,23.57582,25.201265,26.383693,26.078253,25.796179,25.445615,23.886044,20.866234,16.735384
1,EH,mid,16.021973,17.684982,21.337556,24.066059,25.684017,26.942682,26.602941,26.250321,25.95481,24.410835,21.590832,17.614056
2,EH,long,18.892436,20.526212,23.3981,25.675276,27.484404,28.667616,28.05272,27.665317,27.393219,26.038988,23.503003,20.264067
0,NC,short,13.463025,15.415655,19.660029,23.753841,25.990968,27.085339,26.719043,26.449524,25.95699,23.657043,19.432863,14.902999
1,NC,mid,14.505291,16.373652,20.490253,24.337643,26.441724,27.539595,27.168398,26.812741,26.42904,24.268902,20.271427,15.810268
2,NC,long,17.447287,19.279966,22.97271,26.166931,27.902539,28.996433,28.411,27.95577,27.666208,25.95294,22.621387,18.774913
0,NE,short,12.911574,14.8274,18.89371,22.636329,24.873837,26.238048,26.144479,25.981817,25.367821,22.74712,18.639396,14.362434
1,NE,mid,13.951897,15.745958,19.652462,23.200202,25.371803,26.686495,26.581698,26.379944,25.891021,23.364363,19.418484,15.270792
2,NE,long,16.717684,18.468995,21.970866,25.07747,26.841752,28.011596,27.733883,27.479653,27.156459,25.204508,21.625007,17.97382
0,NW,short,12.591121,14.632306,18.829129,23.107656,25.714855,27.045358,26.762776,26.51485,25.850713,23.051002,18.637762,14.190967
