In [3]:
import pandas as ps
import rasterio
import numpy as np
from rasterio.transform import from_bounds
from rasterio.plot import show, show_hist
from rasterio.fill import fillnodata
import os



In [4]:
# base_dir = r"D:\Acadamics\University\Year_3\Sem_2\GIS3005 - Remote Sensing\Assignment\1"
base_dir = os.getcwd()
countries_interested = ["Pakistan", "Nepal", "Sri Lanka", "India", "Bhutan", "Bangladesh"]
gdp_dataset = os.path.join(base_dir, r"GDP\API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4770391.csv")
gdp_dataset_filtered = os.path.join(base_dir, r"GDP\API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4770391_filtered.csv")
nightlight = os.path.join(base_dir, r"nightlight.csv")
output_folder = os.path.join(base_dir, r"output")
year_from = 1992
year_to = 2013

In [5]:
'''
Filter WorldBank's current GDP dataset
'''
df = ps.read_csv(gdp_dataset, skiprows=0, na_values=None)
df.drop(['Unnamed: 66'], axis=1, inplace=True)
df.rename(columns={'Country Name':'Name', 'Country Code':'Code', 'Indicator Name':'IName', 'Indicator Code':'ICode'}, inplace=True)
# print(df.head())

df = df.drop([str(x) for x in range(1960, 1992)] + ['IName', 'ICode'], axis=1)
df = df[df.Name.isin(countries_interested)]
df = df.set_index('Name')
print(df.head())

df.to_csv(gdp_dataset_filtered, index=True)


           Code          1992          1993          1994          1995  \
Name                                                                      
Bangladesh  BGD  3.170887e+10  3.316652e+10  3.376866e+10  3.793975e+10   
Bhutan      BTN  2.402158e+08  2.259981e+08  2.589856e+08  2.904648e+08   
India       IND  2.882084e+11  2.792960e+11  3.272756e+11  3.602820e+11   
Sri Lanka   LKA  9.703012e+09  1.033868e+10  1.171760e+10  1.302970e+10   
Nepal       NPL  3.401212e+09  3.660042e+09  4.066776e+09  4.401104e+09   

                    1996          1997          1998          1999  \
Name                                                                 
Bangladesh  4.643848e+10  4.824431e+10  4.998456e+10  5.127057e+10   
Bhutan      3.034355e+08  3.522610e+08  3.634528e+08  3.992688e+08   
India       3.928971e+11  4.158678e+11  4.213515e+11  4.588204e+11   
Sri Lanka   1.389774e+10  1.509191e+10  1.579497e+10  1.565633e+10   
Nepal       4.521580e+09  4.918692e+09  4.856255e+09  

In [6]:
class progress():
    def __init__(self, current, maximum):
        self.current = current
        self.maximum = maximum
        self.BAR_WIDTH = 40
        
        self.current -=1
        self.next()
        
        
    def next(self):
        self.current += 1
        x = int(self.BAR_WIDTH*self.current/self.maximum)
        y = round(self.current/self.maximum*100, 1)
        text_pb = "{}[{}{}] {}/{} {}%".format("Processing", "#"*x, "."*(self.BAR_WIDTH-x), self.current, self.maximum, y)
        print(text_pb, end='\r', file=sys.stdout, flush=True)

In [7]:
def get_files(path, extention=".tif", regex=""):
    files = []
    for file in os.listdir(path):
        if file.endswith(extention):
            if regex != "":
                if not re.match(regex, file): continue
            temp_path = os.path.join(path, file)
            if os.path.exists(temp_path):
                files.append(temp_path)
    return files

In [8]:
def load(npy_file, band=None):
    if os.path.isfile(npy_file):
        if band == None: return np.load(npy_file)
        return np.load(npy_file)[band]
    return None

In [9]:
'''
Process Night Light Entensity npy
'''
npys = get_files(output_folder, '.npy')
npys = [{'npy':load(x)['b1'], 'country':os.path.splitext(os.path.split(x)[-1])[0].split("_")[0], 'year':int(os.path.splitext(os.path.split(x)[-1])[0].split("_")[-1])} for x in npys]


In [10]:
df = ps.DataFrame(npys)

In [12]:
data = {"country":[], "year":[], 'nightlight':[]}
for country in countries_interested:
    nldf = df[df.country == country]

    for index, item in nldf.iterrows():
        b1 = item.npy
        # show(b1)
        # show_hist(b1, bins=100)

        mask = b1[::] > 3
        fillnodata(b1, mask=mask)

        mask = b1[::] < 60
        fillnodata(b1, mask=mask)
        
        data["country"].append(country)
        data["year"].append(item.year)
        data["nightlight"].append(b1.mean())

In [13]:
df = ps.DataFrame(data)
df.set_index('country')

Unnamed: 0_level_0,year,nightlight
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Pakistan,1992,4.379253
Pakistan,1993,4.252737
Pakistan,1994,4.525597
Pakistan,1995,4.742671
Pakistan,1996,4.346322
...,...,...
Bangladesh,2009,5.076833
Bangladesh,2010,4.824675
Bangladesh,2011,4.760105
Bangladesh,2012,5.187551


In [14]:
df.to_csv(nightlight, index=True)