### Code used for reading and converting raster files to array


#### This code automatically reads BIL raster files into arrays and then organizes the data as a time series to be used for further analysis.

#### The code reads a total of Z .bil files, reshapes each file from (X, Y) to (1, X * Y), and assigns each flattened file to a different row. Therefore we obtain a time-series matrix of shape (Z, X * Y).

Developed by: Thiago Victor Medeiros do Nascimento

In [1]:
from pcraster import *
import numpy as np
from osgeo import gdal, gdalconst
from osgeo import gdal_array
from osgeo import osr
import matplotlib.pylab as plt
import subprocess
import glob,os
import time
import rasterio
import tqdm

In [2]:
import pandas as pd

Firstly we pre-read our dataset:

#### (a) Data reading:

In [3]:
# Folder that holds the .bil rasters to be stacked into the time series.
path = r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\dnCHIRPS'
# glob.glob returns matches in arbitrary order, but the rows are later labelled
# with dates purely by position. Sorting makes the order deterministic; the file
# names look like dnchirps198101.bil, so lexical order should be chronological
# (TODO confirm that every file follows this naming pattern).
filenames = sorted(glob.glob(path + "/*.bil"))
print("The total number of files is:", len(filenames))

The total number of files is: 468


First, we take a look at the dataset:

In [4]:
# Open the first raster to discover the grid dimensions; `numtotal` is used
# later as the flattened length of every file.
mapfile = filenames[0]
RasterLayer = rasterio.open(mapfile)

# height -> number of rows, width -> number of columns
nrows, ncols = RasterLayer.height, RasterLayer.width
numtotal = ncols * nrows

print("The total number of grids in the dataset is:", numtotal)

The total number of grids in the dataset is: 16330


In [6]:
# Number of raster rows (taken from RasterLayer.height).
nrows

115

In [9]:
# Display the dataset handle; its repr shows the file name and the open mode.
RasterLayer

<open DatasetReader name='C:/Users/User/OneDrive/IST/RESEARCH/5_SPI/rondonia/dnCHIRPS/dnchirps198101.bil' mode='r'>

We create one array to be filled:

In [16]:
# Pre-allocate the time-series matrix: one row per raster file, one column per
# grid cell, stored as 32-bit floats and initialised to zero.
precmongrids = np.zeros(shape=(len(filenames), numtotal), dtype="float32")
precmongrids.shape

(468, 16330)

Now we proceed with reading and organizing the data:

In [17]:
# Read the first band of the sample raster and flatten it to a single row.
# The context manager closes the file handle as soon as the data is in memory,
# and read(1) fetches only band 1 instead of loading every band and slicing.
with rasterio.open(mapfile) as RasterLayer:
    mapreadarray = RasterLayer.read(1)
mapreadarrayres = np.reshape(mapreadarray, (1, numtotal))
mapreadarrayres

array([[275, 279, 271, ..., 417, 403, 382]], dtype=int16)

In [19]:
# Sanity check: (number of files, number of grid cells).
precmongrids.shape

(468, 16330)

In [27]:
# Fill the time-series matrix: row i receives the flattened band 1 of file i.
start = time.time()
for i, mapfile in enumerate(tqdm.tqdm(filenames)):
    # Close every dataset right after reading it; otherwise one open handle
    # per file is left behind for the rest of the session.
    with rasterio.open(mapfile) as RasterLayer:
        mapreadarray = RasterLayer.read(1)

    # Flatten (rows, cols) -> (1, numtotal); np.reshape raises ValueError if a
    # file does not have exactly `numtotal` cells. Values are cast to the
    # dtype of `precmongrids` on assignment.
    mapreadarrayres = np.reshape(mapreadarray, (1, numtotal))
    precmongrids[i, :] = mapreadarrayres
end = time.time()
print(end - start)

100%|████████████████████████████████████████████████████████████████████████████████| 468/468 [00:07<00:00, 61.88it/s]

7.5892064571380615





In [26]:
# Shape is unchanged by the loading loop: (number of files, number of grid cells).
precmongrids.shape

(468, 16330)

In [20]:
# Label each row with a month-end date starting in January 1981.
# The index length is derived from the matrix itself, so the frame still builds
# when the number of input files changes (previously the end date had to be
# edited by hand to match).
# NOTE: run this only after the loading loop has filled `precmongrids`;
# building the frame earlier captures the zero-initialised matrix.
precmongridsdf = pd.DataFrame(
    data=precmongrids,
    index=pd.date_range(start='1-1981', periods=precmongrids.shape[0], freq='M'),
)

In [21]:
# Preview the frame (pandas truncates the display of the rows and columns).
precmongridsdf

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16320,16321,16322,16323,16324,16325,16326,16327,16328,16329
1981-01-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1981-02-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1981-03-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1981-04-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1981-05-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-09-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Now we export our dataset to a .CSV to be easily opened afterwards:

In [9]:
# NOTE(review): section (b) reads precmongrids.csv with pd.read_csv and relies on
# a 'date' column plus columns named '0', '1', ...; np.savetxt writes neither a
# header nor dates, so the file used there was presumably produced another way
# (e.g. precmongridsdf.to_csv(..., index_label='date')) - confirm before re-enabling.
#np.savetxt(r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\precmongrids.csv', precmongrids, delimiter=',')

If we want to save our DataFrame to an .XLSX file:

In [10]:
#precmongridsdf.to_excel(r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\precmongrids.xlsx')

#### (b) SPI computation for each grid:

In [18]:
import standard_precip
from standard_precip import spi

In [53]:
import warnings
# Silences every warning from here on (no category or module filter is given),
# so deprecation and numerical warnings raised during the SPI fits are hidden too.
warnings.filterwarnings('ignore')

In [None]:
# SPI calculator instance; the same object is reused for every grid cell and scale.
spi_rain = spi.SPI()

In [153]:
# Load the gridded precipitation table. The cells below expect a 'date' column
# plus one column per grid cell named '0', '1', ... (accessed as str(grid)).
data = pd.read_csv(r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\precmongrids.csv')

In [154]:
# Every column except 'date' is one grid cell.
numgrids = len(data.columns) - 1
numgrids

16330

In [155]:
# SPI scales of interest ("tipos de SPI" = "SPI types"); presumably months, given freq="M" below.
# NOTE(review): this list is not referenced by the cells shown here; the loop hard-codes the same scales.
tipos_de_spi = [1, 3, 6, 9, 12, 24]

In [156]:
# Empty result template: same rows and grid columns as `data`, without the
# 'date' column, and with every value set to NaN.
spi3 = data.drop(columns=['date'])
spi3.iloc[:, :] = np.nan

In [157]:
# One independent NaN-filled result frame per remaining SPI scale.
spi1, spi6, spi9, spi12, spi24 = (spi3.copy() for _ in range(5))

In [158]:
# Compute the SPI of every grid cell at each scale and store the result in the
# matching output frame (column position == grid number).
start = time.time()

# Scale -> frame that receives the results for that scale.
spi_frames = {1: spi1, 3: spi3, 6: spi6, 9: spi9, 12: spi12, 24: spi24}

for grid in tqdm.tqdm(range(numgrids)):
    column = str(grid)

    for scale, target in spi_frames.items():
        # calculate() appears to append a rolling-sum column to the frame it is
        # given when scale > 1 (the previous version dropped the last column of
        # `data` after those calls). Handing it a fresh two-column copy keeps
        # `data` untouched, so an interrupted run can no longer leave stray
        # columns behind, and it avoids reshuffling the full table on every call.
        series = data[['date', column]].copy()
        aux = spi_rain.calculate(series, 'date', column, freq="M", scale=scale,
                                 fit_type="lmom", dist_type="gam")
        # Same column position the original code read; presumably the computed
        # index (date, precipitation, index) - verify against the library.
        target.iloc[:, grid] = aux.iloc[:, 2]

end = time.time()
print(end - start)

100%|██████████████████████████████████████████████████████████████████████████| 16330/16330 [3:59:19<00:00,  1.14it/s]

14359.44906115532





In [167]:
# Export SPI-1 as plain comma-separated values (no header row, no date column).
np.savetxt(fname=r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\spi1.csv', X=spi1, delimiter=',')

In [168]:
# Export SPI-3 as plain comma-separated values (no header row, no date column).
np.savetxt(fname=r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\spi3.csv', X=spi3, delimiter=',')

In [169]:
# Export SPI-6 as plain comma-separated values (no header row, no date column).
np.savetxt(fname=r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\spi6.csv', X=spi6, delimiter=',')

In [170]:
# Export SPI-9 as plain comma-separated values (no header row, no date column).
np.savetxt(fname=r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\spi9.csv', X=spi9, delimiter=',')

In [171]:
# Export SPI-12 as plain comma-separated values (no header row, no date column).
np.savetxt(fname=r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\spi12.csv', X=spi12, delimiter=',')

In [172]:
# Export SPI-24 as plain comma-separated values (no header row, no date column).
np.savetxt(fname=r'C:\Users\User\OneDrive\IST\RESEARCH\5_SPI\rondonia\spi24.csv', X=spi24, delimiter=',')