In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
import tarfile
import os
import csv

import geopandas as gpd
from zipfile import ZipFile

In [2]:
# Load the shapefile into a GeoDataFrame. Later cells read point coordinates
# (.x / .y) and a `Name` column from it, so it is presumably the layer with
# the station locations — TODO confirm against the data source.
shape_file = "DATA/Estações_localizacao/Localizacao_Remotas-FINAL.shp"
shape = gpd.read_file(filename=shape_file)

In [3]:
# Display the coordinate reference system attached to the loaded layer.
shape.crs

{'init': 'epsg:4326'}

## WGS84
- Sistema de coordenadas projetado
- Sistema de coordenadas geodésico

In [None]:
# Grid information available at the beginning (header) of the radar files.
# The upper bounds are derived from the lower bounds plus the full grid extent.
cellsize = 0.005825080633  # size of one grid cell; presumably decimal degrees — TODO confirm
ncols = 835  # number of grid columns (longitude axis)
nrows = 779  # number of grid rows (latitude axis)
lat_min = -22.542179540896
lat_max = lat_min + cellsize * nrows
lon_min = -56.903502116470
lon_max = lon_min + cellsize * ncols

In [None]:
def find_nearest_index(array, value):
    """Return the index of the element of ``array`` that is closest to ``value``.

    Args:
        array: Array-like of numbers (NumPy array, pandas Series, list,
            tuple, ...). It is converted with ``np.asarray`` so plain Python
            sequences work too.
        value: Scalar to look up.

    Returns:
        The integer position of the first element with the smallest absolute
        difference to ``value``. For inputs with more than one dimension this
        is the position in the flattened array (``ndarray.argmin`` semantics).

    Raises:
        ValueError: If ``array`` is empty.
    """
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()

In [None]:
# Find, for every station coordinate, the index of the closest grid value.
#
# `lat` is laid out from lat_max down to lat_min (index 0 holds lat_max),
# `lon` from lon_min up to lon_max; both are rounded to 4 decimals.
# NOTE(review): linspace over [min, max] with n points yields a step of
# cellsize * n / (n - 1), slightly larger than `cellsize` — confirm whether
# the header bounds describe cell edges or cell centres.
lat = np.around(np.linspace(lat_max, lat_min, nrows, endpoint=True), decimals=4)
lon = np.around(np.linspace(lon_min, lon_max, ncols, endpoint=True), decimals=4)

# Pull the point coordinates out of the geometry column.
shape['longitude'] = shape['geometry'].apply(lambda geom: geom.x)
shape['latitude'] = shape['geometry'].apply(lambda geom: geom.y)

# Position of each station on the two grid axes.
shape['index_longitude'] = shape['longitude'].apply(
    lambda coord: find_nearest_index(lon, coord)
)
shape['index_latitude'] = shape['latitude'].apply(
    lambda coord: find_nearest_index(lat, coord)
)

In [None]:
# Preview the first rows, including the coordinate and grid-index columns added above.
shape.head()

In [None]:
## Replace the spaces in the station names with underscores (_)
## e.g. "MB SEG2" >> "MB_SEG2", then use the cleaned name as the index.

points = [station_name.replace(" ", "_") for station_name in shape.Name.tolist()]
shape.Name = points
shape = shape.set_index('Name')
shape.head()

In [None]:
# Inspect a single station by its index label (empty result if the label is absent).
shape[shape.index == "MB_PRO1"]

In [None]:
# Sub-folders that contain the zip archives, in sorted order.
path = 'DATA/Dados_radar_new/'
folders = sorted(entry.path for entry in os.scandir(path) if entry.is_dir())

In [None]:
## Extract, for every radar scan stored in the zip archives, the pixel window
## around each station and write one CSV row per scan to DATA/data.csv.
## Each station column holds the window values as a space-separated string.

from tqdm import tqdm, tqdm_notebook
import logging


logging.basicConfig(filename='DATA/extractFiles.log',level=logging.DEBUG)

# Half-width of the square window taken around each station pixel:
# radius 2 -> 5x5 = 25 pixels, which can later be averaged.
WINDOW_RADIUS = 2
WINDOW_SIZE = 2 * WINDOW_RADIUS + 1
# Scans with any other shape are logged and written with empty station columns.
EXPECTED_SHAPE = (nrows, ncols)


def _scan_timestamp(member_name):
    """Build the '%Y/%m/%d %H:%M' label from the first 12 characters of a member name."""
    return "{}/{}/{} {}:{}".format(
        member_name[:4],
        member_name[4:6],
        member_name[6:8],
        member_name[8:10],
        member_name[10:12],
    )


def _station_windows(stations, names, radius, grid_shape):
    """Return {station name: (row slice, column slice)} for the window around each station.

    Raises ValueError when a window would cross the grid border: slicing would
    then yield fewer pixels than expected and every scan would fail.
    """
    windows = {}
    for name in names:
        row = int(stations.loc[name, 'index_latitude'])
        col = int(stations.loc[name, 'index_longitude'])
        inside_rows = radius <= row < grid_shape[0] - radius
        inside_cols = radius <= col < grid_shape[1] - radius
        if not (inside_rows and inside_cols):
            raise ValueError(
                "Station {!r} at grid index ({}, {}) is closer than {} pixels "
                "to the border of the {} grid".format(name, row, col, radius, grid_shape)
            )
        windows[name] = (
            slice(row - radius, row + radius + 1),
            slice(col - radius, col + radius + 1),
        )
    return windows


def _window_string(grid, window):
    """Flatten the window cut from `grid` and render it as text without brackets."""
    row_slice, col_slice = window
    values = grid[row_slice, col_slice].reshape(WINDOW_SIZE * WINDOW_SIZE)
    return np.array2string(values).replace('[', '').replace(']', '')


# The station windows do not depend on the scan, so compute them once, and
# before the output file is opened so a bad station cannot truncate an
# existing CSV.
station_windows = _station_windows(shape, points, WINDOW_RADIUS, EXPECTED_SHAPE)

path = 'DATA/Dados_radar_new/'
# Sorted so the processing order (and therefore the row order) is deterministic.
folders = sorted(f.path for f in os.scandir(path) if f.is_dir())

# newline='' is what the csv module expects; the station fields may contain
# embedded line breaks produced by np.array2string.
with open('DATA/data.csv', 'w', newline='') as f_out:

    logging.info("Starting!\nCreating file %s", f_out.name)
    out_colnames = ['date'] + points

    writer = csv.DictWriter(f_out, fieldnames=out_colnames)
    writer.writeheader()

    for folder in tqdm_notebook(folders, desc="Folders"):
        logging.info("Folder: %s", folder)

        zip_paths = sorted(f.path for f in os.scandir(folder) if not f.is_dir())

        for zip_path in tqdm_notebook(zip_paths, desc="ZipFiles", leave=False):
            logging.info("Zipfile: %s", zip_path)

            with ZipFile(zip_path, 'r') as myzip:
                for member_name in tqdm_notebook(myzip.namelist(), desc="Files", leave=False):
                    logging.info("File: %s", member_name)
                    with myzip.open(member_name, 'r') as data:
                        new_point = {'date': _scan_timestamp(member_name)}
                        try:
                            # The first 6 lines of each file are header lines.
                            array = np.loadtxt(data, skiprows=6)

                            if array.shape == EXPECTED_SHAPE:
                                # 10 ** (value / 10): from a 10*log10 scale to linear
                                # (presumably dBZ reflectivity — TODO confirm).
                                array = np.power(10.0, array / 10.0)

                                for point, window in station_windows.items():
                                    new_point[point] = _window_string(array, window)
                            else:
                                # Keep the file name for a future investigation.
                                logging.warning("File wrong size: %s", member_name)

                            # Written for wrong-size scans too (date only).
                            writer.writerow(new_point)
                        except ValueError:
                            logging.warning("File with strange data: %s", member_name)
                        except Exception:
                            # Best effort: record the traceback and move on, without
                            # swallowing KeyboardInterrupt like a bare `except:` would.
                            logging.exception("SOME PROBLEM: %s", member_name)
                        

In [None]:
# Load the radar table, parsing the DATE column as datetimes and using it as the index.
# NOTE(review): the extraction cell writes DATA/data.csv with a lowercase
# 'date' column, while this reads DATA/5x5_RADAR.csv with a 'DATE' column —
# confirm how that file is produced.
df = pd.read_csv("DATA/5x5_RADAR.csv",
            parse_dates=['DATE'],
            index_col=["DATE"])

In [None]:
# Preview the first rows of the loaded radar table.
df.head()