<h1>Limpagem de dados Termosal para inserçao MongoDB</h1>
<br>
Este notebook foi criado por <b>Bruno da Silva Andrade</b>. Aqui irei crair a rotina de limpagem e estruturação para a inserção no banco de dados MongoDB

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import codecs 
import json

<h2>Definindo estruturas de colunas</h2>
<b>Tipo 1 de dado</b>
<br>
<p>Num Data Fields   :  12                  
Field1            : latitude (Latitude[deg])<br>
Field2            : longitude (Longitude[deg])<br>
Field3            : c0S/m (Conductivity[S/m])<br>
Field4            : sal00 (Salinity[PSU])  <br>
Field5            : t090C (Temperature[ITS-90, deg C])<br>
Field6            : TimeS (Time, Elapsed[seconds])<br>
Field7            : TimeM (Time, Elapsed[minutes])<br>
Field8            : TimeH (Time, Elapsed[hour])<br>
Field9            : TimeJ (Julian Days)<br>
Field10           : svCM (Sound Velocity)<br>
Field11           : prM (Pressure[db])<br>
Field12           : flag (0.000e+00)</p>

In [9]:
dtype = [('latitude', 'f4'),('longitude', 'f4'),('c0S/m', 'f4'), ('sal00', 'f4'), ('t090C','f4'), ('TimeS','f4'), ('TimeM','f4'), ('TimeH','f4'), ('TimeJ','f4'), ('svCM','f4'), ('prM','f4'), ('flag','f4')]

form = dict(usecols=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), dtype=dtype)

rs_cnv = np.genfromtxt('./data/TERMO.cnv', skip_header=57, **form)

<h2>Exibindo dados do dataset em seu formato bruto</h2>

In [10]:
rs_cnv

array([(1006.679,  4.003 ,  4.0054, 3.23071 , 3.231212, 0.051282, 3.60811, 1014.713, 1482.26, 1014.713, 43076.,  1.),
       (1006.679,  4.0035,  4.0056, 3.230776, 3.231246, 0.051282, 3.60806, 1014.713, 1482.26, 1014.713, 43077.,  1.),
       (1006.725,  4.0044,  4.0054, 3.2308  , 3.231246, 0.051282, 3.60801, 1014.76 , 1482.27, 1014.76 , 43078.,  1.),
       (1006.679,  4.0047,  4.0054, 3.230866, 3.231232, 0.050061, 3.61361, 1014.713, 1482.27, 1014.713, 43079.,  1.),
       (1006.679,  4.0054,  4.0054, 3.230909, 3.231192, 0.051282, 3.61355, 1014.713, 1482.27, 1014.713, 43080.,  1.),
       (1006.679,  4.0059,  4.0056, 3.230897, 3.231219, 0.052503, 3.60786, 1014.713, 1482.27, 1014.713, 43081.,  1.),
       (1006.679,  4.0054,  4.0053, 3.230897, 3.231192, 0.052503, 3.60789, 1014.713, 1482.27, 1014.713, 43082.,  1.),
       (1006.679,  4.0057,  4.0058, 3.230957, 3.231239, 0.051282, 3.60785, 1014.714, 1482.27, 1014.714, 43083.,  1.),
       (1006.679,  4.006 ,  4.0054, 3.230951, 3.231259, 

<h2>Colocando os datasets em formato JSON para posteriormente a inserção no banco </h2>

In [11]:
for d in rs_cnv:
        print('{\n\t"latitude":',d[0],',\n\t"longitude":',d[1],',\n\t"c0S/m":',d[2],',\n\t"sal00":',d[3],',\n\t"t090C":',d[4],',\n\t"TimeS":',d[5],',\n\t"TimeM":',d[6],',\n\t"TimeH":',d[7],',\n\t"TimeJ":',d[8],',\n\t"svCM":',d[9],',\n\t"prM":',d[10],',\n\t"flag":',d[11],'\n},')

{
	"latitude": 1006.679 ,
	"longitude": 4.003 ,
	"c0S/m": 4.0054 ,
	"sal00": 3.23071 ,
	"t090C": 3.231212 ,
	"TimeS": 0.051282 ,
	"TimeM": 3.60811 ,
	"TimeH": 1014.713 ,
	"TimeJ": 1482.26 ,
	"svCM": 1014.713 ,
	"prM": 43076.0 ,
	"flag": 1.0 
},
{
	"latitude": 1006.679 ,
	"longitude": 4.0035 ,
	"c0S/m": 4.0056 ,
	"sal00": 3.230776 ,
	"t090C": 3.231246 ,
	"TimeS": 0.051282 ,
	"TimeM": 3.60806 ,
	"TimeH": 1014.713 ,
	"TimeJ": 1482.26 ,
	"svCM": 1014.713 ,
	"prM": 43077.0 ,
	"flag": 1.0 
},
{
	"latitude": 1006.725 ,
	"longitude": 4.0044 ,
	"c0S/m": 4.0054 ,
	"sal00": 3.2308 ,
	"t090C": 3.231246 ,
	"TimeS": 0.051282 ,
	"TimeM": 3.60801 ,
	"TimeH": 1014.76 ,
	"TimeJ": 1482.27 ,
	"svCM": 1014.76 ,
	"prM": 43078.0 ,
	"flag": 1.0 
},
{
	"latitude": 1006.679 ,
	"longitude": 4.0047 ,
	"c0S/m": 4.0054 ,
	"sal00": 3.230866 ,
	"t090C": 3.231232 ,
	"TimeS": 0.050061 ,
	"TimeM": 3.61361 ,
	"TimeH": 1014.713 ,
	"TimeJ": 1482.27 ,
	"svCM": 1014.713 ,
	"prM": 43079.0 ,
	"flag": 1.0 
},
{
	"latitude": 100

	"TimeH": 854.702 ,
	"TimeJ": 1484.18 ,
	"svCM": 854.702 ,
	"prM": 49729.0 ,
	"flag": 3.0 
},
{
	"latitude": 848.206 ,
	"longitude": 5.0883 ,
	"c0S/m": 5.0857 ,
	"sal00": 3.325547 ,
	"t090C": 3.325663 ,
	"TimeS": 0.052503 ,
	"TimeM": 3.52038 ,
	"TimeH": 854.649 ,
	"TimeJ": 1484.18 ,
	"svCM": 854.649 ,
	"prM": 49730.0 ,
	"flag": 3.0 
},
{
	"latitude": 848.206 ,
	"longitude": 5.0882 ,
	"c0S/m": 5.0873 ,
	"sal00": 3.325547 ,
	"t090C": 3.325884 ,
	"TimeS": 0.052503 ,
	"TimeM": 3.52039 ,
	"TimeH": 854.649 ,
	"TimeJ": 1484.18 ,
	"svCM": 854.649 ,
	"prM": 49731.0 ,
	"flag": 3.0 
},
{
	"latitude": 848.259 ,
	"longitude": 5.0873 ,
	"c0S/m": 5.0878 ,
	"sal00": 3.325462 ,
	"t090C": 3.326068 ,
	"TimeS": 0.052503 ,
	"TimeM": 3.52049 ,
	"TimeH": 854.702 ,
	"TimeJ": 1484.17 ,
	"svCM": 854.702 ,
	"prM": 49732.0 ,
	"flag": 3.0 
},
{
	"latitude": 848.305 ,
	"longitude": 5.0861 ,
	"c0S/m": 5.0892 ,
	"sal00": 3.325322 ,
	"t090C": 3.326157 ,
	"TimeS": 0.053724 ,
	"TimeM": 3.52064 ,
	"TimeH": 854.749 ,
	"Ti

	"TimeJ": 1492.68 ,
	"svCM": 603.327 ,
	"prM": 58760.0 ,
	"flag": 5.0 
},
{
	"latitude": 599.468 ,
	"longitude": 8.2466 ,
	"c0S/m": 8.2456 ,
	"sal00": 3.622039 ,
	"t090C": 3.622223 ,
	"TimeS": 0.054945 ,
	"TimeM": 3.27746 ,
	"TimeH": 603.658 ,
	"TimeJ": 1492.7 ,
	"svCM": 603.658 ,
	"prM": 58761.0 ,
	"flag": 5.0 
},
{
	"latitude": 599.218 ,
	"longitude": 8.2503 ,
	"c0S/m": 8.2484 ,
	"sal00": 3.622253 ,
	"t090C": 3.622414 ,
	"TimeS": 0.053724 ,
	"TimeM": 3.27708 ,
	"TimeH": 603.406 ,
	"TimeJ": 1492.7 ,
	"svCM": 603.406 ,
	"prM": 58762.0 ,
	"flag": 5.0 
},
{
	"latitude": 599.37 ,
	"longitude": 8.2538 ,
	"c0S/m": 8.2489 ,
	"sal00": 3.622354 ,
	"t090C": 3.622308 ,
	"TimeS": 0.053724 ,
	"TimeM": 3.27693 ,
	"TimeH": 603.559 ,
	"TimeJ": 1492.72 ,
	"svCM": 603.559 ,
	"prM": 58763.0 ,
	"flag": 5.0 
},
{
	"latitude": 599.218 ,
	"longitude": 8.2536 ,
	"c0S/m": 8.2479 ,
	"sal00": 3.622297 ,
	"t090C": 3.622074 ,
	"TimeS": 0.054945 ,
	"TimeM": 3.27688 ,
	"TimeH": 603.407 ,
	"TimeJ": 1492.71 ,
	"svCM"

	"TimeH": 52.614 ,
	"TimeJ": 1521.85 ,
	"svCM": 52.614 ,
	"prM": 78233.0 ,
	"flag": 7.0 
},
{
	"latitude": 52.416 ,
	"longitude": 19.3528 ,
	"c0S/m": 19.3606 ,
	"sal00": 4.868575 ,
	"t090C": 4.87371 ,
	"TimeS": 0.41636 ,
	"TimeM": 2.91742 ,
	"TimeH": 52.713 ,
	"TimeJ": 1521.86 ,
	"svCM": 52.713 ,
	"prM": 78234.0 ,
	"flag": 7.0 
},
{
	"latitude": 52.364 ,
	"longitude": 19.3544 ,
	"c0S/m": 19.3608 ,
	"sal00": 4.869606 ,
	"t090C": 4.873848 ,
	"TimeS": 0.41636 ,
	"TimeM": 2.92068 ,
	"TimeH": 52.66 ,
	"TimeJ": 1521.87 ,
	"svCM": 52.66 ,
	"prM": 78235.0 ,
	"flag": 7.0 
},
{
	"latitude": 52.397 ,
	"longitude": 19.3563 ,
	"c0S/m": 19.3613 ,
	"sal00": 4.870921 ,
	"t090C": 4.874132 ,
	"TimeS": 0.40904 ,
	"TimeM": 2.92043 ,
	"TimeH": 52.693 ,
	"TimeJ": 1521.89 ,
	"svCM": 52.693 ,
	"prM": 78236.0 ,
	"flag": 7.0 
},
{
	"latitude": 29.824 ,
	"longitude": 19.0256 ,
	"c0S/m": 19.0211 ,
	"sal00": 4.759603 ,
	"t090C": 4.759077 ,
	"TimeS": 0.47863 ,
	"TimeM": 3.00233 ,
	"TimeH": 29.991 ,
	"TimeJ": 1519.8

	"TimeM": 3.02675 ,
	"TimeH": 5.55 ,
	"TimeJ": 1519.18 ,
	"svCM": 5.55 ,
	"prM": 83913.0 ,
	"flag": 10.0 
},
{
	"latitude": 5.519 ,
	"longitude": 18.9516 ,
	"c0S/m": 18.9523 ,
	"sal00": 4.744845 ,
	"t090C": 4.745437 ,
	"TimeS": 0.4127 ,
	"TimeM": 3.02677 ,
	"TimeH": 5.55 ,
	"TimeJ": 1519.18 ,
	"svCM": 5.55 ,
	"prM": 83914.0 ,
	"flag": 10.0 
},
{
	"latitude": 5.474 ,
	"longitude": 18.9514 ,
	"c0S/m": 18.9523 ,
	"sal00": 4.744852 ,
	"t090C": 4.745457 ,
	"TimeS": 0.4127 ,
	"TimeM": 3.03028 ,
	"TimeH": 5.504 ,
	"TimeJ": 1519.18 ,
	"svCM": 5.504 ,
	"prM": 83915.0 ,
	"flag": 10.0 
},
{
	"latitude": 5.474 ,
	"longitude": 18.9518 ,
	"c0S/m": 18.9525 ,
	"sal00": 4.744831 ,
	"t090C": 4.745445 ,
	"TimeS": 0.4127 ,
	"TimeM": 3.02674 ,
	"TimeH": 5.504 ,
	"TimeJ": 1519.18 ,
	"svCM": 5.504 ,
	"prM": 83916.0 ,
	"flag": 10.0 
},
{
	"latitude": 5.519 ,
	"longitude": 18.9518 ,
	"c0S/m": 18.9525 ,
	"sal00": 4.744852 ,
	"t090C": 4.745397 ,
	"TimeS": 0.41148 ,
	"TimeM": 3.02676 ,
	"TimeH": 5.55 ,
	"TimeJ": 