In [3]:
import dask.dataframe as dd

In [6]:
df = dd.read_csv('../RawData/Historical_FiresRAW/icnf_2021.csv')
print(df.head())

ValueError: Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.

+----------------------+---------+----------+
| Column               | Found   | Expected |
+----------------------+---------+----------+
| AREAPOV              | float64 | int64    |
| AREASFICHEIRODBF_GTF | object  | float64  |
| AREASFICHEIROPRJ_GTF | object  | float64  |
| AREASFICHEIROSHP_GTF | object  | float64  |
| AREASFICHEIROSHX_GTF | object  | float64  |
| AREASFICHEIROS_GTF   | object  | float64  |
| AREASFICHEIROZIP_SAA | object  | float64  |
| CAUSA                | float64 | int64    |
| DURACAO              | float64 | int64    |
| OPERADOR             | object  | int64    |
| PERIMETRO            | object  | float64  |
+----------------------+---------+----------+

The following columns also raised exceptions on conversion:

- AREASFICHEIRODBF_GTF
  ValueError("could not convert string to float: 'http://fogos.icnf.pt/sgif2010/ficheiroskml/AA_AG1211131.dbf'")
- AREASFICHEIROPRJ_GTF
  ValueError("could not convert string to float: 'http://fogos.icnf.pt/sgif2010/ficheiroskml/AA_AG1211131.prj'")
- AREASFICHEIROSHP_GTF
  ValueError("could not convert string to float: 'http://fogos.icnf.pt/sgif2010/ficheiroskml/AA_AG1211131.shp'")
- AREASFICHEIROSHX_GTF
  ValueError("could not convert string to float: 'http://fogos.icnf.pt/sgif2010/ficheiroskml/AA_AG1211131.shx'")
- AREASFICHEIROS_GTF
  ValueError("could not convert string to float: 'http://fogos.icnf.pt/sgif2010/ficheiroskml/AA_AG1211131.kml'")
- AREASFICHEIROZIP_SAA
  ValueError("could not convert string to float: 'http://fogos.icnf.pt/sgif2010/ficheiroskml/AA_AG1211131.zip'")
- OPERADOR
  ValueError("invalid literal for int() with base 10: 'sigo'")
- PERIMETRO
  ValueError("could not convert string to float: 'Mata Nacional das Dunas de Vila Real de Santo Antonio'")

Usually this is due to dask's dtype inference failing, and
*may* be fixed by specifying dtypes manually by adding:

dtype={'AREAPOV': 'float64',
       'AREASFICHEIRODBF_GTF': 'object',
       'AREASFICHEIROPRJ_GTF': 'object',
       'AREASFICHEIROSHP_GTF': 'object',
       'AREASFICHEIROSHX_GTF': 'object',
       'AREASFICHEIROS_GTF': 'object',
       'AREASFICHEIROZIP_SAA': 'object',
       'CAUSA': 'float64',
       'DURACAO': 'float64',
       'OPERADOR': 'object',
       'PERIMETRO': 'object'}

to the call to `read_csv`/`read_table`.

In [7]:
import pandas as pd
df = pd.read_csv('../RawData/Historical_FiresRAW/icnf_2021.csv')
print(df.head())

   Unnamed: 0 DISTRITO       TIPO   ANO  AREAPOV  AREAMATO  AREAAGRIC  \
0           0     Faro  Florestal  2021      0.0     0.002     0.0000   
1           1     Faro  Florestal  2021      0.0     0.007     0.0000   
2           2     Faro   Agrícola  2021      0.0     0.000     0.0328   
3           3     Faro   Agrícola  2021      0.0     0.000     0.3000   
4           4     Faro  Florestal  2021      0.0     0.474     0.0000   

   AREATOTAL  REACENDIMENTOS  QUEIMADA  ...  AREASFICHEIROS_GTF  \
0     0.0020               0         0  ...                 NaN   
1     0.0070               0         0  ...                 NaN   
2     0.0328               0         0  ...                 NaN   
3     0.3000               0         0  ...                 NaN   
4     0.4740               0         0  ...                 NaN   

   FICHEIROIMAGEM_GNR  AREASFICHEIROSHP_GTF  AREASFICHEIROSHPXML_GTF  \
0                 NaN                   NaN                      NaN   
1             

In [9]:
import pandas as pd
import os

# Directory containing the CSV files
dir_path = '../RawData/Historical_FiresRAW/'

# Initialize total count
total_count = 0

# Iterate over all files in the directory
for filename in os.listdir(dir_path):
    # Check if the file is a CSV file
    if filename.endswith('.csv'):
        # Read the CSV file with dtype set to object to avoid DtypeWarnings
        df = pd.read_csv(os.path.join(dir_path, filename), dtype='object')
        # Add the number of instances (rows) to the total count
        total_count += df.shape[0]

print(f'Total number of instances: {total_count}')

Total number of instances: 865066


In [12]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('../RawData/Historical_FiresRAW/icnf_2021.csv')

# Get a random row
random_row = df.sample(1)

# Print the specified columns
print(random_row[['TIPO', 'ANO', 'AREAMATO', 'AREATOTAL',
                   'QUEIMADA', 'FALSOALARME', 'HORAALERTA', 
                   'LOCAL', 'CONCELHO', 'FREGUESIA', 
                   'X', 'Y', 'DIA', 
                   'MES', 'HORA', 'LAT', 'LON']])

           TIPO   ANO  AREAMATO  AREATOTAL  QUEIMADA  FALSOALARME HORAALERTA  \
5530  Florestal  2021       0.0        0.0         0            0      13:24   

                                                  LOCAL CONCELHO  \
5530  AV GENERAL MARIO FIRMINO MIGUEL 2710-553_SINTR...   Sintra   

                              FREGUESIA      X       Y  DIA  MES  HORA  \
5530  Sintra (Santa Maria e São Miguel)  92582  204785    5    9    13   

            LAT      LON  
5530  38.804078 -9.36967  


           TIPO   ANO  AREAMATO  AREATOTAL  QUEIMADA  FALSOALARME HORAALERTA  \
1277  Florestal  2021    0.0003     0.0003         0            0      11:33   

                                           LOCAL   CONCELHO  FREGUESIA  \
1277  Quinta da Fonte Nova (EN233 (EN233) Km 61)  Penamacor  Penamacor   

           X       Y  DIA  MES  HORA        LAT      LON  
1277  281256  355640   20    5    11  40.165448 -7.17927

In [16]:

import urllib.request
import sys

import csv
import codecs
        
try: 
  ResultBytes = urllib.request.urlopen("https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/40.165448%2C%20-7.17927/2021-09-05/2021-09-05?unitGroup=metric&include=hours&key=|APIKEY|&contentType=csv")
  
  # Parse the results as CSV
  CSVText = csv.reader(codecs.iterdecode(ResultBytes, 'utf-8'))

  with open('output.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(CSVText)
        
except urllib.error.HTTPError  as e:
  ErrorInfo= e.read().decode() 
  print('Error code: ', e.code, ErrorInfo)
  sys.exit()
except  urllib.error.URLError as e:
  ErrorInfo= e.read().decode() 
  print('Error code: ', e.code,ErrorInfo)
  sys.exit()

In [21]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('output.csv')

# Print all column names
print(df.columns.tolist())

# Get a random row
random_row = df.sample(1)

# Print the random row
print(random_row[['temp', 'dew', 'humidity', 'precip', 'precipprob', 'preciptype', 'snow', 'snowdepth', 'windgust', 'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility', 'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'conditions']])

['name', 'datetime', 'temp', 'feelslike', 'dew', 'humidity', 'precip', 'precipprob', 'preciptype', 'snow', 'snowdepth', 'windgust', 'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility', 'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'conditions', 'icon', 'stations']
    temp  dew  humidity  precip  precipprob  preciptype  snow  snowdepth  \
10  20.9  8.2     43.95       0           0         NaN   NaN        NaN   

    windgust  windspeed  winddir  sealevelpressure  cloudcover  visibility  \
10       NaN       11.7    122.0            1016.6        62.7         NaN   

    solarradiation  solarenergy  uvindex  severerisk        conditions  
10             347          1.2        3         NaN  Partially cloudy  
