# Web Scraping and API

In [1]:
# Import the libraries
import requests
import pandas as pd
import html5lib
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize

# Web Scrapping

In [2]:
# Use the URL of the general web-page. 
url = 'https://datos.jalisco.gob.mx/search/type/dataset?query=incendio&sort_by=changed'
html = requests.get(url).content
soup = BeautifulSoup(html,'html5lib')

In [3]:
# Use findall to determine the best datasets with the given the desired conditions
items = soup.find_all('a')
item_index = [i for i in range(len(items)) if items[i].text.startswith('Incendios forestales en Bosque La Primavera')]

# Add each element into a list
urls = [items[index].attrs['href'] for index in item_index if 'resource' not in items[index].attrs['href']]
urls

['/dataset/incendios-forestales-en-bosque-la-primavera-2019',
 '/dataset/incendios-forestales-en-bosque-la-primavera-septiembre-diciembre-2018',
 '/dataset/incendios-forestales-en-bosque-la-primavera-enero-abril-2018']

In [4]:
# Obtain information of all the desired pages to access the wanted information

# The general URL IS:
URL = "https://datos.jalisco.gob.mx"
soups = [BeautifulSoup(requests.get(URL+page).content, 'html5lib') for page in urls]

# Optain the links for the csv that have the data in a list. 
csv_url = [soup.find_all('a', {'class':'btn btn-primary data-link'})[0].attrs['href'] for soup in soups]
csv_url

# NEXT STEPS DIDN'T WORK!!!!

#final_records = [BeautifulSoup(requests.get(URL+urls).content, 'html5lib') for urls in final_url]
#records = [element.find_all('span', {'class':'recline-results-info'}) for element in final_records]
#for i in final_url:
#    print(URL+i)
#lista = final_records[0].find_all('div',{'class': 'navigation'})
#lista

['https://datos.jalisco.gob.mx/sites/default/files/incendios_forestales_opd_blp-ene-abril-2019.csv',
 'https://datos.jalisco.gob.mx/sites/default/files/bosque_la_primavera_incendios_forestales_sep-dic_2018.csv',
 'https://datos.jalisco.gob.mx/sites/default/files/incendios_forestales_en_blp_enero-abril-18_13jul2018.csv']

In [5]:
# Finally obtain all the data on a list, one can obtain the information of a csv with Beautiful soup!
crude_data = [requests.get(csv).content for csv in csv_url]
texts = [BeautifulSoup(crude_data[i],'html5lib').decode('ascii').split('<html>\n <head>\n </head>\n <body>\n')[1].split('</body>\n</html>')[0] 
         for i in range(len(crude_data))]

#Add each element into a list. 
final_df = [[row.split(',') for row in texts[element].split('\n')] for element in range(len(texts))]

In [6]:
# There was a single value that made noise to all the data, it had 20 columns instead of 18, it weas fixed manually with
# the next code:

for i in range(len(final_df[2])):
    try:
        if len(final_df[2][i])==20:
            final_df[2].pop(i)
    except:
        pass

In [7]:
#Generate the final dataframe with all the raw data from the three links:

dataframes = [pd.DataFrame(data[1:-1], columns = data[0]) for data in final_df]
fire_df=pd.DataFrame()
for dataframe in dataframes:
    fire_df = fire_df.append(dataframe)

fire_df = fire_df
fire_df.to_csv('fire_data.csv')

# Data cleaning

In [8]:
# Rename Columns
newcols_dict = {colname : colname.lower().replace('  ','').replace(' ','_').replace('número', 'num').replace('geográficas', 'geo')
                for colname in list(fire_df.columns)}
fire_df = fire_df.rename(columns = newcols_dict)

#Drop unessesary columns
try:
    fire_df = fire_df.drop(['level_0', 'index'], 1)
except:
    pass

Unnamed: 0,num_de_incendio,fuente,zona,paraje,municipio,coordenadas_geo_norte,coordenadas_geo_oeste,causa,fecha_de_registro,estrato_afectado_forestal_hojarasca,estrato_afectado_forestal_pasto,estrato_afectado_forestal_arbusto,estrato_afectado_forestal_renuevo,estrato_afectado_no_forestal_cultivos,estrato_afectado_no_forestal_pastizal,estrato_afectado_no_forestal_otros,superficie_afectada_total,num_de_participantes
0,1,OPD BLP,ZI,LAS TORRES,TALA,"""20° 39´ 17.0""""""",103° 40´ 59.82,DESCONOCIDA,24/01/2019,0.0,21.3,0.0,0.0,0.0,0.0,0.0,21.3,15
1,2,OPD BLP,ZI,LA TIZNADA,EL ARENAL,"""20° 43´ 17.2""""""","""103° 37´ 20.8""""""",DESCONOCIDA,24/01/2019,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,12
2,3,OPD BLP,ZI,EL MIRADOR,TALA,"""20° 38´ 57.2""""""","""103° 40´ 48.0""""""",DESCONOCIDA,25/01/2019,0.0,18.3,0.0,0.0,0.0,0.0,0.0,18.3,9


In [9]:
# Check for null values
fire_df.isnull().sum()

num_de_incendio                          0
fuente                                   0
zona                                     0
paraje                                   0
municipio                                0
coordenadas_geo_norte                    0
coordenadas_geo_oeste                    0
causa                                    0
fecha_de_registro                        0
estrato_afectado_forestal_hojarasca      0
estrato_afectado_forestal_pasto          0
estrato_afectado_forestal_arbusto        0
estrato_afectado_forestal_renuevo        0
estrato_afectado_no_forestal_cultivos    0
estrato_afectado_no_forestal_pastizal    0
estrato_afectado_no_forestal_otros       0
superficie_afectada_total                0
num_de_participantes                     0
dtype: int64

In [10]:
# Obtain a descrpition of the data
fire_df.describe().T

Unnamed: 0,count,unique,top,freq
num_de_incendio,205,117,58,2
fuente,205,1,OPD BLP,205
zona,205,2,ZI,134
paraje,205,111,SAN JOSE DE LA MONTAÑA,18
municipio,205,7,ZAPOPAN,108
coordenadas_geo_norte,205,205,"""20° 43´ 06.2""""""",1
coordenadas_geo_oeste,205,202,"""W 103° 40´ 53.2""""""",2
causa,205,15,INTENCIONAL,81
fecha_de_registro,205,123,01/04/2019,4
estrato_afectado_forestal_hojarasca,205,63,0.00,71


In [11]:
# There is only one type of data in "fuente", therefore that column can be descarted since it doesnt yield any information
fire_df = fire_df.drop( ['fuente'], axis = 1)
fire_df.head(2)

Unnamed: 0,num_de_incendio,zona,paraje,municipio,coordenadas_geo_norte,coordenadas_geo_oeste,causa,fecha_de_registro,estrato_afectado_forestal_hojarasca,estrato_afectado_forestal_pasto,estrato_afectado_forestal_arbusto,estrato_afectado_forestal_renuevo,estrato_afectado_no_forestal_cultivos,estrato_afectado_no_forestal_pastizal,estrato_afectado_no_forestal_otros,superficie_afectada_total,num_de_participantes
0,1,ZI,LAS TORRES,TALA,"""20° 39´ 17.0""""""",103° 40´ 59.82,DESCONOCIDA,24/01/2019,0.0,21.3,0.0,0.0,0.0,0.0,0.0,21.3,15
1,2,ZI,LA TIZNADA,EL ARENAL,"""20° 43´ 17.2""""""","""103° 37´ 20.8""""""",DESCONOCIDA,24/01/2019,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,12


In [12]:
# Reset the index so it runs from 0 to the number of data
fire_df = fire_df.reset_index(drop=True)
fire_df.tail(2)

Unnamed: 0,num_de_incendio,zona,paraje,municipio,coordenadas_geo_norte,coordenadas_geo_oeste,causa,fecha_de_registro,estrato_afectado_forestal_hojarasca,estrato_afectado_forestal_pasto,estrato_afectado_forestal_arbusto,estrato_afectado_forestal_renuevo,estrato_afectado_no_forestal_cultivos,estrato_afectado_no_forestal_pastizal,estrato_afectado_no_forestal_otros,superficie_afectada_total,num_de_participantes
203,95,ZP,LA SALA,ZAPOPAN,"""N 20° 36´ 42.9""""""","""W 103° 33´ 46.8""""""",QUEMA DE BASURA,30/04/2018,0.5,,,,,,,0.5,28
204,96,ZI,LOS NOVOA,TALA,"""N 20° 38´ 54.1""""""","""W 103° 40´ 48.1""""""",INTENCIONAL,30/04/2018,,,,,,6.54,,6.54,13


In [13]:
# Most information is not classified on the estrato_afectado_.+, therefore we are replazing the data of '' to 0

fire_df = fire_df.replace('', float(0.00))

In [14]:
fire_df.tail(2)

Unnamed: 0,num_de_incendio,zona,paraje,municipio,coordenadas_geo_norte,coordenadas_geo_oeste,causa,fecha_de_registro,estrato_afectado_forestal_hojarasca,estrato_afectado_forestal_pasto,estrato_afectado_forestal_arbusto,estrato_afectado_forestal_renuevo,estrato_afectado_no_forestal_cultivos,estrato_afectado_no_forestal_pastizal,estrato_afectado_no_forestal_otros,superficie_afectada_total,num_de_participantes
203,95,ZP,LA SALA,ZAPOPAN,"""N 20° 36´ 42.9""""""","""W 103° 33´ 46.8""""""",QUEMA DE BASURA,30/04/2018,0.5,0,0,0,0,0.0,0,0.5,28
204,96,ZI,LOS NOVOA,TALA,"""N 20° 38´ 54.1""""""","""W 103° 40´ 48.1""""""",INTENCIONAL,30/04/2018,0.0,0,0,0,0,6.54,0,6.54,13


In [15]:
# Change the type of the numerical data (which somehow is string) to float
columns_fire = list(fire_df.columns)
columns_fire_num = columns_fire[8:] 
columns_fire_num
for column in columns_fire_num:
    fire_df = fire_df.astype({column : 'float'})

In [17]:
# Modify the dates to an actual date-format
fire_df.fecha_de_registro = pd.to_datetime(fire_df['fecha_de_registro'])

#Order the dataframe by the dates
fire_df = fire_df.sort_values(['fecha_de_registro']).reset_index(drop = True).drop('num_de_incendio', axis = 1)

In [19]:
fire_df.head(2)

Unnamed: 0,zona,paraje,municipio,coordenadas_geo_norte,coordenadas_geo_oeste,causa,fecha_de_registro,estrato_afectado_forestal_hojarasca,estrato_afectado_forestal_pasto,estrato_afectado_forestal_arbusto,estrato_afectado_forestal_renuevo,estrato_afectado_no_forestal_cultivos,estrato_afectado_no_forestal_pastizal,estrato_afectado_no_forestal_otros,superficie_afectada_total,num_de_participantes
0,ZI,EL NAVO,ZAPOPAN,"""N 20° 43´ 38.4""""""","""W 103° 35´ 06.6""""""",INTENCIONAL,2018-01-01,0.0,0.0,0.0,0.0,0.0,5.7,0.0,5.7,30.0
1,ZI,VILLA FELICIDAD,TALA,"""N 20° 39´ 30.1""""""","""W 103° 40´ 35.6""""""",INTENCIONAL,2018-01-03,0.0,0.28,0.0,0.0,0.0,0.0,0.0,0.28,7.0


In [86]:
# Change North coordinates in GMS to GD

nort = list(fire_df['coordenadas_geo_norte'])

#Obtain purely the GMS coordinates
NGMS = [coordinate.replace('N','').replace('"','').replace(' ','').replace('°','|').replace('´´','').replace('´','|').replace("''","").replace("'","|").split('|') for coordinate in nort]
NGD = [float(coor[0])+float(coor[1])/60+float(coor[2])/3600 for coor in NGMS]

In [81]:
# Change West coordinates in GMS to GD

west = list(fire_df['coordenadas_geo_oeste'])

# There was a single value that made noise
fire_df.loc[(205-34),'coordenadas_geo_oeste'] = "103°  33' 27.1''"

#Obtain purely the GMS coordinates
WGMS = [coordinate.replace('W','').replace('O','0').replace('"','').replace(' ','').replace('°','|').replace('´´','').replace('´','|').replace("''","").replace("'","|").split('|') for coordinate in west]
WGD = [float(coor[0])+float(coor[1])/60+float(coor[2])/3600 for coor in WGMS]


In [91]:
fire_df.coordenadas_geo_oeste = pd.Series(WGD)
fire_df.coordenadas_geo_norte = pd.Series(NGD)
fire_df.head(2)

Unnamed: 0,zona,paraje,municipio,coordenadas_geo_norte,coordenadas_geo_oeste,causa,fecha_de_registro,estrato_afectado_forestal_hojarasca,estrato_afectado_forestal_pasto,estrato_afectado_forestal_arbusto,estrato_afectado_forestal_renuevo,estrato_afectado_no_forestal_cultivos,estrato_afectado_no_forestal_pastizal,estrato_afectado_no_forestal_otros,superficie_afectada_total,num_de_participantes
0,ZI,EL NAVO,ZAPOPAN,20.727333,103.585167,INTENCIONAL,2018-01-01,0.0,0.0,0.0,0.0,0.0,5.7,0.0,5.7,30.0
1,ZI,VILLA FELICIDAD,TALA,20.658361,103.676556,INTENCIONAL,2018-01-03,0.0,0.28,0.0,0.0,0.0,0.0,0.0,0.28,7.0


In [90]:
# Obtain some insights from the numerical data
fire_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
coordenadas_geo_norte,205.0,20.668892,0.053346,20.500472,20.635361,20.659361,20.713306,20.9835
coordenadas_geo_oeste,205.0,103.573721,0.086926,103.408222,103.498083,103.558667,103.676778,103.800583
estrato_afectado_forestal_hojarasca,205.0,1.030537,4.013565,0.0,0.0,0.0,0.28,33.96
estrato_afectado_forestal_pasto,205.0,1.367043,4.336537,0.0,0.0,0.0,0.9,44.2
estrato_afectado_forestal_arbusto,205.0,0.312488,1.671299,0.0,0.0,0.0,0.0,15.0
estrato_afectado_forestal_renuevo,205.0,0.004878,0.069843,0.0,0.0,0.0,0.0,1.0
estrato_afectado_no_forestal_cultivos,205.0,0.019512,0.279372,0.0,0.0,0.0,0.0,4.0
estrato_afectado_no_forestal_pastizal,205.0,1.216195,7.245793,0.0,0.0,0.0,0.0,100.0
estrato_afectado_no_forestal_otros,205.0,0.000146,0.001559,0.0,0.0,0.0,0.0,0.02
superficie_afectada_total,205.0,14.580702,136.110342,0.0,0.35,1.18,4.04,1934.0


In [92]:
fire_df.to_csv('fire_data_clean.csv')

In [97]:
needed_data = fire_df[['coordenadas_geo_norte','coordenadas_geo_oeste','fecha_de_registro']]
needed_data.head()

Unnamed: 0,coordenadas_geo_norte,coordenadas_geo_oeste,fecha_de_registro
0,20.727333,103.585167,2018-01-01
1,20.658361,103.676556,2018-01-03
2,20.654,103.689417,2018-01-04
3,20.620278,103.671111,2018-01-04
4,20.627417,103.502528,2018-01-04


In [129]:
# A list of all the needed data
latitude = list(needed_data['coordenadas_geo_norte'])
longitude = list(needed_data['coordenadas_geo_oeste'])
date = list(needed_data['fecha_de_registro'])
date_vals = [d.value//10**9 for d in date]

In [144]:
# Trial
'{}{}/{},-{},{}'.format(url,key,str(latitude[0]),str(longitude[0]),str(date_vals[0]))

'https://api.darksky.net/forecast/3674abb3a8982b4a3359284ea8d0986c/20.72733333333333,-103.58516666666667,1514764800'

# API consumption

In [143]:
# Acces API DarkSky do determine the metereological conditions when there was a fire
key = '3674abb3a8982b4a3359284ea8d0986c'
url = 'https://api.darksky.net/forecast/'

json_list = [requests.get('{}{}/{},-{},{}'.format(url,key,str(latitude[i]),str(longitude[i]),str(date_vals[i]))).json() for i in range(len(date_vals))]

In [186]:
# Determine the keys to obtain the most relevant information
# See the information: 
print(list(json_list[0].keys()))

# Lenght of the daily data as set:
daily_len = {len(json_list[i]['daily']['data'][0]) for i in range(len(json_list))}
print(daily_len)

# Lenght of the current data as set:
currently_len = {len(json_list[i]['currently']) for i in range(len(json_list))}
print(currently_len)

['latitude', 'longitude', 'timezone', 'currently', 'hourly', 'daily', 'flags', 'offset']
{33, 35, 36, 38, 39, 40}
{12, 13, 14, 15, 16, 17, 18, 19}


In [250]:
# Decide to use the daily data
from pandas.io.json import json_normalize

pd_met_info = [pd.DataFrame(json_list[i]['daily']['data']) for i in range(len(json_list))]

pd_distinct = [pd.DataFrame(),pd.DataFrame(),pd.DataFrame(),pd.DataFrame(),pd.DataFrame(),pd.DataFrame()]

for i in range(len(pd_met_info)):
    if (len(list(pd_met_info[i].columns)) == list(daily_len)[0]):
           pd_distinct[0] = pd_distinct[0].append(pd_met_info[i])
    elif (len(list(pd_met_info[i].columns)) == list(daily_len)[1]):
           pd_distinct[1] = pd_distinct[1].append(pd_met_info[i])
    elif (len(list(pd_met_info[i].columns)) == list(daily_len)[2]):
           pd_distinct[2] = pd_distinct[2].append(pd_met_info[i])
    elif (len(list(pd_met_info[i].columns)) == list(daily_len)[3]):
           pd_distinct[3] = pd_distinct[3].append(pd_met_info[i])
    elif (len(list(pd_met_info[i].columns)) == list(daily_len)[4]):
           pd_distinct[4] = pd_distinct[4].append(pd_met_info[i])
    elif (len(list(pd_met_info[i].columns)) == list(daily_len)[5]):
           pd_distinct[5] = pd_distinct[5].append(pd_met_info[i])

In [269]:
# To make easier the data manipulation, all the sets are compared to see which columns are the same, those will stick together
lenghts_distinct_df = [len(data) for data in pd_distinct]
new_columns = list(set(pd_distinct[0].columns) & set(pd_distinct[1].columns) & set(pd_distinct[2].columns) & set(pd_distinct[3].columns) & set(pd_distinct[4].columns) & set(pd_distinct[5].columns))


[5, 17, 52, 11, 101, 19]

In [326]:
meteorological_pd = pd.DataFrame()

# Now it is possible to join all the dataframes 
for df in pd_distinct:
    meteorological_pd = meteorological_pd.append(df[new_columns])
meteorological_pd.head(2)
meteorological_pd.to_csv('meteorological.csv')

# Data Cleaning

In [327]:
# Check for null data
meteorological_pd.isnull().sum()

temperatureMin                 0
apparentTemperatureMinTime     0
temperatureHighTime            0
apparentTemperatureMin         0
time                           0
dewPoint                       0
apparentTemperatureHighTime    0
uvIndex                        0
temperatureLowTime             0
apparentTemperatureMaxTime     0
apparentTemperatureLowTime     0
sunriseTime                    0
windBearing                    0
apparentTemperatureLow         0
icon                           0
cloudCover                     1
sunsetTime                     0
temperatureMaxTime             0
uvIndexTime                    0
windSpeed                      0
temperatureMinTime             0
apparentTemperatureHigh        0
humidity                       0
temperatureMax                 0
apparentTemperatureMax         0
moonPhase                      0
temperatureLow                 0
summary                        0
temperatureHigh                0
dtype: int64

In [328]:
meteorological_pd[meteorological_pd["cloudCover"].isnull()==True][['temperatureMin', 'time', 'uvIndex','cloudCover']]

Unnamed: 0,temperatureMin,time,uvIndex,cloudCover
0,43.91,1522731600,0,


In [329]:
# Change the null value. 
meteorological_pd = meteorological_pd.fillna(0)

In [330]:
# Sort and refresh values
meteorological_pd = meteorological_pd.sort_values('time', axis=0).reset_index(drop=True)
meteorological_pd.head(4)

Unnamed: 0,temperatureMin,apparentTemperatureMinTime,temperatureHighTime,apparentTemperatureMin,time,dewPoint,apparentTemperatureHighTime,uvIndex,temperatureLowTime,apparentTemperatureMaxTime,...,windSpeed,temperatureMinTime,apparentTemperatureHigh,humidity,temperatureMax,apparentTemperatureMax,moonPhase,temperatureLow,summary,temperatureHigh
0,42.43,1514728800,1514746800,42.43,1514700000,49.49,1514746800,5,1514815200,1514746800,...,1.95,1514728800,75.38,0.75,75.1,75.38,0.46,46.32,Mostly cloudy throughout the day.,75.1
1,47.32,1514898000,1514930400,47.32,1514872800,47.83,1514930400,6,1514988000,1514930400,...,2.03,1514898000,75.35,0.65,75.35,75.35,0.54,46.82,Partly cloudy throughout the day.,75.35
2,41.38,1514988000,1515013200,41.38,1514959200,45.71,1515013200,6,1515074400,1515013200,...,1.11,1514988000,69.91,0.75,69.91,69.91,0.58,42.21,Partly cloudy throughout the day.,69.91
3,46.72,1514988000,1515013200,46.72,1514959200,47.4,1515013200,6,1515074400,1515013200,...,1.48,1514988000,75.11,0.67,75.11,75.11,0.58,47.7,Partly cloudy throughout the day.,75.11


In [331]:
# Stay only with relevant information

meteorological_pd = meteorological_pd[['temperatureMin','temperatureMax','apparentTemperatureMin','apparentTemperatureMax', 'temperatureLow','temperatureHigh',
                  'time','dewPoint', 'uvIndex','uvIndexTime', 'windBearing','cloudCover','icon','windSpeed', 'humidity','summary'
                  ]]

In [332]:
meteorological_pd.to_csv('meteorological_clean.csv')
meteorological_pd.head()

Unnamed: 0,temperatureMin,temperatureMax,apparentTemperatureMin,apparentTemperatureMax,temperatureLow,temperatureHigh,time,dewPoint,uvIndex,uvIndexTime,windBearing,cloudCover,icon,windSpeed,humidity,summary
0,42.43,75.1,42.43,75.38,46.32,75.1,1514700000,49.49,5,1514746800,219,0.49,partly-cloudy-day,1.95,0.75,Mostly cloudy throughout the day.
1,47.32,75.35,47.32,75.35,46.82,75.35,1514872800,47.83,6,1514919600,109,0.23,partly-cloudy-day,2.03,0.65,Partly cloudy throughout the day.
2,41.38,69.91,41.38,69.91,42.21,69.91,1514959200,45.71,6,1515006000,38,0.27,partly-cloudy-day,1.11,0.75,Partly cloudy throughout the day.
3,46.72,75.11,46.72,75.11,47.7,75.11,1514959200,47.4,6,1515006000,10,0.27,partly-cloudy-day,1.48,0.67,Partly cloudy throughout the day.
4,47.25,75.89,47.25,75.89,48.23,75.89,1514959200,47.48,6,1515006000,17,0.27,partly-cloudy-day,1.49,0.66,Partly cloudy throughout the day.


In [336]:
#Unite both dataframes
forest_fire = pd.concat([fire_df,meteorological_pd], axis=1)
forest_fire.head()

Unnamed: 0,zona,paraje,municipio,coordenadas_geo_norte,coordenadas_geo_oeste,causa,fecha_de_registro,estrato_afectado_forestal_hojarasca,estrato_afectado_forestal_pasto,estrato_afectado_forestal_arbusto,...,time,dewPoint,uvIndex,uvIndexTime,windBearing,cloudCover,icon,windSpeed,humidity,summary
0,ZI,EL NAVO,ZAPOPAN,20.727333,103.585167,INTENCIONAL,2018-01-01,0.0,0.0,0.0,...,1514700000,49.49,5,1514746800,219,0.49,partly-cloudy-day,1.95,0.75,Mostly cloudy throughout the day.
1,ZI,VILLA FELICIDAD,TALA,20.658361,103.676556,INTENCIONAL,2018-01-03,0.0,0.28,0.0,...,1514872800,47.83,6,1514919600,109,0.23,partly-cloudy-day,2.03,0.65,Partly cloudy throughout the day.
2,ZI,LA TOMA DE VILLA,TALA,20.654,103.689417,INTENCIONAL,2018-01-04,0.0,0.0,0.0,...,1514959200,45.71,6,1515006000,38,0.27,partly-cloudy-day,1.11,0.75,Partly cloudy throughout the day.
3,ZP,LA LOBERA,TALA,20.620278,103.671111,QUEMA DE BASURA,2018-01-04,0.44,0.0,0.0,...,1514959200,47.4,6,1515006000,10,0.27,partly-cloudy-day,1.48,0.67,Partly cloudy throughout the day.
4,ZP,SAN JOSE DE LA MONTAÑA,ZAPOPAN,20.627417,103.502528,INTENCIONAL,2018-01-04,1.2,0.0,0.0,...,1514959200,47.48,6,1515006000,17,0.27,partly-cloudy-day,1.49,0.66,Partly cloudy throughout the day.


In [337]:
forest_fire.to_csv('forest_fire_jalisco_data.csv')

In [338]:
# Obtain some insights

forest_fire.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
coordenadas_geo_norte,205.0,20.66889,0.05334616,20.50047,20.63536,20.65936,20.71331,20.9835
coordenadas_geo_oeste,205.0,103.5737,0.08692644,103.4082,103.4981,103.5587,103.6768,103.8006
estrato_afectado_forestal_hojarasca,205.0,1.030537,4.013565,0.0,0.0,0.0,0.28,33.96
estrato_afectado_forestal_pasto,205.0,1.367043,4.336537,0.0,0.0,0.0,0.9,44.2
estrato_afectado_forestal_arbusto,205.0,0.3124878,1.671299,0.0,0.0,0.0,0.0,15.0
estrato_afectado_forestal_renuevo,205.0,0.004878049,0.06984303,0.0,0.0,0.0,0.0,1.0
estrato_afectado_no_forestal_cultivos,205.0,0.0195122,0.2793721,0.0,0.0,0.0,0.0,4.0
estrato_afectado_no_forestal_pastizal,205.0,1.216195,7.245793,0.0,0.0,0.0,0.0,100.0
estrato_afectado_no_forestal_otros,205.0,0.0001463415,0.001558672,0.0,0.0,0.0,0.0,0.02
superficie_afectada_total,205.0,14.5807,136.1103,0.0,0.35,1.18,4.04,1934.0
