In [1]:
import pandas as pd

In [2]:
# Read the three raw CSV inputs from the relative df/ folder, one frame each.
desforestation, fire_incident, forest_loss = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'df/desforestation.csv',
        r'df/fire_incident.csv',
        r'df/forest_loss.csv',
    )
)

In [3]:
# Print the schema summary (columns, non-null counts, dtypes) of the raw
# deforestation table before any reshaping.
desforestation.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Ano/Estados  16 non-null     int64
 1   AC           16 non-null     int64
 2   AM           16 non-null     int64
 3   AP           16 non-null     int64
 4   MA           16 non-null     int64
 5   MT           16 non-null     int64
 6   PA           16 non-null     int64
 7   RO           16 non-null     int64
 8   RR           16 non-null     int64
 9   TO           16 non-null     int64
 10  AMZ LEGAL    16 non-null     int64
dtypes: int64(11)
memory usage: 1.5 KB


In [4]:
# Print the schema summary (columns, non-null counts, dtypes) of the raw
# fire-incident table.
fire_incident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2104 entries, 0 to 2103
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   year       2104 non-null   int64  
 1   month      2104 non-null   int64  
 2   state      2104 non-null   object 
 3   latitude   2104 non-null   float64
 4   longitude  2104 non-null   float64
 5   firespots  2104 non-null   int64  
dtypes: float64(2), int64(3), object(1)
memory usage: 98.8+ KB


In [5]:
# Print the schema summary (columns, non-null counts, dtypes) of the table
# loaded from forest_loss.csv.
forest_loss.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   start year  16 non-null     int64 
 1   end year    16 non-null     int64 
 2   phenomenon  16 non-null     object
 3   severity    16 non-null     object
dtypes: int64(2), object(2)
memory usage: 644.0+ bytes


In [6]:
# Give the year column the same name ('year') that fire_incident uses.
# rename() returns a new frame; rebinding the name avoids inplace=True,
# which has no performance benefit and prevents method chaining.
desforestation = desforestation.rename(columns={'Ano/Estados': 'year'})
desforestation.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   year       16 non-null     int64
 1   AC         16 non-null     int64
 2   AM         16 non-null     int64
 3   AP         16 non-null     int64
 4   MA         16 non-null     int64
 5   MT         16 non-null     int64
 6   PA         16 non-null     int64
 7   RO         16 non-null     int64
 8   RR         16 non-null     int64
 9   TO         16 non-null     int64
 10  AMZ LEGAL  16 non-null     int64
dtypes: int64(11)
memory usage: 1.5 KB


In [7]:
# Force an integer dtype on 'year'. The preceding info() output already
# reports int64 for this column, so this acts as a safeguard, not a conversion.
desforestation = desforestation.astype({'year': int})

In [8]:
# Reshape wide -> long: 'year' stays as the identifier, every other column
# header becomes a value in 'state_abbrev' and its cell value goes to
# 'desforestation_km2'.
desforestation_long = pd.melt(
    desforestation,
    id_vars=['year'],
    var_name='state_abbrev',
    value_name='desforestation_km2',
)

In [9]:
# Keep only per-state rows by discarding the ones that came from the
# 'AMZ LEGAL' column (presumably the region-wide total — confirm with the
# data source).
desforestation_long = desforestation_long.loc[
    lambda frame: frame['state_abbrev'] != 'AMZ LEGAL'
]
desforestation_long.info()

<class 'pandas.core.frame.DataFrame'>
Index: 144 entries, 0 to 143
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   year                144 non-null    int64 
 1   state_abbrev        144 non-null    object
 2   desforestation_km2  144 non-null    int64 
dtypes: int64(2), object(1)
memory usage: 4.5+ KB


In [10]:
# Full state name (as spelled in fire_incident['state']) -> two-letter code,
# matching the column headers used in the deforestation table.
mapping = dict([
    ("ACRE", "AC"),
    ("AMAZONAS", "AM"),
    ("AMAPA", "AP"),
    ("MARANHAO", "MA"),
    ("MATO GROSSO", "MT"),
    ("PARA", "PA"),
    ("RONDONIA", "RO"),
    ("RORAIMA", "RR"),
    ("TOCANTINS", "TO"),
])

# Series.map leaves NaN for any state name that is not a key of `mapping`.
# NOTE(review): such rows are not reported anywhere; verify that every value
# of fire_incident['state'] is covered (e.g. check state_abbrev.isna().sum()).
fire_incident = fire_incident.assign(
    state_abbrev=fire_incident['state'].map(mapping)
)

In [11]:
# Total fire spots per (year, state): as_index=False keeps the two grouping
# keys as ordinary columns, so the result is a flat frame.
fire_incident_group = fire_incident.groupby(
    ['year', 'state_abbrev'], as_index=False
)['firespots'].sum()

In [12]:
# Display the aggregated year/state fire-spot totals (pandas abbreviates the
# middle rows in the rendered table).
fire_incident_group

Unnamed: 0,year,state_abbrev,firespots
0,1999,AC,347
1,1999,AM,1048
2,1999,AP,101
3,1999,MA,4136
4,1999,MT,28538
...,...,...,...
184,2019,MT,17479
185,2019,PA,29700
186,2019,RO,11206
187,2019,RR,4775


In [13]:
# Collect every calendar year covered by a row of forest_loss; each row
# contributes the inclusive span from 'start year' to 'end year'.
# NOTE(review): rows are not filtered on the 'phenomenon' column, so every
# listed event ends up flagged as el_nino — confirm the file only contains
# El Nino episodes.
year_el_nino = {
    year
    for first, last in zip(forest_loss['start year'], forest_loss['end year'])
    for year in range(int(first), int(last) + 1)
}

# Add a boolean 'el_nino' column to both yearly tables: True when the row's
# year falls inside one of the collected spans.
for frame in (desforestation_long, fire_incident_group):
    frame['el_nino'] = frame['year'].isin(year_el_nino)

In [14]:
# Write both prepared tables to CSV in the working directory, leaving out
# the pandas index column.
for frame, target in (
    (desforestation_long, 'new_desforestation.csv'),
    (fire_incident_group, 'new_fire_incident.csv'),
):
    frame.to_csv(target, index=False)