In [96]:
import pandas as pd

## Display top 10 rows of the dataset
amazon_df = pd.read_csv("amazon.csv", encoding='latin-1')

top_10_rows_output = amazon_df.head(10).to_markdown()

with open("top_10_rows_output.md", "w") as file:
    file.write(top_10_rows_output)

amazon_df.head(10)

Unnamed: 0,year,state,month,number,date
0,1998,Acre,Janeiro,0.0,1998-01-01
1,1999,Acre,Janeiro,0.0,1999-01-01
2,2000,Acre,Janeiro,0.0,2000-01-01
3,2001,Acre,Janeiro,0.0,2001-01-01
4,2002,Acre,Janeiro,0.0,2002-01-01
5,2003,Acre,Janeiro,10.0,2003-01-01
6,2004,Acre,Janeiro,0.0,2004-01-01
7,2005,Acre,Janeiro,12.0,2005-01-01
8,2006,Acre,Janeiro,4.0,2006-01-01
9,2007,Acre,Janeiro,0.0,2007-01-01


In [94]:
## Checks last 10 rows

top_10_last_rows_output = amazon_df.tail(10).to_markdown()

with open("top_10_last_rows_output.md", "w") as file:
    file.write(top_10_last_rows_output)

amazon_df.tail(10)

Unnamed: 0,year,state,month,number,date
6444,2007,Tocantins,Dezembro,13.0,2007-01-01
6445,2008,Tocantins,Dezembro,7.0,2008-01-01
6446,2009,Tocantins,Dezembro,46.0,2009-01-01
6447,2010,Tocantins,Dezembro,72.0,2010-01-01
6448,2011,Tocantins,Dezembro,105.0,2011-01-01
6449,2012,Tocantins,Dezembro,128.0,2012-01-01
6450,2013,Tocantins,Dezembro,85.0,2013-01-01
6451,2014,Tocantins,Dezembro,223.0,2014-01-01
6452,2015,Tocantins,Dezembro,373.0,2015-01-01
6453,2016,Tocantins,Dezembro,119.0,2016-01-01


In [104]:
## Find shape of the dataset

shape_output = amazon_df.shape

markdown_output = f"Number of rows: {shape_output[0]}\nNumber of columns: {shape_output[1]}"

with open("amazon_df_shape.md", "w") as file:
    file.write(markdown_output)

amazon_df.shape

(6454, 5)

In [118]:
## Get information about the dataset
import sys

original_stdout = sys.stdout

with open("dataset_info.md", "w") as file:

    sys.stdout = file

    amazon_df.info()

sys.stdout = original_stdout

amazon_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6454 entries, 0 to 6453
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    6454 non-null   int64  
 1   state   6454 non-null   object 
 2   month   6454 non-null   object 
 3   number  6454 non-null   float64
 4   date    6454 non-null   object 
dtypes: float64(1), int64(1), object(3)
memory usage: 252.2+ KB


In [119]:
## Check for duplicate data
duplicate_rows = amazon_df.duplicated()

## Drop duplicates
amazon_df = amazon_df[~duplicate_rows]

duplicates_info = f"Number of duplicate rows: {duplicate_rows.sum()}"

with open("duplicate_data_info.md", "w") as file:
    file.write(duplicates_info)

print(duplicate_rows)

print(amazon_df)

0       False
1       False
2       False
3       False
4       False
        ...  
6449    False
6450    False
6451    False
6452    False
6453    False
Length: 6454, dtype: bool
      year      state     month  number        date
0     1998       Acre   Janeiro     0.0  1998-01-01
1     1999       Acre   Janeiro     0.0  1999-01-01
2     2000       Acre   Janeiro     0.0  2000-01-01
3     2001       Acre   Janeiro     0.0  2001-01-01
4     2002       Acre   Janeiro     0.0  2002-01-01
...    ...        ...       ...     ...         ...
6449  2012  Tocantins  Dezembro   128.0  2012-01-01
6450  2013  Tocantins  Dezembro    85.0  2013-01-01
6451  2014  Tocantins  Dezembro   223.0  2014-01-01
6452  2015  Tocantins  Dezembro   373.0  2015-01-01
6453  2016  Tocantins  Dezembro   119.0  2016-01-01

[6422 rows x 5 columns]


In [121]:
## Check null values
amazon_df.isnull().sum()

null_values_info = amazon_df.isnull().sum()

markdown_content = f"""```
{null_values_info}
```"""

with open("null_values_info.md", "w") as file:
    file.write(markdown_content)

amazon_df.isnull().sum()

year      0
state     0
month     0
number    0
date      0
dtype: int64

In [123]:
## Overall statistics
amazon_df.describe()

overall_statistics = amazon_df.describe()

markdown_content = f"""```
{overall_statistics}
```"""

with open("overall_statistics.md", "w") as file:
    file.write(markdown_content)

amazon_df.describe()

Unnamed: 0,year,number
count,6422.0,6422.0
mean,2007.490969,108.815178
std,5.731806,191.142482
min,1998.0,0.0
25%,2003.0,3.0
50%,2007.0,24.497
75%,2012.0,114.0
max,2017.0,998.0


In [124]:
## Dictionary mapping Portuguese month names to English
month_translation = {
    "Janeiro": "January",
    "Fevereiro": "February",
    "Março": "March",
    "Abril": "April",
    "Maio": "May",
    "Junho": "June",
    "Julho": "July",
    "Agosto": "August",
    "Setembro": "September",
    "Outubro": "October",
    "Novembro": "November",
    "Dezembro": "December"
}

## Rename month names
amazon_df['month'] = amazon_df['month'].map(month_translation)

In [126]:
## Total number of fires registered
total_fires = amazon_df['number'].sum()
print(total_fires, 'Total fires registered')

markdown_content = f""" Total number of fires registered: {total_fires}
"""

with open("total_fires.md", "w") as file:
    file.write(markdown_content)

698811.073 Total fires registered


In [130]:
## Month with maximum number of forest fires
max_fires_month = amazon_df.groupby('month')['number'].sum().idxmax()

print(max_fires_month, 'is the month with the most number of forest fires')

fires_per_month = amazon_df.groupby('month')['number'].sum()

month_most_fires = fires_per_month.idxmax()

markdown_content = f"""
Month with the most fires reported: {month_most_fires}
"""

with open("month_most_fires.md", "w") as file:
    file.write(markdown_content)

July is the month with the most number of forest fires


In [132]:
## Year with maximum number of forest fires
max_fires_year = amazon_df.groupby('year')['number'].sum().idxmax()

print(max_fires_year, 'is the year with the most number of forest fires')

fires_per_year = amazon_df.groupby('year')['number'].sum()

year_most_fires = fires_per_year.idxmax()

markdown_content = f"""Year with the most fires reported: {year_most_fires}
"""

with open("year_most_fires.md", "w") as file:
    file.write(markdown_content)

2003 is the year with the most number of forest fires


In [133]:
## State with maximum number of forest fires
max_fires_state = amazon_df.groupby('state')['number'].sum().idxmax()

print(max_fires_state, 'is the state with the most number of forest fires')

fires_per_state = amazon_df.groupby('state')['number'].sum()

state_most_fires = fires_per_state.idxmax()

markdown_content = f"""State with the most fires reported: {state_most_fires}
"""

with open("state_most_fires.md", "w") as file:
    file.write(markdown_content)

Mato Grosso is the state with the most number of forest fires


In [134]:
## Total number of fires reported in Amazonas
amazonas_fires = amazon_df[amazon_df['state'] == 'Amazonas']['number'].sum()

print(amazonas_fires, 'is the total number of fires reported in Amazonas')

total_fires_amazon = amazon_df[amazon_df['state'] == 'Amazonas']['number'].sum()

markdown_content = f"""Total number of fires reported in Amazonas: {total_fires_amazon}
"""

with open("total_fires_amazon.md", "w") as file:
    file.write(markdown_content)

30650.129 is the total number of fires reported in Amazonas


In [136]:
## Number of fires reported in Amazonas (year-wise)
amazonas_yearly_fires = amazon_df[amazon_df['state'] == 'Amazonas'].groupby('year')['number'].sum()

print(amazonas_yearly_fires)

amazonas_df = amazon_df[amazon_df['state'] == 'Amazonas']

yearly_fires_amazonas = amazonas_df.groupby('year')['number'].sum()

markdown_content = "Number of fires reported in Amazonas (year-wise):\n\n"
markdown_content += yearly_fires_amazonas.to_markdown()

with open("yearly_fires_amazonas.md", "w") as file:
    file.write(markdown_content)

year
1998     946.000
1999    1061.000
2000     853.000
2001    1297.000
2002    2852.000
2003    1524.268
2004    2298.207
2005    1657.128
2006     997.640
2007     589.601
2008    2717.000
2009    1320.601
2010    2324.508
2011    1652.538
2012    1110.641
2013     905.217
2014    2385.909
2015    1189.994
2016    2060.972
2017     906.905
Name: number, dtype: float64


In [138]:
## Display number of fires were reported in Amazonas (day-wise).
amazonas_daily_fires = amazon_df[amazon_df['state'] == 'Amazonas'].groupby(['year', 'month', amazon_df['date'].str[-2:]])['number'].sum()

print(amazonas_daily_fires)

amazonas_df = amazon_df[amazon_df['state'] == 'Amazonas']

daily_fires_amazonas = amazonas_df[amazon_df['state'] == 'Amazonas'].groupby(['year', 'month', amazon_df['date'].str[-2:]])['number'].sum()


markdown_content = "Number of fires reported in Amazonas (day-wise):\n\n"
markdown_content += daily_fires_amazonas.to_markdown()

with open("daily_fires_amazonas.md", "w") as file:
    file.write(markdown_content)

year  month      date
1998  April      01        0.000
      August     01      321.000
      December   01      196.000
      February   01        0.000
      January    01        0.000
                          ...   
2017  March      01       45.000
      May        01       40.000
      November   01      552.000
      October    01        1.581
      September  01        4.033
Name: number, Length: 239, dtype: float64


  daily_fires_amazonas = amazonas_df[amazon_df['state'] == 'Amazonas'].groupby(['year', 'month', amazon_df['date'].str[-2:]])['number'].sum()


In [139]:
## Total number of fires reported in 2015
fires_2015 = amazon_df[amazon_df['year'] == 2015]['number'].sum()

## Visualize data based on each month in 2015
fires_2015_monthly = amazon_df[(amazon_df['year'] == 2015)].groupby('month')['number'].sum()

print(fires_2015_monthly)

amazonas_2015_df = amazon_df[(amazon_df['state'] == 'Amazonas') & (amazon_df['year'] == 2015)]

monthly_fires_2015 = amazonas_2015_df.groupby('month')['number'].sum()

total_fires_2015 = amazonas_2015_df['number'].sum()

markdown_content = f"""Total number of fires reported in Amazonas in 2015: {total_fires_2015}
Number of fires reported in Amazonas in 2015 (month-wise):

{monthly_fires_2015.to_markdown()}
"""

with open("total_fires_2015_amazonas.md", "w") as file:
    file.write(markdown_content)

month
April        2573.000
August       4363.125
December     4088.522
February     2309.000
January      4635.000
July         4364.392
June         3260.552
March        2202.000
May          2384.000
November     4034.518
October      4499.525
September    2494.658
Name: number, dtype: float64


In [140]:
## Average number of fires reported from highest to lowest (state-wise)
avg_fires_statewise = amazon_df.groupby('state')['number'].mean().sort_values(ascending=False)

print(avg_fires_statewise)

markdown_content = f"""Average number of fires reported from highest to lowest (state-wise):

{avg_fires_statewise.to_markdown()}
"""

with open("avg_fires_statewise.md", "w") as file:
    file.write(markdown_content)

state
Sao Paulo           213.896226
Mato Grosso         203.479975
Bahia               187.222703
Piau                158.174674
Goias               157.721841
Minas Gerais        156.800243
Tocantins           141.037176
Amazonas            128.243218
Ceara               127.314071
Paraiba             111.073979
Maranhao            105.142808
Pará                102.561272
Pernambuco          102.502092
Roraima             102.029598
Santa Catarina      101.924067
Amapa                91.345506
Rondonia             84.876272
Acre                 77.255356
Rio                  64.698515
Espirito Santo       27.389121
Alagoas              19.271967
Distrito Federal     14.899582
Sergipe              13.543933
Name: number, dtype: float64


In [141]:
## State names where fires were reported in 'dec' month
dec_fires_states = amazon_df[amazon_df['month'] == 'December']['state'].unique()

print(dec_fires_states)

december_fires = amazon_df[amazon_df['month'] == 'December']

states_reported_december = december_fires['state'].unique()

markdown_content = f"""States reported in December:
- {', '.join(states_reported_december)}
"""

with open("states_reported_december.md", "w") as file:
    file.write(markdown_content)

['Acre' 'Alagoas' 'Amapa' 'Amazonas' 'Bahia' 'Ceara' 'Distrito Federal'
 'Espirito Santo' 'Goias' 'Maranhao' 'Mato Grosso' 'Minas Gerais' 'Pará'
 'Paraiba' 'Pernambuco' 'Piau' 'Rio' 'Rondonia' 'Roraima' 'Santa Catarina'
 'Sao Paulo' 'Sergipe' 'Tocantins']
