In [1]:
import numpy as np
import pandas as pd

In [56]:
# Read the forest-fires CSV (path is relative to the current working directory).
fires = pd.read_csv('./forestfires.csv')

# A bare last expression renders the frame as the cell output.
fires

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00


# Reduce the fires dataframe with the .groupby method to one row per month, holding the average temperature, the average RH, and the number of fires for that month. The goal is to create a dataframe named firesbymonth with columns avg_temp, avg_RH and fire_count.

In [21]:
# Collapse `fires` to one row per month in a single method chain.
firesbymonth = (
    fires
    .groupby('month')
    .agg(
        avg_temp=('temp', 'mean'),     # mean of `temp` within the month
        avg_RH=('RH', 'mean'),         # mean of `RH` within the month
        fire_count=('X', 'count'),     # number of non-null `X` entries in the month
    )
    .reset_index()                     # turn the `month` group key back into a column
    # Round only the two averages to 3 decimals; other columns are left as they are.
    .round({'avg_temp': 3, 'avg_RH': 3})
)

firesbymonth

Unnamed: 0,month,avg_temp,avg_RH,fire_count
0,apr,12.044,46.889,9
1,aug,21.632,45.489,184
2,dec,4.522,38.444,9
3,feb,9.635,55.7,20
4,jan,5.25,89.0,2
5,jul,22.109,45.125,32
6,jun,20.494,45.118,17
7,mar,13.083,40.0,54
8,may,14.65,67.0,2
9,nov,11.8,31.0,1


# Sort the dataframe firesbymonth, such that the 12 rows are ordered by month correctly: jan, feb, mar, and so on.

In [40]:
# Map each three-letter month abbreviation to its calendar position (jan=1 ... dec=12).
month_order = {
    abbreviation: position
    for position, abbreviation in enumerate(
        ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'],
        start=1,
    )
}

# Sort on the calendar position of `month` without storing a helper column,
# renumber the rows from 0, and keep exactly the four summary columns.
firesbymonth = (
    firesbymonth
    .sort_values(by='month', key=lambda months: months.map(month_order))
    .reset_index(drop=True)
    .loc[:, ['month', 'avg_temp', 'avg_RH', 'fire_count']]
)

firesbymonth


Unnamed: 0,month,avg_temp,avg_RH,fire_count
0,jan,5.25,89.0,2
1,feb,9.635,55.7,20
2,mar,13.083,40.0,54
3,apr,12.044,46.889,9
4,may,14.65,67.0,2
5,jun,20.494,45.118,17
6,jul,22.109,45.125,32
7,aug,21.632,45.489,184
8,sep,19.612,42.843,172
9,oct,17.093,37.467,15


# Create a new column called conditions in firesbymonth, of type string, that indicates whether a month is dry&hot, dry&cold, wet&hot or wet&cold. Use the mean values of avg_temp and avg_RH as the thresholds. Use the .apply method and define the function to apply with lambda.

In [None]:
# Thresholds are the means of the per-month averages stored in firesbymonth.
temp_threshold = firesbymonth['avg_temp'].mean()
rh_threshold = firesbymonth['avg_RH'].mean()

print("Average Yearly Temperature:", temp_threshold)
print("Average Yearly Relative Humidity:", rh_threshold)

# Label each month row-wise:
#   RH below its threshold          -> "dry", otherwise "wet"
#   temperature above its threshold -> "hot", otherwise "cold"
# Any row for which none of the explicit comparisons holds ends up as 'wet&cold'.
firesbymonth['conditions'] = firesbymonth.apply(
    lambda row: (
        (
            'dry&hot' if row['avg_temp'] > temp_threshold
            else 'dry&cold' if row['avg_temp'] <= temp_threshold
            else 'wet&cold'
        )
        if row['avg_RH'] < rh_threshold
        else (
            'wet&hot'
            if row['avg_temp'] > temp_threshold and row['avg_RH'] >= rh_threshold
            else 'wet&cold'
        )
    ),
    axis=1,  # call the lambda once per row
)

firesbymonth

Average Yearly Temperature: 14.326999999999998
Average Yearly Relative Humidity: 48.67291666666667


Unnamed: 0,month,avg_temp,avg_RH,fire_count,conditions
0,jan,5.25,89.0,2,wet&cold
1,feb,9.635,55.7,20,wet&cold
2,mar,13.083,40.0,54,dry&cold
3,apr,12.044,46.889,9,dry&cold
4,may,14.65,67.0,2,wet&hot
5,jun,20.494,45.118,17,dry&hot
6,jul,22.109,45.125,32,dry&hot
7,aug,21.632,45.489,184,dry&hot
8,sep,19.612,42.843,172,dry&hot
9,oct,17.093,37.467,15,dry&hot


# Re-organize the information in fires into a two-way table that shows the total area of fires per day of the week and per month, with NaN values replaced by 0. To that end, explore the .pivot_table method.

In [54]:
# Build a two-way table of summed fire area: one row per day of the week,
# one column per month.
fires_pivot = pd.pivot_table(
    fires,
    index='day',        # rows: day of the week
    columns='month',    # columns: month
    values='area',      # quantity being aggregated
    aggfunc='sum',      # total area in each (day, month) cell
    fill_value=0,       # show 0 instead of NaN for (day, month) pairs with no rows
)

# Display the resulting table.
fires_pivot

month,apr,aug,dec,feb,jan,jul,jun,mar,may,nov,oct,sep
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
fri,0.0,117.61,9.27,28.86,0.0,39.66,1.19,10.84,38.48,0.0,0.0,201.33
mon,3.35,17.81,60.38,9.96,0.0,280.16,5.46,101.94,0.0,0.0,61.64,165.83
sat,0.0,407.57,0.0,6.84,0.0,10.14,70.32,80.65,0.0,0.0,24.23,1545.11
sun,61.13,394.66,8.98,71.21,0.0,17.23,0.9,27.53,0.0,0.0,0.0,378.29
thu,15.54,837.17,5.38,0.0,0.0,4.51,13.27,8.75,0.0,0.0,0.0,112.48
tue,0.0,174.4,24.77,7.53,0.0,100.4,0.0,0.0,0.0,0.0,0.0,500.69
wed,0.0,348.77,11.19,1.1,0.0,7.73,8.16,5.55,0.0,0.0,13.7,182.4
