# Forest Fires Data Analysis

In [40]:
import pandas as pd

# Location of the CSV, given relative to the directory the notebook runs from.
file_path = "../datasets/forestfires/forestfires.csv"

# A comma is read_csv's default separator, so it does not need to be passed.
df = pd.read_csv(file_path)

# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [41]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(517, 13)

In [42]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,X,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
count,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0
mean,4.669246,4.299807,90.644681,110.87234,547.940039,9.021663,18.889168,44.288201,4.017602,0.021663,12.847292
std,2.313778,1.2299,5.520111,64.046482,248.066192,4.559477,5.806625,16.317469,1.791653,0.295959,63.655818
min,1.0,2.0,18.7,1.1,7.9,0.0,2.2,15.0,0.4,0.0,0.0
25%,3.0,4.0,90.2,68.6,437.7,6.5,15.5,33.0,2.7,0.0,0.0
50%,4.0,4.0,91.6,108.3,664.2,8.4,19.3,42.0,4.0,0.0,0.52
75%,7.0,5.0,92.9,142.4,713.9,10.8,22.8,53.0,4.9,0.0,6.57
max,9.0,9.0,96.2,291.3,860.6,56.1,33.3,100.0,9.4,6.4,1090.84


In [43]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       517 non-null    int64  
 1   Y       517 non-null    int64  
 2   month   517 non-null    object 
 3   day     517 non-null    object 
 4   FFMC    517 non-null    float64
 5   DMC     517 non-null    float64
 6   DC      517 non-null    float64
 7   ISI     517 non-null    float64
 8   temp    517 non-null    float64
 9   RH      517 non-null    int64  
 10  wind    517 non-null    float64
 11  rain    517 non-null    float64
 12  area    517 non-null    float64
dtypes: float64(8), int64(3), object(2)
memory usage: 52.6+ KB


In [44]:
# Report the smallest and largest values found in the 'area' column.
area_column = df['area']
print("Min area:", area_column.min())
print("Max area:", area_column.max())

Min area: 0.0
Max area: 1090.84


In [45]:
# Create data subsets by classifying the amount of area affected
# (e.g. NotAffected, PartiallyAffected, MostlyAffected).
def classify_area(area: float, partial_max: float = 40) -> str:
    """Map a burned-area value to a coarse severity label.

    Parameters
    ----------
    area
        Value taken from the ``area`` column.
    partial_max
        Largest area (inclusive, same units as ``area``) that still counts as
        'PartiallyAffected'. Defaults to 40, the cut-off this notebook uses.

    Returns
    -------
    str
        'NotAffected' when ``area <= 0``, 'PartiallyAffected' when
        ``0 < area <= partial_max``, otherwise 'MostlyAffected'.

    Notes
    -----
    NaN compares False against both thresholds, so a missing area falls
    through to 'MostlyAffected'. Filter or fill missing values beforehand if
    that is not the intended label.
    """
    if area <= 0:
        return 'NotAffected'
    if area <= partial_max:
        return 'PartiallyAffected'
    return 'MostlyAffected'

# Label every row with its area class. The label is stored on df itself
# because the sorting cell further down reads df's 'AreaClass' column.
df['AreaClass'] = df['area'].apply(classify_area)

# One subset per class, each selected with a boolean mask on the label column.
not_affected = df.loc[df['AreaClass'].eq('NotAffected')]
partially_affected = df.loc[df['AreaClass'].eq('PartiallyAffected')]
mostly_affected = df.loc[df['AreaClass'].eq('MostlyAffected')]

# Row count of each subset.
print("NotAffected:", not_affected.shape[0])
print("PartiallyAffected:", partially_affected.shape[0])
print("MostlyAffected:", mostly_affected.shape[0])

NotAffected: 247
PartiallyAffected: 239
MostlyAffected: 31


In [46]:
# Merge two subsets: stack the NotAffected rows on top of the PartiallyAffected
# rows, then renumber the index from 0 so the original row labels are discarded.
merged_df = pd.concat((not_affected, partially_affected)).reset_index(drop=True)
merged_df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,AreaClass
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,NotAffected
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,NotAffected
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,NotAffected
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,NotAffected
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,NotAffected


In [47]:
# Sort rows by temperature, then wind, then area, each in descending order.
sorted_df = df.sort_values(by=['temp', 'wind', 'area'], ascending=False)

# NOTE(review): the preview shows 'rain' rather than the 'area' sort key;
# confirm that is intended.
sorted_df.loc[:, ['AreaClass', 'temp', 'wind', 'rain']]

Unnamed: 0,AreaClass,temp,wind,rain
498,MostlyAffected,33.3,2.7,0.0
484,PartiallyAffected,33.1,4.0,0.0
496,PartiallyAffected,32.6,3.1,0.0
491,NotAffected,32.4,4.5,0.0
492,NotAffected,32.4,2.2,0.0
...,...,...,...,...
279,PartiallyAffected,4.6,8.5,0.0
463,PartiallyAffected,4.6,6.3,0.0
465,PartiallyAffected,4.6,0.9,0.0
282,NotAffected,4.2,4.0,0.0


In [48]:
# Transpose the data: the original column names become the row index and the
# original row labels become the columns.
transposed_df = df.T
transposed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,507,508,509,510,511,512,513,514,515,516
X,7,7,7,8,8,8,8,8,8,7,...,2,1,5,6,8,4,2,7,1,6
Y,5,4,4,6,6,6,6,6,6,5,...,4,2,4,5,6,3,4,4,4,3
month,mar,oct,oct,mar,mar,aug,aug,aug,sep,sep,...,aug,aug,aug,aug,aug,aug,aug,aug,aug,nov
day,fri,tue,sat,fri,sun,sun,mon,mon,tue,sat,...,fri,fri,fri,fri,sun,sun,sun,sun,sat,tue
FFMC,86.2,90.6,90.6,91.7,89.3,92.3,92.3,91.5,91.0,92.5,...,91.0,91.0,91.0,91.0,81.6,81.6,81.6,81.6,94.4,79.5
DMC,26.2,35.4,43.7,33.3,51.3,85.3,88.9,145.4,129.5,88.0,...,166.9,166.9,166.9,166.9,56.7,56.7,56.7,56.7,146.0,3.0
DC,94.3,669.1,686.9,77.5,102.2,488.0,495.6,608.2,692.6,698.6,...,752.6,752.6,752.6,752.6,665.6,665.6,665.6,665.6,614.7,106.7
ISI,5.1,6.7,6.7,9.0,9.6,14.7,8.5,10.7,7.0,7.1,...,7.1,7.1,7.1,7.1,1.9,1.9,1.9,1.9,11.3,1.1
temp,8.2,18.0,14.6,8.3,11.4,22.2,24.1,8.0,13.1,22.8,...,25.9,25.9,21.1,18.2,27.8,27.8,21.9,21.2,25.6,11.8
RH,51,33,33,97,99,29,27,86,63,40,...,41,41,71,62,35,32,71,70,42,31


In [49]:
# Melt to long format: month/day stay as identifiers, while temp, wind and
# area are stacked into a 'Weather' (variable name) / 'Value' column pair.
melt_df = df.melt(
    id_vars=['month', 'day'],
    value_vars=['temp', 'wind', 'area'],
    var_name='Weather',
    value_name='Value',
)
melt_df

Unnamed: 0,month,day,Weather,Value
0,mar,fri,temp,8.20
1,oct,tue,temp,18.00
2,oct,sat,temp,14.60
3,mar,fri,temp,8.30
4,mar,sun,temp,11.40
...,...,...,...,...
1546,aug,sun,area,6.44
1547,aug,sun,area,54.29
1548,aug,sun,area,11.16
1549,aug,sat,area,0.00


In [50]:
# Cast back to wide format: one row per (month, day) pair, one column per
# 'Weather' variable, each cell holding the mean of the matching 'Value' rows.
cast_df = pd.pivot_table(
    melt_df,
    index=['month', 'day'],
    columns='Weather',
    values='Value',
    aggfunc='mean',
)
# Turn the (month, day) index levels back into ordinary columns.
cast_df = cast_df.reset_index()

cast_df

Weather,month,day,area,temp,wind
0,apr,fri,0.000000,16.700000,3.100000
1,apr,mon,3.350000,10.900000,3.100000
2,apr,sat,0.000000,9.300000,4.500000
3,apr,sun,20.376667,14.900000,5.666667
4,apr,thu,7.770000,5.800000,5.800000
...,...,...,...,...,...
59,sep,sat,61.804400,21.524000,3.460000
60,sep,sun,14.010741,20.437037,3.955556
61,sep,thu,5.356190,20.390476,3.357143
62,sep,tue,26.352105,18.721053,3.431579
