In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('forestfires.csv');

In [3]:
df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [4]:
def classify_area(area):
    if area == 0.0:
        return 'NotAffected'
    elif area <= 10.0:
        return 'PartiallyAffected'
    else:
        return 'MostlyAffected'

df['class'] = df['area'].apply(classify_area)

# Create subsets
not_affected = df[df['class'] == 'NotAffected']
partially_affected = df[df['class'] == 'PartiallyAffected']
mostly_affected = df[df['class'] == 'MostlyAffected']


In [5]:
not_affected.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,class
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,NotAffected
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,NotAffected
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,NotAffected
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,NotAffected
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,NotAffected


In [6]:
partially_affected.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,class
138,9,9,jul,tue,85.8,48.3,313.4,3.9,18.0,42,2.7,0.0,0.36,PartiallyAffected
139,1,4,sep,tue,91.0,129.5,692.6,7.0,21.7,38,2.2,0.0,0.43,PartiallyAffected
140,2,5,sep,mon,90.9,126.5,686.5,7.0,21.9,39,1.8,0.0,0.47,PartiallyAffected
141,1,2,aug,wed,95.5,99.9,513.3,13.2,23.3,31,4.5,0.0,0.55,PartiallyAffected
142,8,6,aug,fri,90.1,108.0,529.8,12.5,21.2,51,8.9,0.0,0.61,PartiallyAffected


In [7]:
mostly_affected.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,class
194,2,2,aug,tue,94.8,108.3,647.1,17.0,24.6,22,4.5,0.0,10.01,MostlyAffected
195,2,5,aug,fri,93.9,135.7,586.7,15.1,23.5,36,5.4,0.0,10.02,MostlyAffected
196,6,5,apr,thu,81.5,9.1,55.2,2.7,5.8,54,5.8,0.0,10.93,MostlyAffected
197,4,5,sep,thu,92.9,137.0,706.4,9.2,21.5,15,0.9,0.0,11.06,MostlyAffected
198,3,4,sep,tue,91.0,129.5,692.6,7.0,13.9,59,6.3,0.0,11.24,MostlyAffected


In [8]:
merged_df = pd.concat([not_affected, partially_affected], ignore_index=True)

In [9]:
merged_df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,class
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,NotAffected
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,NotAffected
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,NotAffected
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,NotAffected
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,NotAffected


In [10]:
sorted_df = df.sort_values(by=['temp', 'wind', 'area'], ascending=[True, True, True])

In [11]:
sorted_df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,class
280,4,6,dec,fri,84.7,26.7,352.6,4.1,2.2,59,4.9,0.0,9.27,PartiallyAffected
282,6,3,feb,sun,84.9,27.5,353.5,3.4,4.2,51,4.0,0.0,0.0,NotAffected
465,2,2,feb,sat,79.5,3.6,15.3,1.8,4.6,59,0.9,0.0,6.84,PartiallyAffected
463,6,5,feb,tue,75.1,4.4,16.2,1.9,4.6,82,6.3,0.0,5.39,PartiallyAffected
279,4,4,dec,mon,85.4,25.4,349.7,2.6,4.6,21,8.5,0.0,9.77,PartiallyAffected


In [12]:
transposed_df = df.T

In [13]:
transposed_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,507,508,509,510,511,512,513,514,515,516
X,7,7,7,8,8,8,8,8,8,7,...,2,1,5,6,8,4,2,7,1,6
Y,5,4,4,6,6,6,6,6,6,5,...,4,2,4,5,6,3,4,4,4,3
month,mar,oct,oct,mar,mar,aug,aug,aug,sep,sep,...,aug,aug,aug,aug,aug,aug,aug,aug,aug,nov
day,fri,tue,sat,fri,sun,sun,mon,mon,tue,sat,...,fri,fri,fri,fri,sun,sun,sun,sun,sat,tue
FFMC,86.2,90.6,90.6,91.7,89.3,92.3,92.3,91.5,91.0,92.5,...,91.0,91.0,91.0,91.0,81.6,81.6,81.6,81.6,94.4,79.5


In [14]:
melted_df = pd.melt(df, id_vars=['X', 'Y', 'month', 'day'], var_name='Variable', value_name='Value')


In [15]:
melted_df.head()

Unnamed: 0,X,Y,month,day,Variable,Value
0,7,5,mar,fri,FFMC,86.2
1,7,4,oct,tue,FFMC,90.6
2,7,4,oct,sat,FFMC,90.6
3,8,6,mar,fri,FFMC,91.7
4,8,6,mar,sun,FFMC,89.3


In [16]:
# First ensure melt has a 'class' column if needed for pivot
wide_df = melted_df.pivot_table(index=['X', 'Y', 'month', 'day'], 
                                 columns='Variable', 
                                 values='Value', 
                                 aggfunc='first').reset_index()


In [17]:
wide_df.head()

Variable,X,Y,month,day,DC,DMC,FFMC,ISI,RH,area,class,rain,temp,wind
0,1,2,aug,fri,529.8,108.0,90.1,12.5,66,0.0,NotAffected,0.0,14.7,2.7
1,1,2,aug,sat,715.1,231.1,93.7,8.4,32,0.0,NotAffected,0.0,25.9,3.1
2,1,2,aug,sun,601.4,142.4,91.4,10.6,39,0.0,NotAffected,0.0,19.5,6.3
3,1,2,aug,thu,661.3,114.3,91.7,6.3,45,0.0,NotAffected,0.0,20.2,3.6
4,1,2,aug,tue,561.6,121.2,91.0,7.0,19,0.0,NotAffected,0.0,21.6,6.7
