In [18]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import yellowbrick as yb

from yellowbrick.features.rankd import Rank1D, Rank2D 
from yellowbrick.features.radviz import RadViz 
from yellowbrick.features.pcoords import ParallelCoordinates 
from yellowbrick.features.jointplot import JointPlotVisualizer
from yellowbrick.features.pca import PCADecomposition
from yellowbrick.features.scatter import ScatterVisualizer

In [10]:
# Read the cleaned forest-fires table from disk, then preview its first five rows.
data = pd.read_csv(filepath_or_buffer="data/forest_fires_clean.csv")
data.head(n=5)

Unnamed: 0,X,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,month_nov,month_oct,month_sep,day_fri,day_mon,day_sat,day_sun,day_thu,day_tue,day_wed
0,7,5,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,1,0,0,0,0,0,0
1,7,4,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,1,0,0,0,0,0,0,1,0
2,7,4,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,1,0,0,0,1,0,0,0,0
3,8,6,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,1,0,0,0,0,0,0
4,8,6,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,0,0,1,0,0,0


###### Description

In [19]:
# Summary statistics for every numeric column; the quartiles are spelled out
# explicitly and are the same ones pandas reports by default.
data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,X,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,month_nov,month_oct,month_sep,day_fri,day_mon,day_sat,day_sun,day_thu,day_tue,day_wed
count,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,...,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0
mean,4.669246,4.299807,90.644681,110.87234,547.940039,9.021663,18.889168,44.288201,4.017602,0.021663,...,0.001934,0.029014,0.332689,0.16441,0.143133,0.162476,0.183752,0.117988,0.123791,0.104449
std,2.313778,1.2299,5.520111,64.046482,248.066192,4.559477,5.806625,16.317469,1.791653,0.295959,...,0.04398,0.168007,0.471632,0.371006,0.350548,0.369244,0.387657,0.322907,0.329662,0.306138
min,1.0,2.0,18.7,1.1,7.9,0.0,2.2,15.0,0.4,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.0,4.0,90.2,68.6,437.7,6.5,15.5,33.0,2.7,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,4.0,91.6,108.3,664.2,8.4,19.3,42.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,5.0,92.9,142.4,713.9,10.8,22.8,53.0,4.9,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,9.0,96.2,291.3,860.6,56.1,33.3,100.0,9.4,6.4,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


###### Data Types

In [20]:
# Show the dtype pandas assigned to each column of `data`.
data.dtypes


X              int64
Y              int64
FFMC         float64
DMC          float64
DC           float64
ISI          float64
temp         float64
RH             int64
wind         float64
rain         float64
area         float64
month_apr      int64
month_aug      int64
month_dec      int64
month_feb      int64
month_jan      int64
month_jul      int64
month_jun      int64
month_mar      int64
month_may      int64
month_nov      int64
month_oct      int64
month_sep      int64
day_fri        int64
day_mon        int64
day_sat        int64
day_sun        int64
day_thu        int64
day_tue        int64
day_wed        int64
dtype: object

###### Checking for null values

In [15]:
# Single yes/no answer: is there a missing value anywhere in the frame?
data.isna().to_numpy().any()

False

In [17]:
# Per-column count of missing values. `data[data.columns[:]]` only re-selected
# every column (an unnecessary copy of the whole frame), so call it on `data` directly.
data.isnull().sum()

X            0
Y            0
FFMC         0
DMC          0
DC           0
ISI          0
temp         0
RH           0
wind         0
rain         0
area         0
month_apr    0
month_aug    0
month_dec    0
month_feb    0
month_jan    0
month_jul    0
month_jun    0
month_mar    0
month_may    0
month_nov    0
month_oct    0
month_sep    0
day_fri      0
day_mon      0
day_sat      0
day_sun      0
day_thu      0
day_tue      0
day_wed      0
dtype: int64

> No null values found. Life is good, perhaps!

In [22]:
# Pairwise Pearson correlation between every pair of columns. Left as the
# cell's last expression so the notebook renders it as a table; printing it
# behind a "Correlation:" label pushed the header row out of line with the values.
data.corr(method='pearson')


Correlation:                   X         Y      FFMC       DMC        DC       ISI  \
X          1.000000  0.539548 -0.021039 -0.048384 -0.085916  0.006210   
Y          0.539548  1.000000 -0.046308  0.007782 -0.101178 -0.024488   
FFMC      -0.021039 -0.046308  1.000000  0.382619  0.330512  0.531805   
DMC       -0.048384  0.007782  0.382619  1.000000  0.682192  0.305128   
DC        -0.085916 -0.101178  0.330512  0.682192  1.000000  0.229154   
ISI        0.006210 -0.024488  0.531805  0.305128  0.229154  1.000000   
temp      -0.051258 -0.024103  0.431532  0.469594  0.496208  0.394287   
RH         0.085223  0.062221 -0.300995  0.073795 -0.039192 -0.132517   
wind       0.018798 -0.020341 -0.028485 -0.105342 -0.203466  0.106826   
rain       0.065387  0.033234  0.056702  0.074790  0.035861  0.067668   
area       0.063385  0.044873  0.040122  0.072994  0.049383  0.008258   
month_apr  0.063832 -0.008405 -0.117199 -0.197543 -0.268211 -0.106478   
month_aug -0.059669 -0.010404  0.22810