In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:black;
           font-size:120%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;
              text-align:center;">
                            Data Info
              
</p>
</div>


<table>
  <tr>
    <th>Features</th>
    <th>MIN MAX Value</th>
    <th>Description</th>
  </tr>
  <tr>
    <td>Size </td>
    <td>7,12,14,16,20</td>
    <td>In order to adjust the flame size, 5 different sizes of fuel containers were used. 
        It was recorded for ease of procedure in classification problems as follows.
        7 cm=1, 12 cm=2, 14 cm=3, 16 cm=4, 20 cm=5</td>
  </tr>
  <tr>
    <td>Fuel</td>
    <td>Gasoline, Kerosene, Thinner, LPG</td>
    <td>Fuel type</td>
  </tr>
  <tr>
    <td>Distance</td>
    <td>10-190</td>
    <td>Indicates the distance of the fuel container to the collimator.</td>
  </tr>
  <tr>
    <td>Desibel</td>
    <td>72-113</td>
    <td>Indicates the decibel value in the area where the flame exists.</td>
  </tr>
  <tr>
    <td>Airflow</td>
    <td>0-17</td>
    <td>Indicates the airflow created by sound waves.</td>
  </tr>
  <tr>
    <td>Frequency</td>
    <td>1-75</td>
    <td>Indicates the frequency of the sound wave.</td>
  </tr>
  <tr>
    <td>Status</td>
    <td>0-1</td>
    <td>0 indicates the non-extinction state, 1 indicates the extinction state.</td>
  </tr>
</table>

In [None]:
!python3 -m pip install -q evalml==0.28.0

In [None]:
!pip install evalml

In [None]:
!pip install openpyxl

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')
import missingno as msno



In [None]:
# Location of the Excel workbook inside the Kaggle input directory.
DATASET_PATH = '/kaggle/input/acoustic-extinguisher-fire-dataset/Acoustic_Extinguisher_Fire_Dataset/Acoustic_Extinguisher_Fire_Dataset.xlsx'

# Load the workbook into a DataFrame and preview its first rows.
df = pd.read_excel(DATASET_PATH)
df.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Column names, non-null counts and dtypes of the loaded frame.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics; with default arguments describe() reports the numeric columns.
df.describe()

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:black;
           font-size:120%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;
              text-align:center;">
                            EDA
              
</p>
</div>

In [None]:
# Pairwise correlation matrix of the numeric columns.
# FUEL holds strings, and pandas 2.0+ raises on non-numeric columns in
# DataFrame.corr() instead of silently dropping them, so the numeric columns
# are selected explicitly (this form also works on older pandas releases).
df_corr = df.select_dtypes(include='number').corr()
df_corr

In [None]:
# Draw the correlation matrix as a heatmap, printing each coefficient
# in its cell with two decimals.
heatmap_trace = go.Heatmap(
    x=df_corr.columns,
    y=df_corr.index,
    z=np.array(df_corr),
    text=df_corr.values,
    texttemplate='%{text:.2f}',
    colorscale='blugrn',
)

# Shared styling values for the figure layout.
title_font = {'family': 'monospace', 'size': 22, 'color': 'grey'}
background = 'rgb(248, 248, 255)'

fig = go.Figure()
fig.add_trace(heatmap_trace)
fig.update_layout(
    margin={'b': 0, 'l': 0, 'r': 0},
    paper_bgcolor=background,
    plot_bgcolor=background,
    title_text='Correlation Matrix',
    title={'font': title_font, 'x': 0.45, 'y': 0.9},
    width=800,
)
fig.show()

<div style=background-color:lightblue;>
<p style="font-family: Arials, sans-serif; font-size: 16px; color: #000000"><strong>INSIGHT:</strong></p>
<ol style="font-family: Arials, sans-serif; font-size: 14px; line-height:1.5; color: rgba(0,0,0,.7)">
<li>There is a moderate positive relationship between frequency and decibel. There is a strong negative relationship between distance and the airflow created by the sound pressure; accordingly, the airflow can be expected to decrease as the distance increases.</li>
    <p></p>
</ol>
</div>

In [None]:
# Donut chart of the share of observations in each SIZE category.
size = df['SIZE'].value_counts()

fig = px.pie(names = size.index,
             values = size,
             hole = .5)

# Caption for the chart, drawn as an arrow-less annotation at (0.50, 0.5).
fig.add_annotation(showarrow=False,x=0.50,y=0.5,text='Size Distribution',font=dict(size=15,color='grey'))

# Slice colours, with the category label and percentage shown on each slice.
fig.update_traces(marker = dict(colors=['lightblue','lightpink','lightyellow','lightgreen','plum','papayawhip','burlywood']),
                 textinfo='label+percent')

fig.update_layout(margin={'b':0,'l':0,'r':0,'t':100},
                 paper_bgcolor='rgb(248, 248, 255)',
                 plot_bgcolor='rgb(248, 248, 255)',
                 title={'font':{
                             'family':'monospace',
                             'size':22,
                             'color': 'grey'
                         },
                        'x':0.45,'y':0.9},)
fig.show()

In [None]:
# Bar chart of the number of observations per fuel type, with the count
# printed above each bar.
fuel = df['FUEL'].value_counts()

fig = px.bar(x=fuel.index,
             y = fuel,
             text = fuel)
# One colour per bar: the dataset description lists four fuel types
# (gasoline, kerosene, thinner, LPG), so four colours are supplied.
fig.update_traces(textposition='outside', marker = dict(color=['gainsboro','silver','grey','dimgrey']))

fig.update_layout(margin={'b':0,'l':0,'r':0,'t':100},
                 paper_bgcolor='rgb(248, 248, 255)',
                 plot_bgcolor='rgb(248, 248, 255)',
                 title_text='Fuel',
                 title={'font':{
                             'family':'monospace',
                             'size':22,
                             'color': 'grey'
                         },
                        'x':0.45,'y':0.9},
                 width=900)
fig.show()

In [None]:
# Count the observations for every (STATUS, FUEL) combination.
status = df.groupby('STATUS')['FUEL'].value_counts().reset_index(name='count')

# Grouped bars: one group per fuel, one bar per STATUS value, each bar
# labelled with its STATUS value.
fig = px.bar(x = status['FUEL'],
             y = status['count'],
             color=status['STATUS'].astype('object'),
             text = status['STATUS'],
             barmode='group')

# The title names what is actually plotted (fuel type versus extinction status).
fig.update_layout(margin={'b':0,'l':0,'r':0,},
                 paper_bgcolor='rgb(248, 248, 255)',
                 plot_bgcolor='rgb(248, 248, 255)',
                 title_text='Fuel & Status Counts',
                 title={'font':{
                             'family':'monospace',
                             'size':22,
                             'color': 'grey'
                         },
                        'x':0.48,'y':0.9},
                 width=800)

fig.show()

In [None]:
# Box plots of each numeric feature, split by STATUS, laid out on a 3x2 grid.
fire = ['SIZE','DISTANCE','DESIBEL','AIRFLOW','FREQUENCY']
a = 3  # number of grid rows
b = 2  # number of grid columns

fig = plt.figure(figsize=(10,10))

# `c` is the 1-based position of the current panel in the grid.
for c, i in enumerate(fire, start=1):
    plt.subplot(a,b,c)
    plt.title('{}, subplot: {} {} {}'.format(i,a,b,c))
    # STATUS is used as the grouping axis (horizontal orientation is stated
    # explicitly because STATUS is numeric). Supplying it only as `hue` next to
    # a single `x` vector is silently ignored by seaborn releases before 0.13,
    # which draws one undivided box per feature.
    sns.boxplot(data=df, x=i, y='STATUS', orient='h')

plt.tight_layout()
plt.show()

In [None]:
# Interactive box plot of every non-object column, coloured by STATUS.
df1 = df.select_dtypes(exclude=['object'])

# Select the feature columns by name instead of assuming STATUS is the last
# column, so a reordered frame cannot end up plotting STATUS against itself.
for col in df1.columns.drop('STATUS'):
    fig = px.box(df1, x=col, color='STATUS',height=500,width=800,title=col+' VS Status')
    fig.show()

In [None]:
# 3x2 grid of pairwise scatter plots of the numeric features, coloured by STATUS.
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))

# (x column, y column) for each panel, in row-major panel order.
feature_pairs = [
    ('DISTANCE', 'DESIBEL'),
    ('DESIBEL', 'AIRFLOW'),
    ('DISTANCE', 'AIRFLOW'),
    ('FREQUENCY', 'DESIBEL'),
    ('FREQUENCY', 'AIRFLOW'),
    ('DISTANCE', 'FREQUENCY'),
]

for panel, (x_col, y_col) in zip(axes.flatten(), feature_pairs):
    sns.scatterplot(data=df, x=x_col, y=y_col, hue='STATUS', ax=panel)

plt.show()

<div style=background-color:lightblue;>
<p style="font-family: Arials, sans-serif; font-size: 16px; color: #000000"><strong>Findings:</strong> By analyzing the scatter plots above, the following is established:</p>

<ol style="font-family: Arials, sans-serif; font-size: 14px; line-height:1.5; color: rgba(0,0,0,.7)">

<li>The decibel value decreases as the distance to the sound wave flame extinguishing system increases. We can observe that the flame can be extinguished in the value ranges of 85–98 dB and 100–110 dB.</li>
<p></p>

<li>The second graph demonstrates that the flame can be extinguished within the decibel ranges of 85–98 dB and 100–110 dB, within the 2.5–17 m/s airflow range.</li>
<p></p>

<li>Third graph shows that the airflow value decreases as the distance increases. It can be stated that the flame can be extinguished with high airflow at close distances and low airflow at long distances. It was observed that flames can be extinguished within the range of 2.5–17 m/s airflow value.</li>
<p></p>

<li>From the fourth graph we can observe that the range of values required to extinguish the flame at frequencies between 10 and 50 Hz is 85–113 dB.</li>
<p></p>

<li>Fifth graph shows that the flame could be extinguished in the 2–70 Hz frequency range. </li>
<p></p>

<li>In the last graph we can see that flames can be effectively extinguished with frequencies of 10–55 Hz at distances of 10–100 cm, and with frequencies of 12–30 Hz at distances between 100 and 170 cm, for all fuel types.</li>
<p></p>
</ol>
</div>

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:black;
           font-size:130%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;
              text-align:center;">
                            EvalML AutoML
              
</p>
</div>

In [None]:
import evalml

from evalml.automl import AutoMLSearch

# Separate the features from the STATUS target. The `columns=` keyword is used
# because the positional axis argument of DataFrame.drop (`df.drop('STATUS', 1)`)
# was deprecated in pandas 1.3 and removed in pandas 2.0.
X = df.drop(columns='STATUS')
y = df['STATUS']

# Hold out 20% of the rows as a test set for scoring the selected pipeline.
X_train, X_test, y_train, y_test = evalml.preprocessing.split_data(X, y, problem_type='binary', test_size=.2)

In [None]:
from evalml import AutoMLSearch

# Settings for the AutoML search: binary classification optimised for F1,
# no additional objectives, limited to five batches.
search_settings = dict(
    X_train=X_train,
    y_train=y_train,
    problem_type="binary",
    objective="F1",
    additional_objectives=None,
    max_batches=5,
)

automl = AutoMLSearch(**search_settings)
automl.search()

In [None]:
# Rankings table produced by the AutoML search above.
automl.rankings

In [None]:
# Pipeline selected by the search as the best one.
automl.best_pipeline

In [None]:
# Describe the pipeline whose id is in the first row of the rankings table.
top_pipeline_id = automl.rankings.iloc[0]['id']
automl.describe_pipeline(top_pipeline_id)

In [None]:
# Score the best pipeline on the held-out test split with four objectives.
automl.best_pipeline.score(X_test,y_test,objectives=['auc','f1','Precision','Recall'])

In [None]:
# Predictions of the best pipeline for the held-out test features.
pred = automl.best_pipeline.predict(X_test)
pred

<div style=background-color:lightblue;>
<p style="font-family: Arials, sans-serif; padding:10px; font-size: 17px; color: #000000">Thank you for reading this work! Any feedback on it would be greatly appreciated.</p>
</div>


In [None]:
# NOTE(review): this looks like an empty cell that was exported as `nan`; as code
# it is a bare name with no definition visible in this notebook — confirm and delete.
nan