In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder as One
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
import pickle

In [2]:
# Read the avalanche observations from the CSV file and preview the first rows.
csv_path = './Avalanche.csv'
df = pd.read_csv(csv_path)
df.head(n=5)

Unnamed: 0,Slope,Forest Density,Snow Density,Air Temperature,Wind,Prediction
0,59,H,52,-17,19,2
1,24,L,23,-8,19,0
2,15,H,30,7,19,0
3,27,L,61,-13,25,1
4,34,H,4,-20,7,0


In [3]:
# One-hot encode the categorical 'Forest Density' column and pass every other
# column through unchanged. In the result the indicator columns come first,
# followed by the remaining columns in their original order.
#
# The column is selected by name rather than by position so that this cell
#   (a) keeps working if the column order of the CSV changes, and
#   (b) raises an error instead of silently encoding the wrong column if it is
#       re-run after `df` has already been replaced by the encoded array below.
ct = ColumnTransformer(
    [('one', One(), ['Forest Density'])],
    remainder='passthrough',
)
df = ct.fit_transform(df)  # NOTE: rebinds `df` from a DataFrame to a 2-D float array
df

array([[  1.,   0.,   0., ..., -17.,  19.,   2.],
       [  0.,   1.,   0., ...,  -8.,  19.,   0.],
       [  1.,   0.,   0., ...,   7.,  19.,   0.],
       ...,
       [  0.,   0.,   1., ...,  13.,  35.,   1.],
       [  0.,   1.,   0., ...,  -4.,  35.,   1.],
       [  0.,   0.,   1., ..., -29.,   8.,   2.]])

In [4]:
# Split the encoded array into a feature matrix and a target vector.
# Column 0 (the first one-hot indicator) is left out of the features and the
# last column is used as the target.
x = df[:, 1:-1]
y = df[:, -1]
for part in (x, y):
    print(part)

[[  0.   0.  59.  52. -17.  19.]
 [  1.   0.  24.  23.  -8.  19.]
 [  0.   0.  15.  30.   7.  19.]
 ...
 [  0.   1.  34.  33.  13.  35.]
 [  1.   0.  34.  51.  -4.  35.]
 [  0.   1.  34.  59. -29.   8.]]
[2. 0. 0. 1. 0. 1. 2. 1. 0. 2. 1. 1. 2. 2. 2. 2. 2. 1. 1. 0. 1. 1. 0. 0.
 2. 1. 1. 2. 2. 1. 1. 2. 1. 2. 0. 2. 1. 2. 2. 2. 1. 2. 1. 2. 1. 0. 1. 2.
 0. 1. 2. 1. 0. 1. 1. 0. 0. 0. 2. 2. 1. 0. 0. 1. 0. 1. 1. 0. 2. 1. 2. 0.
 1. 1. 2. 0. 2. 0. 0. 1. 1. 2. 1. 2. 1. 1. 1. 0. 2. 0. 1. 1. 1. 2. 2. 1.
 2. 1. 2. 2. 0. 0. 1. 0. 1. 2. 1. 0. 2. 1. 1. 2. 2. 2. 2. 2. 1. 1. 0. 1.
 1. 2. 0. 0. 1. 0. 1. 2. 1. 0. 2. 1. 1. 2. 2. 2. 2. 2. 1. 1. 0. 1. 1. 1.
 0. 0. 0. 2. 2. 1. 0. 0. 1. 0. 1. 1. 0. 2. 1. 2. 0. 1. 1. 2. 0. 2. 2. 1.
 0. 0. 1. 0. 1. 1. 0. 2. 1. 2. 0. 2. 2. 1. 0. 0. 1. 0. 1. 1. 0. 2. 1. 2.
 0. 0. 1. 0. 1. 2. 1. 0. 2. 1. 1. 2.]


In [5]:
# Flag potential outliers: entries whose absolute z-score exceeds Z_THRESHOLD.
#
# Only the continuous feature columns are scored. In the encoded array the
# leading columns are 0/1 one-hot indicators and the last column is the class
# label; z-scoring those is not meaningful and would flag every row belonging
# to a rare category as an "outlier".
Z_THRESHOLD = 3

# Number of indicator columns produced by the one-hot step of `ct`.
n_dummy_cols = len(ct.named_transformers_['one'].categories_[0])
# Positions (in `df`) of the continuous features: after the indicators,
# before the trailing label column.
continuous_cols = np.arange(n_dummy_cols, df.shape[1] - 1)

z = np.abs(stats.zscore(df[:, continuous_cols], axis=0))
rows, cols = np.where(z > Z_THRESHOLD)

# Report column positions relative to the full encoded array so that
# `outliers` is still a (row_indices, column_indices) pair into `df`.
outliers = (rows, continuous_cols[cols])
outliers

(array([], dtype=int64), array([], dtype=int64))

In [6]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=0,
)

In [7]:
# Fit a decision tree (default settings, fixed seed) on the training split.
# The fitted estimator returned by `fit` is the cell's displayed output.
dtree = DecisionTreeClassifier(random_state=0)
dtree.fit(X=X_train, y=y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

In [8]:
# Decision-tree class predictions for the held-out test split.
predictions = dtree.predict(X=X_test)

In [9]:
# Per-class precision / recall / F1 for the decision tree, followed by its
# confusion matrix.
dtree_report = classification_report(y_test, predictions)
dtree_cm = confusion_matrix(y_test, predictions)
print(dtree_report)
print(dtree_cm)

              precision    recall  f1-score   support

         0.0       0.75      0.92      0.83        13
         1.0       0.88      0.79      0.83        19
         2.0       1.00      0.89      0.94         9

    accuracy                           0.85        41
   macro avg       0.88      0.87      0.87        41
weighted avg       0.87      0.85      0.86        41

[[12  1  0]
 [ 4 15  0]
 [ 0  1  8]]


In [10]:
# Fit a 100-tree random forest (fixed seed) on the same training split.
# The fitted estimator returned by `fit` is the cell's displayed output.
rfc = RandomForestClassifier(random_state=0, n_estimators=100)
rfc.fit(X=X_train, y=y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [11]:
# Random-forest predictions for the held-out test split, then the confusion
# matrix followed by the per-class precision / recall / F1 report.
rfc_pred = rfc.predict(X=X_test)

rfc_cm = confusion_matrix(y_test, rfc_pred)
rfc_report = classification_report(y_test, rfc_pred)

print(rfc_cm)
print(rfc_report)

[[13  0  0]
 [ 3 16  0]
 [ 0  1  8]]
              precision    recall  f1-score   support

         0.0       0.81      1.00      0.90        13
         1.0       0.94      0.84      0.89        19
         2.0       1.00      0.89      0.94         9

    accuracy                           0.90        41
   macro avg       0.92      0.91      0.91        41
weighted avg       0.91      0.90      0.90        41



In [12]:
# Overall accuracy of the random forest on the held-out test split
# (fraction of test samples whose predicted class equals the true class).
accuracy_score(y_test, rfc.predict(X_test))

0.9024390243902439