# Day 32

### Decision Tree

In [1]:
from sklearn import tree
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import graphviz

In [2]:
# Load the bundled iris dataset and unpack the feature matrix and label vector.
iris = load_iris()
x, y = iris.data, iris.target
# Display both shapes as the cell result.
x.shape, y.shape

((150, 4), (150,))

In [3]:
# Inspect which container type holds the feature matrix.
type(x)

numpy.ndarray

In [4]:
# Fit a decision tree classifier on the iris features and labels.
# random_state is pinned: scikit-learn randomly permutes candidate features at
# every split, so an unseeded tree may differ from one run to the next and the
# exported graph would not be reproducible under Restart & Run All.
Dtree = tree.DecisionTreeClassifier(random_state=0)
Dtree.fit(x, y)
# NOTE: this scores the same samples the tree was fitted on, so it measures
# training accuracy, not performance on unseen data.
Dtree.score(x, y)

1.0

In [5]:
# Classify one hand-written row of four feature values with the fitted tree.
sample = [[5.2, 3.6, 1.1, 0.3]]
Dtree.predict(sample)

array([0])

In [6]:
# Export the fitted tree as Graphviz data, labelled with the iris feature and
# class names. out_file=None is passed so the result is kept in `tree_graph`
# rather than written to a file.
tree_graph = tree.export_graphviz(
    Dtree,
    out_file=None,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)

In [7]:
# Wrap the exported Graphviz data in a graphviz.Source object.
graph = graphviz.Source(tree_graph)

In [9]:
# Disabled: would render the tree graph to a PNG file.
# NOTE(review): presumably requires the Graphviz executables to be installed - confirm before enabling.
# graph.render(format='png')

In [11]:
# Disabled: leaving `graph` as the cell's last expression would presumably display the tree inline.
# graph

### Naive Bayes Classifier

In [None]:
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB

In [12]:
# Rebind x / y to the iris features and labels for the Naive Bayes models below.
x, y = iris.data, iris.target

<h5>Bernoulli Naive Bayes</h5>

In [None]:
# Fit Bernoulli Naive Bayes on the iris data and report accuracy on that same data.
# NOTE(review): BernoulliNB thresholds its inputs at `binarize` (0.0 by default
# per the scikit-learn docs). If every iris measurement is positive, all
# features collapse to 1 and the model cannot separate classes - confirm that
# this is the intended demonstration.
BNB = BernoulliNB().fit(x, y)
BNB.score(x, y)

<h5>Gaussian Naive Bayes</h5>

In [None]:
# Fit Gaussian Naive Bayes on the iris data and report accuracy on that same data.
GNB = GaussianNB().fit(x, y)
GNB.score(x, y)

<h5>Multinomial Naive Bayes</h5>

In [None]:
# Fit Multinomial Naive Bayes on the iris data and report accuracy on that same data.
MNB = MultinomialNB().fit(x, y)
MNB.score(x, y)

#### Prediction of some values using different Bayes Algorithms

In [None]:
# Rows 10..99 of the iris feature matrix.
# NOTE: these rows are a slice of the same data the models above were fitted on,
# so predictions on them are not an out-of-sample check.
x_test = iris["data"][10:100]

In [None]:
# Predict labels for x_test with each fitted Naive Bayes model.
# Order is Gaussian, Bernoulli, Multinomial; the plotting cell indexes y_pred
# by position, so this order must be kept.
# The predictions are stored exactly as predict() returns them. The previous
# `- 1` offset shifted every prediction by one, producing values that no longer
# correspond to the labels the models were trained on.
y_pred = [model.predict(x_test) for model in (GNB, BNB, MNB)]
y_pred

In [None]:
# Plot each model's predictions over the rows of x_test on one shared axis.
# A legend, title and axis labels are added because the three lines may overlap
# and cannot otherwise be told apart when the notebook is skimmed.
fig, ax = plt.subplots()
positions = np.arange(len(x_test))

# y_pred is ordered Gaussian, Bernoulli, Multinomial; line formats are unchanged.
line_formats = ("r:", "g--", "b-")
model_names = ("Gaussian", "Bernoulli", "Multinomial")
for preds, fmt, name in zip(y_pred, line_formats, model_names):
    ax.plot(positions, preds, fmt, label=name)

ax.set_xlabel("Row position in x_test")
ax.set_ylabel("y_pred value")
ax.set_title("Naive Bayes predictions on x_test")
ax.grid(axis="both")
ax.legend()
plt.show()

### HamSpam Classification

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
# Read the ham/spam CSV, assigning the three column names explicitly.
# NOTE(review): with `names=` and no `header=` argument pandas treats the first
# line of the file as data; if hamspam.csv carries its own header line it ends
# up as row 0 - confirm against the file.
df = pd.read_csv(
    "./hamspam.csv",
    names=["index", "Type", "News"],
)
# Show the text of the second row.
df["News"].iloc[1]

In [None]:
# Target labels: the "Type" column of the ham/spam frame.
y = df.loc[:, "Type"]
y


In [None]:
# Message texts from the "News" column, copied into a NumPy array for the vectorizer.
news_column = df.loc[:, "News"]
x = np.array(news_column)
x

In [None]:
# Learn a vocabulary from the message strings and encode them as a
# document-term count matrix in one step.
vectorizer = CountVectorizer()
X_vector = vectorizer.fit_transform(raw_documents=x)

In [None]:
# Compare the encoded matrix shape with the label vector shape.
# The shape is read straight from the vectorizer output: calling toarray()
# first would allocate the entire dense matrix only to discard it.
X_vector.shape, y.shape

In [None]:
# With x in vector form and y as labels, fit a Gaussian Naive Bayes model.
# The count matrix is converted to a dense array once and reused for both
# fit() and score(), instead of densifying it separately for each call.
X_dense = X_vector.toarray()
gausian_bayes = GaussianNB()
gausian_bayes.fit(X_dense, y)
# Accuracy on the same data the model was fitted on (training accuracy).
gb_s = gausian_bayes.score(X_dense, y)

In [None]:
# One hand-written message, encoded with the already-fitted vectorizer
# (transform only, so the vocabulary is not refitted).
X_test = [
    "Hello user, we are proud to inform you that you are a lucky draw winner of this month",
]
X_test_vector = vectorizer.transform(X_test)

In [None]:
# Densify the encoded message, matching the dense input the Gaussian model
# was fitted on, then predict its label.
X_test_dense = X_test_vector.toarray()
gausian_bayes.predict(X_test_dense)

In [None]:
# Fit Multinomial Naive Bayes on the word-count matrix.
# MultinomialNB accepts sparse matrices, so the vectorizer output is passed
# as-is; the earlier toarray() calls allocated a full dense copy twice for no
# benefit.
multinomial_bayes = MultinomialNB()
multinomial_bayes.fit(X_vector, y)
# Accuracy on the same data the model was fitted on (training accuracy).
mb_s = multinomial_bayes.score(X_vector, y)

In [None]:
# Fit Bernoulli Naive Bayes on the word-count matrix.
# BernoulliNB accepts sparse matrices, so the vectorizer output is passed
# as-is; the earlier toarray() calls allocated a full dense copy twice for no
# benefit.
bernoulli_bayes = BernoulliNB()
bernoulli_bayes.fit(X_vector, y)
# Accuracy on the same data the model was fitted on (training accuracy).
bb_s = bernoulli_bayes.score(X_vector, y)

In [None]:


# Horizontal bar chart comparing the accuracy scores of the three Bayes models.
plt.rcdefaults()  # reset matplotlib rc settings before drawing
fig, ax = plt.subplots()

model = ("Gaussian", "Bernoulli", "Multinomial")
performance = np.array([gb_s, bb_s, mb_s])  # same order as `model`
y_pos = np.arange(len(model))

ax.barh(y_pos, performance, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(model)
ax.invert_yaxis()  # first entry of `model` at the top
ax.set(xlabel='Performance', title='Accuracy scores of different Bayes Models')
ax.grid(axis="x", alpha=0.3)

plt.show()