## Import Dependencies 

In [1]:
import pandas as pd
import numpy as np
import plotly
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.offline as pyo
import cufflinks as cf  
from plotly.offline import init_notebook_mode, plot, iplot
from pandas.plotting import scatter_matrix


In [2]:
 pyo.init_notebook_mode(connected=True)
cf.go_offline()

### Get Data form CSV

In [3]:
iris = pd.read_csv('Iris.csv')
iris.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
iris = iris.drop('Id', axis=1)

In [5]:
iris.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
iris.shape

(150, 5)

## Visualization Data

In [7]:
px.scatter(iris, x='Species', y='PetalWidthCm')

In [8]:
px.line(iris, x='Species', y='PetalWidthCm')

In [10]:
iris = iris.rename(columns={'SepalLengthCm':'SepalLength','SepalWidthCm':'SepalWidth',
                           'PetalLengthCm':'PetalLength','PetalWidthCm':'PetalWidth'})
iris.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### Correlation Visualization

In [11]:
px.scatter_matrix(iris, color='Species', title='Iris', dimensions=['SepalLength','SepalWidth',
                                                                  'PetalLength','PetalWidth'])

### Change Feature and Labels

In [12]:
x= iris.drop('Species', axis=1)

In [13]:
y = iris['Species'].copy()

### Change a Label in Numerical Format

In [14]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y = le.fit_transform(y)

In [15]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

## Dived a data in test and train

In [16]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2, random_state=0)

In [17]:
x_train.shape

(120, 4)

In [18]:
y_train.size

120

## Apply a DecisionTreeClassifier Model

In [19]:
from sklearn import tree

dt_model = tree.DecisionTreeClassifier()
dt_model.fit(x_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [20]:
from sklearn.metrics import accuracy_score

prediction_dt = dt_model.predict(x_test)
accuracy_dt = accuracy_score(y_test, prediction_dt)*100

In [21]:
accuracy_dt

100.0

### Cross Validation

In [22]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(dt_model,x_train, y_train, scoring='neg_mean_squared_error', cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores

array([0.28867513, 0.28867513, 0.40824829, 0.        , 0.        ,
       0.        , 0.40824829, 0.28867513, 0.28867513, 0.28867513])

In [23]:
y_test

array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
       0, 0, 2, 0, 0, 1, 1, 0])

In [24]:
prediction_dt

array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
       0, 0, 2, 0, 0, 1, 1, 0])

## Apply a KNeighborsClassifier Model

In [25]:
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier(n_neighbors=10)
knn_model.fit(x_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                     weights='uniform')

In [26]:
prediction_knn = knn_model.predict(x_test)

In [27]:
accuracy_knn = accuracy_score(y_test, prediction_knn)*100

In [28]:
accuracy_knn

100.0

### Cross Validation

In [29]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(knn_model,x_train, y_train, scoring='neg_mean_squared_error', cv=20)
rmse_scores = np.sqrt(-scores)
rmse_scores

array([0.        , 0.40824829, 0.        , 0.        , 0.40824829,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.40824829, 0.57735027, 0.        , 0.        , 0.        ])

In [30]:
y_test

array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
       0, 0, 2, 0, 0, 1, 1, 0])

In [31]:
prediction_knn

array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
       0, 0, 2, 0, 0, 1, 1, 0])

## Apply KMeans Model

In [44]:
from sklearn.cluster import KMeans

km_model = KMeans(n_clusters=3, random_state=2, n_jobs=3)
km_model.fit(x)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=3, n_init=10, n_jobs=3, precompute_distances='auto',
       random_state=2, tol=0.0001, verbose=0)

In [45]:
centers = km_model.cluster_centers_
centers

array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097],
       [5.006     , 3.418     , 1.464     , 0.244     ],
       [6.85      , 3.07368421, 5.74210526, 2.07105263]])

In [46]:
km_model.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,
       2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2,
       2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0])

## Creating Catagory

In [35]:
catagory = ['Iris-Satosa','Iris-Versicolor','Iris-Virginica']

### Test a random data using DecisionTreeClassifier Model

In [36]:
data = 5.7,3,4.2,1.1

In [37]:
data_array = np.array([data])
data_array

array([[5.7, 3. , 4.2, 1.1]])

In [38]:
predic = dt_model.predict(data_array)

In [39]:
print(catagory[int(predic[0])])

Iris-Versicolor


### Test a random data using KNeighborsClassifier Model

In [40]:
predic = knn_model.predict(data_array)

In [41]:
print(catagory[int(predic[0])])

Iris-Versicolor


### Test a random data using KMeans Model

In [42]:
predic = km_model.predict(data_array)

In [43]:
print(catagory[int(predic[0])])

Iris-Satosa


In [None]:








#Coded and modified by Tarun