# import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score

# import data

In [None]:
# Read the Iris CSV into a DataFrame; the path is relative to the notebook's
# working directory. The bare name on the last line renders the frame.
train = pd.read_csv(filepath_or_buffer="../input/iris-data/Iris.csv")
train

# Data Analysis

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
train.info()

In [None]:
# Preview the first five rows (five is also the default for head()).
train.head(n=5)

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
train.describe()

In [None]:
# Same summary, but include="all" also covers non-numeric columns
# (unique / top / freq), leaving NaN where a statistic does not apply.
train.describe(include="all")

In [None]:
# dtype of every column in the frame.
train.dtypes

In [None]:
# Pairwise Pearson correlation between the numeric columns.
# The frame also holds the string column "Species" (used as hue/target
# elsewhere in this notebook); DataFrame.corr() raises on non-numeric data in
# pandas >= 2.0, so restrict to numeric columns first. select_dtypes works on
# old and new pandas alike.
train.select_dtypes(include="number").corr()

# Missing_values

In [None]:
# Count missing entries per column (isna is the canonical name; isnull is its alias).
train.isna().sum()

* As we can see, there are no missing values, so the data does not need cleaning.

# columns

In [None]:
# Column labels as a plain Python list.
train.columns.tolist()

# shape

In [None]:
# (number of rows, number of columns) of the full data set.
train.shape

# value_counts

In [None]:
# Number of samples per species, i.e. the class balance of the target.
train["Species"].value_counts()

# data visualization

In [None]:
# Pie chart of how many columns share each dtype, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
dtype_counts = train.dtypes.value_counts()
dtype_counts.plot.pie(ax=ax, autopct="%1.1f%%")
ax.set_title("Data type %")

In [None]:
# Bar chart of the number of rows per species.
sns.countplot(data=train, x="Species")


In [None]:
# Share of each species as an exploded pie chart; the explode list has one
# offset per slice, so it assumes exactly three distinct species.
species_counts = train["Species"].value_counts()
species_counts.plot.pie(
    figsize=(10, 8),
    explode=[0.1, 0.1, 0.1],
    autopct='%1.1f%%',
    shadow=True,
)
plt.show()

In [None]:
# Average of each measurement per species.
# The previous version plotted the row index against "Species", so the
# "average" shown was just the mean row number of each class. Group the
# measurement columns by species and plot their means instead. "Id" is a row
# identifier rather than a measurement, so it is dropped when present.
species_means = (
    train.drop(columns="Id", errors="ignore")
    .groupby("Species")
    .mean()
)

fig, ax = plt.subplots(figsize=(10, 6))
species_means.plot.bar(ax=ax, rot=0)
ax.set_title("Average measurements per species")
ax.set_ylabel("Mean (cm)")
plt.show()


In [None]:
# Histogram of petal length (counts per bin, no KDE overlay).
# sns.distplot is deprecated and removed in recent seaborn releases;
# histplot is its replacement and draws plain counts by default.
sns.histplot(data=train, x="PetalLengthCm")

* This histogram shows how petal length is distributed across the iris flowers.

# Density plot (KDE)

In [None]:
# Smoothed density estimate of petal length, with the area under the curve filled.
# `shade` is deprecated and removed in recent seaborn releases; `fill` is the
# supported keyword for the same effect.
sns.kdeplot(data=train, x="PetalLengthCm", fill=True)

# 2D KDE plot

In [None]:
# Joint 2D density of petal length vs. petal width, with marginal densities.
sns.jointplot(data=train, x="PetalLengthCm", y="PetalWidthCm", kind="kde")

In [None]:
# One histogram per numeric column, laid out on a 10x10 inch figure.
train.hist(figsize=(10, 10), edgecolor='red')
plt.show()

In [None]:
# Scatter-plot matrix of every pair of numeric columns, coloured by species.
sns.pairplot(data=train, hue="Species")

In [None]:
# Correlation matrix rendered as a colour-graded table with two decimals.
# Two fixes for current pandas:
#   * corr() is computed on numeric columns only (the string "Species" column
#     makes DataFrame.corr() raise in pandas >= 2.0);
#   * Styler.set_precision() was removed in pandas 2.0; format(precision=...)
#     is the supported way to set the displayed precision.
(
    train.select_dtypes(include="number")
    .corr()
    .style.background_gradient(cmap='coolwarm')
    .format(precision=2)
)

In [None]:
# Annotated heatmap of the correlation matrix.
# Restrict to numeric columns first: the string "Species" column makes
# DataFrame.corr() raise in pandas >= 2.0.
numeric_corr = train.select_dtypes(include="number").corr()
sns.heatmap(numeric_corr, annot=True, cmap="RdYlGn")

1. It seems that there is a high correlation between PetalLength and PetalWidth.

# Data splitting

In [None]:
# Display the full frame once more before separating features and target.
train

Now we are going to split our data into train and test sets, so we first need to separate the target column "Species" from the features.

In [None]:
# Feature matrix and target vector.
# Besides the target, the "Id" column is removed from the features: it is a
# row identifier, and if the file is ordered by species it would let a model
# read the label straight from the row number (label leakage).
# errors="ignore" keeps the cell working on copies of the data set that have
# no "Id" column; a missing "Species" column still raises.
x = train.drop(columns="Species").drop(columns="Id", errors="ignore")
y = train["Species"]

After dropping the target, we check the new shape of the feature matrix.

In [None]:
# (rows, columns) of the feature matrix, i.e. `train` without the target column.
x.shape

Here we split our data into a training part and a test part.

In [None]:
# Hold out 40% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=5,
)

In [None]:
# Length of the full target vector (one label per row of `train`).
y.shape

In [None]:
# (rows, columns) of the training features returned by train_test_split.
x_train.shape

In [None]:
# Number of training labels; should match the row count of x_train.
y_train.shape

In [None]:
# (rows, columns) of the held-out test features (test_size=0.4 of the data).
x_test.shape

In [None]:
# Number of held-out test labels; should match the row count of x_test.
y_test.shape

* After splitting the data, we need to apply machine learning algorithms. Our data has a target, so we must use supervised machine learning algorithms.

# Logistic regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression, trained on the training split and scored on both splits.
# The previous version fitted and predicted on the full data set (x, y), so
# the printed number was training accuracy and the train/test split was never
# used. Fitting on x_train and scoring on x_test gives an honest estimate and
# matches how the SVM and KNN cells report their results.
# max_iter is raised above the default of 100 so the solver has room to
# converge on these unscaled features.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(x_train, y_train)
y_pred = logreg.predict(x_test)

print('The accuracy of the logistic regression classifier on training data is {:.2f} out of 1'.format(logreg.score(x_train, y_train)))
print('The accuracy of the logistic regression classifier on test data is {:.2f} out of 1'.format(metrics.accuracy_score(y_test, y_pred)))

# SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# RBF-kernel support vector classifier: fit on the training split, then
# report mean accuracy on the training and the test split.
svm = SVC(C=1.0, kernel='rbf', gamma=0.1, random_state=0)
svm.fit(x_train, y_train)

svm_train_accuracy = svm.score(x_train, y_train)
svm_test_accuracy = svm.score(x_test, y_test)
print('The accuracy of the svm classifier on training data is {:.2f} out of 1'.format(svm_train_accuracy))
print('The accuracy of the svm classifier on test data is {:.2f} out of 1'.format(svm_test_accuracy))

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# 5-nearest-neighbours classifier using the Minkowski metric with p=2
# (i.e. Euclidean distance): fit on the training split, then report mean
# accuracy on the training and the test split.
knn = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)
knn.fit(x_train, y_train)

knn_train_accuracy = knn.score(x_train, y_train)
knn_test_accuracy = knn.score(x_test, y_test)
print('The accuracy of the knn classifier is {:.2f} out of 1 on training data'.format(knn_train_accuracy))
print('The accuracy of the knn classifier is {:.2f} out of 1 on test data'.format(knn_test_accuracy))