### Importing Dependencies

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the Iris dataset from "Iris.csv" (path is relative to the working directory).
data = pd.read_csv("Iris.csv")

In [None]:
### Accessing the Dataset

In [None]:
# Preview the first rows of the dataset.
data.head()

In [None]:
# Inspect 10 randomly sampled rows (no seed is set, so the rows differ per run).
data.sample(10)

In [None]:
# Descriptive summary statistics for the dataset.
data.describe()

In [None]:
# Print column dtypes, non-null counts and memory usage.
data.info()

### Data Analysis

In [None]:
## Checking null values

# Number of missing values in each column.
data.isnull().sum()

In [None]:
## Id is not necessary for the analysis

# `columns=` already identifies the axis; the extra `axis=1` was redundant
# (pandas documents `columns` as the alternative to `labels` + `axis=1`).
data.drop(columns='Id', inplace=True)

In [None]:
# Show the column labels currently in the frame.
data.columns

In [None]:
# Count rows that are exact duplicates of an earlier row.
data.duplicated().sum()

### Data Visualization

#### Univariate Analysis

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Distribution of sepal length (histogram with a KDE overlay).
# displot is a figure-level function that always creates its own figure, so
# a preceding plt.figure(figsize=...) only produced an extra empty figure.
# The 10x10 inch size is requested through displot's own height/aspect.
sns.displot(data=data, x='SepalLengthCm', kde=True, height=10, aspect=1)

In [None]:
# Mean and median of sepal length, printed one per line.
print(
    np.mean(data['SepalLengthCm']),
    np.median(data['SepalLengthCm']),
    sep='\n',
)


In [None]:
# Box plot of sepal length to look for outliers.
sns.boxplot(data = data, x = 'SepalLengthCm')

In [None]:
# Distribution of sepal width (histogram with a KDE overlay).
# displot is a figure-level function that always creates its own figure, so
# a preceding plt.figure(figsize=...) only produced an extra empty figure.
# The 10x10 inch size is requested through displot's own height/aspect.
sns.displot(data=data, x='SepalWidthCm', kde=True, height=10, aspect=1)

In [None]:
# Mean and median of sepal width, printed one per line.
print(
    np.mean(data['SepalWidthCm']),
    np.median(data['SepalWidthCm']),
    sep='\n',
)


In [None]:
# Box plot of sepal width to look for outliers.
sns.boxplot(data = data, x = 'SepalWidthCm')

In [None]:
# Remove SepalWidthCm outliers using the 1.5 * IQR fences.
Q1 = data['SepalWidthCm'].quantile(0.25)
Q3 = data['SepalWidthCm'].quantile(0.75)

IQR = Q3 - Q1

lwr_bnd = Q1 - 1.5 * IQR
upr_bnd = Q3 + 1.5 * IQR

# Keep only rows strictly inside the fences. The explicit .copy() makes the
# filtered frame independent of the original, so later column assignments on
# `data` do not raise SettingWithCopyWarning.
data = data[
    (data['SepalWidthCm'] > lwr_bnd) & (data['SepalWidthCm'] < upr_bnd)
].copy()

In [None]:
# Re-draw the sepal-width box plot on the filtered data.
sns.boxplot(data = data, x = 'SepalWidthCm')

In [None]:
# Distribution of petal length (histogram with a KDE overlay).
# displot is a figure-level function that always creates its own figure, so
# a preceding plt.figure(figsize=...) only produced an extra empty figure.
# The 10x10 inch size is requested through displot's own height/aspect.
sns.displot(data=data, x='PetalLengthCm', kde=True, height=10, aspect=1)

In [None]:
# Mean and median of petal length, printed one per line.
print(
    np.mean(data['PetalLengthCm']),
    np.median(data['PetalLengthCm']),
    sep='\n',
)


In [None]:
# Box plot of petal length to look for outliers.
sns.boxplot(data = data, x = 'PetalLengthCm')

In [None]:
# Distribution of petal width (histogram with a KDE overlay).
# displot is a figure-level function that always creates its own figure, so
# a preceding plt.figure(figsize=...) only produced an extra empty figure.
# The 10x10 inch size is requested through displot's own height/aspect.
sns.displot(data=data, x='PetalWidthCm', kde=True, height=10, aspect=1)

In [None]:
# Mean and median of petal width, printed one per line.
print(
    np.mean(data['PetalWidthCm']),
    np.median(data['PetalWidthCm']),
    sep='\n',
)


In [None]:
# Box plot of petal width to look for outliers.
sns.boxplot(data = data, x = 'PetalWidthCm')

#### Bivariate Analysis

In [None]:
# Sepal length vs. sepal width, colored by species.
sns.scatterplot(data=data, x='SepalLengthCm', y='SepalWidthCm', hue='Species', palette='Set2')

In [None]:
# np.corrcoef returns the 2x2 correlation matrix; element [0, 1] is the
# correlation between sepal length and sepal width.
correlation_coefficient = np.corrcoef(data['SepalLengthCm'], data['SepalWidthCm'])[0, 1]
print(f'Correlation Coefficient: {correlation_coefficient:.2f}')

In [None]:
# Petal length vs. petal width, colored by species.
sns.scatterplot(data = data, x = 'PetalLengthCm', y = 'PetalWidthCm',hue='Species', palette='Set2')

In [None]:
# np.corrcoef returns the 2x2 correlation matrix; element [0, 1] is the
# correlation between petal length and petal width.
# Note: this rebinds correlation_coefficient from the sepal cell.
correlation_coefficient = np.corrcoef(data['PetalLengthCm'], data['PetalWidthCm'])[0, 1]
print(f'Correlation Coefficient: {correlation_coefficient:.2f}')

In [None]:
# Petal length vs. sepal length, colored by species.
sns.scatterplot(data = data, x = 'PetalLengthCm', y = 'SepalLengthCm',hue='Species', palette='Set2')

In [None]:
# Petal length vs. sepal width, colored by species.
sns.scatterplot(data = data, x = 'PetalLengthCm', y = 'SepalWidthCm',hue='Species', palette='Set2')

In [None]:
# Petal width vs. sepal length, colored by species.
sns.scatterplot(data = data, x = 'PetalWidthCm', y = 'SepalLengthCm',hue='Species', palette='Set2')

In [None]:
# Petal width vs. sepal width, colored by species.
sns.scatterplot(data = data, x = 'PetalWidthCm', y = 'SepalWidthCm',hue='Species', palette='Set2')

In [None]:
# Pairwise correlation matrix; numeric_only=True leaves out non-numeric
# columns such as Species.
data.corr(numeric_only= True)

In [None]:
# Feature columns plotted against Species in the next cell.
Columns = [
    'SepalLengthCm',
    'SepalWidthCm',
    'PetalLengthCm',
    'PetalWidthCm',
]

In [None]:
# Scatter of each feature in Columns against Species, one stacked subplot
# per feature.
plt.figure(figsize=(10, 10))
for i, col in enumerate(Columns):
    # The number of grid rows follows len(Columns) instead of a hard-coded 4,
    # so the layout stays valid if Columns is edited.
    plt.subplot(len(Columns), 1, i + 1)
    sns.scatterplot(x=col, y='Species', data=data, hue='Species', palette='Set2')
    plt.title(f"{col} vs. Species")
    plt.xlabel(col)
    plt.ylabel("Species")

# Show the plot
plt.tight_layout()
plt.show()

In [None]:
# Heatmap of the numeric correlation matrix, with each cell annotated by its value.
sns.heatmap(data.corr(numeric_only= True), annot = True)

In [None]:
# Pairwise relationships between all features, colored by species.
sns.pairplot(data, hue="Species", height = 3)

#### Label Encoding

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encoder used to turn the Species labels into integer codes.
Label = LabelEncoder()

In [None]:
# Fit the encoder on Species and overwrite the column with the integer codes.
# The original labels remain recoverable through Label.classes_ /
# Label.inverse_transform.
data['Species'] = Label.fit_transform(data['Species'])

In [None]:
# Preview the frame after encoding Species.
data.head()

In [None]:
# Number of rows per encoded species label.
data['Species'].value_counts()

### Model 

In [None]:
# Features: every column except the target. `columns=` already identifies
# the axis, so the redundant `axis=1` is dropped.
X = data.drop(columns='Species')
# Target: the Species column.
Y = data['Species']

In [None]:

from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
# NOTE(review): the model-training cell calls train_test_split again with
# random_state=42 and rebinds these four names - confirm which split is intended.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,random_state=19)

In [None]:
# Preview the training features.
X_train.head()

In [None]:
# Preview the test features.
X_test.head()

In [None]:
# Import necessary libraries
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# NOTE: this split (random_state=42) rebinds any X_train / X_test / Y_train /
# Y_test created earlier; every model below is trained and scored on it.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


def _fit_and_score(label, model):
    """Fit ``model`` on the training split and report its test accuracy.

    Args:
        label: Human-readable model name used in the printed line.
        model: Unfitted estimator exposing ``fit`` and ``predict``.

    Returns:
        A ``(predictions, accuracy)`` tuple for the test split.
    """
    model.fit(X_train, Y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(Y_test, predictions)
    print(f"{label} Accuracy:", accuracy)
    return predictions, accuracy


# Logistic Regression
log_reg = LogisticRegression()
Y_pred_log_reg, log_reg_accuracy = _fit_and_score("Logistic Regression", log_reg)

# Support Vector Machine (SVM)
svm = SVC()
Y_pred_svm, svm_accuracy = _fit_and_score("Support Vector Machine", svm)

# Decision Tree Classifier
# random_state makes the tree (and therefore its reported accuracy)
# reproducible between runs; an unseeded tree may differ each time.
dec_tree = DecisionTreeClassifier(random_state=42)
Y_pred_dec_tree, dec_tree_accuracy = _fit_and_score("Decision Tree Classifier", dec_tree)

# K-Nearest Neighbors (KNN) Classifier
knn = KNeighborsClassifier()
Y_pred_knn, knn_accuracy = _fit_and_score("K-Nearest Neighbors Classifier", knn)

## Author's observation: SVM and KNN have the same accuracy

In [None]:
import pickle

In [None]:
# Persist the trained SVM. The context manager closes the file handle (and
# flushes the data to disk) as soon as the dump finishes.
with open('model_svm.pkl', 'wb') as model_file:
    pickle.dump(svm, model_file)

# Loading model to compare the results
# Only unpickle files you created yourself: pickle.load can execute
# arbitrary code from an untrusted file.
with open('model_svm.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Persist the trained KNN model. The context manager closes the file handle
# (and flushes the data to disk) as soon as the dump finishes.
with open('model_knn.pkl', 'wb') as model_file:
    pickle.dump(knn, model_file)

# Loading model to compare the results
# Note: this rebinds `model`, replacing whatever it previously referred to.
# Only unpickle files you created yourself: pickle.load can execute
# arbitrary code from an untrusted file.
with open('model_knn.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Write the cleaned, label-encoded frame to disk without the index column.
# A raw string keeps the Windows backslashes literal: in a normal string
# "\D", "\I" and "\p" are invalid escape sequences (a SyntaxWarning on
# Python 3.12+). The resulting path is unchanged.
# to_csv returns None when given a path, so the result is not bound to a name.
data.to_csv(r"D:\DS_March\Iris\processed_iris.csv", index=False)