## **Importing Libraries**

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score

## **Data Collection**

In [None]:
# Load the Iris measurements from "iris.csv"; the path is relative, so the file
# must sit in the notebook's working directory.
data = pd.read_csv("iris.csv")

## **Data Analysis**

In [None]:
# Preview the first five rows to check the column names and value formats.
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
# Summary statistics (count, mean, std, min/max, quartiles) for the numeric columns.
data.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [None]:
# Column dtypes and non-null counts; `species` is the only non-numeric column.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [None]:
# Number of missing values in each column (`isnull` is an alias of `isna`).
data.isna().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [None]:
# Class balance check: number of rows per species label.
data['species'].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: species, dtype: int64

In [None]:
# (rows, columns) of the full dataset, label column included.
data.shape

(150, 5)

## **Encoding the Categorical Columns**

In [None]:
# Encode the species labels as integer class ids (1, 2, 3).
#
# Why not `data.replace(..., inplace=True)`:
#   * in-place mutation silently changes the frame that earlier cells displayed;
#   * `replace` with a str -> int mapping relies on implicit object -> int
#     downcasting, which newer pandas releases deprecate (FutureWarning) and
#     which would otherwise leave `species` as an `object` column.
# Mapping explicitly and casting to int64 gives the same result on every pandas
# version. Labels that are already encoded pass through unchanged, so the cell
# can be re-run safely; an unexpected label makes the int64 cast raise instead
# of slipping through as a string.
SPECIES_CODES = {"Iris-setosa": 1, "Iris-versicolor": 2, "Iris-virginica": 3}
data = data.assign(
    species=data["species"]
    .map(lambda label: SPECIES_CODES.get(label, label))
    .astype("int64")
)

In [None]:
# Re-check the first rows: `species` should now hold integer codes instead of names.
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1


## **Separating target and features**

In [None]:
# Feature matrix: every column except the label.
x = data.drop(columns=["species"])
# Target: the label column, selected with a list so it stays a one-column DataFrame.
y = data.loc[:, ["species"]]

In [None]:
# Show the first rows of the features and of the target, one after the other.
print(x.head(), y.head(), sep="\n")

   sepal_length  sepal_width  petal_length  petal_width
0           5.1          3.5           1.4          0.2
1           4.9          3.0           1.4          0.2
2           4.7          3.2           1.3          0.2
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2
   species
0        1
1        1
2        1
3        1
4        1


In [None]:
# Features and target must have the same number of rows.
print(x.shape, y.shape, sep="\n")

(150, 4)
(150, 1)


## **Splitting the data into Train and Test data**

In [None]:
# Hold out part of the data for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,   # 20% of the rows go to the test set
    random_state=2,  # fixed seed so the split is the same on every run
)

In [None]:
# Shapes of the four pieces produced by the split, printed on one line.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

(120, 4) (30, 4) (120, 1) (30, 1)


## **Model Training**

**Using LogisticRegression**

In [None]:
# Logistic-regression classifier with scikit-learn's default hyperparameters.
model_1 = LogisticRegression()

In [None]:
# y_train is a one-column DataFrame; scikit-learn expects a 1-D target of shape
# (n_samples,) and emits a DataConversionWarning for a column vector, so flatten it.
model_1.fit(x_train, y_train.to_numpy().ravel())

  y = column_or_1d(y, warn=True)


**Using RandomForestClassifier**

In [None]:
# Random forests are stochastic (bootstrap samples, random feature subsets).
# Fix the seed so the reported accuracy is reproducible on Restart & Run All;
# the value matches the seed used for the train/test split.
model_2 = RandomForestClassifier(random_state=2)

In [None]:
# y_train is a one-column DataFrame; passing it as-is triggers scikit-learn's
# "column-vector y was passed when a 1d array was expected" warning, so hand
# the estimator a flat (n_samples,) array instead.
model_2.fit(x_train, y_train.to_numpy().ravel())

  model_2.fit(x_train, y_train)


**Using DecisionTreeClassifier**

In [None]:
# Decision-tree training is randomised (features are permuted at each split and
# ties between equally good splits are broken at random). Fix the seed so the
# reported accuracy is reproducible; the value matches the train/test split seed.
model_3 = DecisionTreeClassifier(random_state=2)

In [None]:
# Train the decision tree on the training split.
# NOTE(review): y_train is a one-column DataFrame (shape (120, 1)); no warning
# was recorded for this call, so the tree presumably accepts 2-D targets — confirm.
model_3.fit(x_train, y_train)

## **Model Evaluation**

In [None]:
# Predict on the held-out features with each fitted classifier ...
x_test_prediction_1, x_test_prediction_2, x_test_prediction_3 = (
    classifier.predict(x_test) for classifier in (model_1, model_2, model_3)
)

# ... then score each set of predictions against the true test labels.
accuracy_1, accuracy_2, accuracy_3 = (
    accuracy_score(y_test, predicted)
    for predicted in (x_test_prediction_1, x_test_prediction_2, x_test_prediction_3)
)

In [None]:
# Print one two-line entry per model, with a blank line between entries
# (and none after the last one).
summary = (
    ("LogisticRegression Model\nAccuracy Score: ", accuracy_1),
    ("RandomForestClassifier Model\nAccuracy Score: ", accuracy_2),
    ("DecisionTreeClassifier Model\nAccuracy Score: ", accuracy_3),
)
for position, (heading, score) in enumerate(summary):
    if position:
        print()
    print(heading, score)

LogisticRegression Model
Accuracy Score:  0.9666666666666667

RandomForestClassifier Model
Accuracy Score:  0.9666666666666667

DecisionTreeClassifier Model
Accuracy Score:  0.9333333333333333
