### **Importing the Important libraries**

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## **Importing the dataset and exploring it**

In [3]:
data=pd.read_csv("/content/iris.data")

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   5.1          149 non-null    float64
 1   3.5          149 non-null    float64
 2   1.4          149 non-null    float64
 3   0.2          149 non-null    float64
 4   Iris-setosa  149 non-null    object 
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


In [5]:
data.head()

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [6]:
data.tail()

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica
148,5.9,3.0,5.1,1.8,Iris-virginica


## **Giving the columns or features names as they do not exist in the given dataset**

In [7]:
column_names = ['feature_1', 'feature_2', 'feature_3','feature_4','name']
data.columns = column_names

In [8]:
data.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,name
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [9]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   feature_1  149 non-null    float64
 1   feature_2  149 non-null    float64
 2   feature_3  149 non-null    float64
 3   feature_4  149 non-null    float64
 4   name       149 non-null    object 
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


In [10]:
data.describe()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4
count,149.0,149.0,149.0,149.0
mean,5.848322,3.051007,3.774497,1.205369
std,0.828594,0.433499,1.759651,0.761292
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.4,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


## **Checking how many types of outputs are there and then encoding them into numbers**

In [13]:
unique_values = data['name'].unique()
num_unique_values = len(unique_values)

print(f"Number of unique categorical values in '{'name'}': {num_unique_values}")


Number of unique categorical values in 'name': 3


In [14]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
data['name'] = label_encoder.fit_transform(data['name'])

In [15]:
data.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,name
0,4.9,3.0,1.4,0.2,0
1,4.7,3.2,1.3,0.2,0
2,4.6,3.1,1.5,0.2,0
3,5.0,3.6,1.4,0.2,0
4,5.4,3.9,1.7,0.4,0


In [16]:
data.tail()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,name
144,6.7,3.0,5.2,2.3,2
145,6.3,2.5,5.0,1.9,2
146,6.5,3.0,5.2,2.0,2
147,6.2,3.4,5.4,2.3,2
148,5.9,3.0,5.1,1.8,2


## **Shuffling the dataset randomly**

In [17]:
data = data.sample(frac=1, random_state=42)

In [18]:
data.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,name
73,6.4,2.9,4.3,1.3,1
18,5.1,3.8,1.5,0.3,0
117,7.7,2.6,6.9,2.3,2
78,5.7,2.6,3.5,1.0,1
76,6.7,3.0,5.0,1.7,1


## **Splitting the dataset into train and test**

In [19]:
from sklearn.model_selection import train_test_split

X = data.drop('name', axis=1)
y = data['name']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# **Applying the logostic regression model**

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [21]:
logistic_reg_model = LogisticRegression()
logistic_reg_model.fit(X_train, y_train)

In [22]:
y_pred = logistic_reg_model.predict(X_test)

In [23]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(y_test, y_pred)
print('Classification Report:')
print(class_report)


Accuracy: 0.97
Confusion Matrix:
[[12  0  0]
 [ 0 11  1]
 [ 0  0  6]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      0.92      0.96        12
           2       0.86      1.00      0.92         6

    accuracy                           0.97        30
   macro avg       0.95      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30



# **Applying the decision tree model**

In [24]:
from sklearn.tree import DecisionTreeClassifier

decision_tree_model = DecisionTreeClassifier(random_state=42)

decision_tree_model.fit(X_train, y_train)

y_pred_decision_tree = decision_tree_model.predict(X_test)

accuracy_decision_tree = accuracy_score(y_test, y_pred_decision_tree)
print(f'Decision Tree Accuracy: {accuracy_decision_tree:.2f}')

Decision Tree Accuracy: 0.90


# **Applying the random forest model**

In [25]:
from sklearn.ensemble import RandomForestClassifier

random_forest_model = RandomForestClassifier(n_estimators=100, random_state=42)

random_forest_model.fit(X_train, y_train)

y_pred_random_forest = random_forest_model.predict(X_test)

accuracy_random_forest = accuracy_score(y_test, y_pred_random_forest)
print(f'Random Forest Accuracy: {accuracy_random_forest:.2f}')

Random Forest Accuracy: 0.93


# **Applying the Support vector machine model**

In [26]:
from sklearn.svm import SVC

svm_model = SVC(kernel='linear', C=1, random_state=42)

svm_model.fit(X_train, y_train)

y_pred_svm = svm_model.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f'SVM Accuracy: {accuracy_svm:.2f}')


SVM Accuracy: 0.93
