# Iris Flower Classification

In [1]:
#Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.preprocessing import LabelEncoder

In [2]:
#Input dataset
data=pd.read_csv("Iris.csv")
print("Dataset : \n",data)

Dataset : 
       Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
0      1            5.1           3.5            1.4           0.2   
1      2            4.9           3.0            1.4           0.2   
2      3            4.7           3.2            1.3           0.2   
3      4            4.6           3.1            1.5           0.2   
4      5            5.0           3.6            1.4           0.2   
..   ...            ...           ...            ...           ...   
145  146            6.7           3.0            5.2           2.3   
146  147            6.3           2.5            5.0           1.9   
147  148            6.5           3.0            5.2           2.0   
148  149            6.2           3.4            5.4           2.3   
149  150            5.9           3.0            5.1           1.8   

            Species  
0       Iris-setosa  
1       Iris-setosa  
2       Iris-setosa  
3       Iris-setosa  
4       Iris-setosa  
..             

In [3]:
#Describe dataset
data.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [5]:
#Checking for null values
data.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [6]:
#Label Encoding
label_encoder =LabelEncoder()
data['Species']= label_encoder.fit_transform(data['Species'])

In [7]:
#Define independent and dependent variables

x=data.iloc[:,:3]
y=data.iloc[:,-1]

print("Independent variables are : \n",x)
print("\nDependent variables are : \n",y)

Independent variables are : 
       Id  SepalLengthCm  SepalWidthCm
0      1            5.1           3.5
1      2            4.9           3.0
2      3            4.7           3.2
3      4            4.6           3.1
4      5            5.0           3.6
..   ...            ...           ...
145  146            6.7           3.0
146  147            6.3           2.5
147  148            6.5           3.0
148  149            6.2           3.4
149  150            5.9           3.0

[150 rows x 3 columns]

Dependent variables are : 
 0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: Species, Length: 150, dtype: int32


In [8]:
#Split training and test data
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=4)

In [9]:
# Fit the model
lm=LinearRegression()
model=lm.fit(x_train,y_train)
print("\nModel : ",model)


Model :  LinearRegression()


In [10]:
#Calculate predicted values
y_pred=model.predict(x_test)
print(f'\ny_test : \n{y_test} \n\ny_pred : \n{y_pred}')


y_test : 
128    2
18     0
130    2
105    2
107    2
78     1
83     1
14     0
5      0
133    2
25     0
11     0
12     0
63     1
113    2
34     0
60     1
2      0
24     0
123    2
35     0
124    2
68     1
26     0
29     0
19     0
41     0
16     0
20     0
101    2
84     1
47     0
108    2
1      0
93     1
144    2
125    2
92     1
86     1
46     0
135    2
7      0
65     1
10     0
132    2
Name: Species, dtype: int32 

y_pred : 
[ 1.92203501  0.04025646  2.20225825  1.8710475   1.84302057  1.11909531
  1.2284509  -0.03017878 -0.23335146  1.9648499   0.12350109 -0.20029588
 -0.10418756  0.94005503  1.60247654  0.20018469  0.80650444 -0.30695367
 -0.02320287  1.84924594  0.2184836   1.84033107  1.17781567  0.05463785
  0.06085491 -0.0979079   0.35932277 -0.08350352  0.07409387  1.42306099
  1.02842221  0.28076281  1.787342   -0.22873768  1.19418392  2.11278187
  2.00106414  1.32107957  1.36391745  0.26990068  2.30502151 -0.20419041
  1.07784411 -0.12399586  1.97652

In [11]:
#Performance of the model
print("\nCoefficient of determination : \n",model.coef_)
print("\nMean squared error : \n",mean_squared_error(y_test,y_pred))
print("\nR squared error : \n",r2_score(y_test,y_pred))


Coefficient of determination : 
 [ 0.01362254  0.25297816 -0.20621446]

Mean squared error : 
 0.04275450205020629

R squared error : 
 0.9432648318141102
