In [37]:
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [38]:
# Read the iris measurements from a CSV file in the working directory.
iris_csv_path = 'iris.data'
raw_data = pd.read_csv(iris_csv_path)

In [39]:
# Preview the first ten rows. Leaving the frame as the cell's last expression
# (instead of wrapping it in print) lets Jupyter render it as a table, so the
# columns are not wrapped across several plain-text blocks.
raw_data.head(10)

   SepalLength(Cm)  SepalWidth(Cm)  PetalLength(Cm)  PetalWidth(Cm)  \
0              5.1             3.5              1.4             0.2   
1              4.9             3.0              1.4             0.2   
2              4.7             3.2              1.3             0.2   
3              4.6             3.1              1.5             0.2   
4              5.0             3.6              1.4             0.2   
5              5.4             3.9              1.7             0.4   
6              4.6             3.4              1.4             0.3   
7              5.0             3.4              1.5             0.2   
8              4.4             2.9              1.4             0.2   
9              4.9             3.1              1.5             0.1   

       Species  
0  Iris-setosa  
1  Iris-setosa  
2  Iris-setosa  
3  Iris-setosa  
4  Iris-setosa  
5  Iris-setosa  
6  Iris-setosa  
7  Iris-setosa  
8  Iris-setosa  
9  Iris-setosa  


In [40]:
# Show the column labels of the loaded frame.
raw_data.columns

Index(['SepalLength(Cm)', 'SepalWidth(Cm)', 'PetalLength(Cm)',
       'PetalWidth(Cm)', 'Species'],
      dtype='object')

In [41]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
raw_data.describe()

Unnamed: 0,SepalLength(Cm),SepalWidth(Cm),PetalLength(Cm),PetalWidth(Cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [42]:
# Count the missing values in every column and display the result.
missing_per_column = raw_data.isnull().sum()
missing_per_column

SepalLength(Cm)    0
SepalWidth(Cm)     0
PetalLength(Cm)    0
PetalWidth(Cm)     0
Species            0
dtype: int64

In [43]:
# Encoder for the species labels. It is kept in a variable so the same
# instance that is fitted on the labels can later translate predictions back.
encode = LabelEncoder()

In [44]:
# Work on a copy so that raw_data keeps the original species strings.
data = raw_data.copy()

In [45]:
# Label-encode the target variable.
#
# The encoded frame is derived from raw_data (not from data itself), so this
# cell is idempotent: re-running it always fits the encoder on the original
# species strings. Fitting on an already-encoded column would replace
# encode.classes_ with the integer codes, and encode.inverse_transform()
# would then return those integers instead of the species names.
data = raw_data.assign(Species=encode.fit_transform(raw_data['Species']))

In [46]:
# Inspect the encoded frame; Species now holds the integer codes.
# pandas abbreviates long frames, so only the leading and trailing rows are shown.
data

Unnamed: 0,SepalLength(Cm),SepalWidth(Cm),PetalLength(Cm),PetalWidth(Cm),Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [82]:
# Hold out a fraction of the rows for evaluation; the fixed seed makes the
# partition reproducible from run to run.
holdout_fraction = 0.20
split_seed = 365
train, test = train_test_split(
    data,
    test_size=holdout_fraction,
    random_state=split_seed,
)

In [83]:
# Report the (rows, columns) shape of each partition.
train_shape = train.shape
print('Shape of training data: ', train_shape)
test_shape = test.shape
print('Shape of testing data: ', test_shape)


Shape of training data:  (120, 5)
Shape of testing data:  (30, 5)


In [84]:
# Separate each partition into its independent variables (x) and its target (y).
target_col = 'Species'

train_x, train_y = train.drop(columns=target_col), train[target_col]
test_x, test_y = test.drop(columns=target_col), test[target_col]

In [85]:
# Build a logistic-regression classifier with the library's default settings
# and fit it on the training features and labels. The fit call stays as the
# last expression so the estimator's repr is shown as the cell output.
model = LR()
model.fit(X=train_x, y=train_y)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [86]:
# Predict the encoded class label for every held-out row.
predict = model.predict(test_x)

In [87]:
# Translate the integer predictions back to species names with the same
# encoder that produced the codes, then print them.
predicted_species = encode.inverse_transform(predict)
print('Predicted values on test data: ', predicted_species)

Predicted values on test data:  ['Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica'
 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica' 'Iris-virginica'
 'Iris-virginica' 'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-virginica' 'Iris-versicolor' 'Iris-virginica' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-setosa' 'Iris-versicolor'
 'Iris-versicolor']


In [88]:
# Compare the predictions with the true held-out labels and print the
# fraction that match.
print('Accuracy score on test data: ')
test_accuracy = accuracy_score(y_true=test_y, y_pred=predict)
print(test_accuracy)

Accuracy score on test data: 
0.9666666666666667
