# Importing the important packages

In [44]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [45]:
# Importing the Iris Dataset.

data = pd.read_csv('iris.data')
print(data.head())

   5.1  3.5  1.4  0.2  Iris-setosa
0  4.9  3.0  1.4  0.2  Iris-setosa
1  4.7  3.2  1.3  0.2  Iris-setosa
2  4.6  3.1  1.5  0.2  Iris-setosa
3  5.0  3.6  1.4  0.2  Iris-setosa
4  5.4  3.9  1.7  0.4  Iris-setosa


In [46]:
# Checking the dataset columns

data.columns

Index(['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'], dtype='object')

In [47]:
# Changing the dataset column names

data.columns = ['s_length', 's_width', 'p_length', 'p_width', 'Species']

In [48]:
# checking the dataset head

data.head()

Unnamed: 0,s_length,s_width,p_length,p_width,Species
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [52]:
# checking the unique species in the dataset.

data.Species.unique()

array([0, 1, 2])

In [50]:
#label encode the target variable

encode = LabelEncoder()
data['Species'] = encode.fit_transform(data.Species)

In [53]:
# dataset after the encoding.

data.head()

Unnamed: 0,s_length,s_width,p_length,p_width,Species
0,4.9,3.0,1.4,0.2,0
1,4.7,3.2,1.3,0.2,0
2,4.6,3.1,1.5,0.2,0
3,5.0,3.6,1.4,0.2,0
4,5.4,3.9,1.7,0.4,0


In [29]:
# using Sklearn train_test_split method to split dataset in training and testing sets.

train , test = train_test_split(data,test_size=0.2,random_state=0)

In [30]:
# printing the shapes of the training and testing dataset.

print('shape of training data : ',train.shape)
print('shape of testing data',test.shape)

shape of training data :  (119, 5)
shape of testing data (30, 5)


In [31]:
# removing species data from training set, and only keeping the species data in the output file for comparison.

train_x = train.drop(columns=['Species'],axis=1)
train_y = train['Species']

In [33]:
train_x.head()

Unnamed: 0,s_length,s_width,p_length,p_width
27,5.2,3.4,1.4,0.2
97,5.1,2.5,3.0,1.1
96,6.2,2.9,4.3,1.3
69,5.9,3.2,4.8,1.8
18,5.1,3.8,1.5,0.3


In [35]:
train_y.head()

27    0
97    1
96    1
69    1
18    0
Name: Species, dtype: int64

In [36]:
# removing species data from testing set, and only keeping the species data in the output file for comparison.


test_x = test.drop(columns=['Species'],axis=1)
test_y = test['Species']

In [37]:
test_x.head()

Unnamed: 0,s_length,s_width,p_length,p_width
133,6.1,2.6,5.6,1.4
109,6.5,3.2,5.1,2.0
59,5.0,2.0,3.5,1.0
80,5.5,2.4,3.7,1.0
7,4.4,2.9,1.4,0.2


In [38]:
test_y.head()

133    2
109    2
59     1
80     1
7      0
Name: Species, dtype: int64

In [39]:
# initiate Logistic regression model to fit on training set.

model = LogisticRegression()

model.fit(train_x,train_y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [40]:
# predicting the species from the test set.

predict = model.predict(test_x)

In [42]:
# Checking the accuracy_Score of the model and getting the actual species name using encode.inverser_transform method.

print('Predicted Values on Test Data',encode.inverse_transform(predict))

print('\n\nAccuracy Score on test data : \n\n')
print(accuracy_score(test_y,predict))

Predicted Values on Test Data ['Iris-virginica' 'Iris-virginica' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'
 'Iris-setosa' 'Iris-virginica']


Accuracy Score on test data : 


0.9333333333333333
