# Import Required Libraries

In [27]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the dataset

In [28]:
# Load the Iris CSV (path is relative to the notebook's working directory)
# and preview the first five rows.
data = pd.read_csv(filepath_or_buffer='Iris.csv')
data.head(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [29]:
# Show the column labels of the loaded frame.
column_index = data.columns
print('Column Names', column_index)

Column Names Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'Species'],
      dtype='object')


# Label encode the target variable

In [30]:
# Replace the species names with integer class codes. The fitted encoder is
# kept in `encode` so predicted codes can be mapped back to names later.
# NOTE: this overwrites the Species column, so re-running the cell would refit
# the encoder on the integer codes instead of the original names.
encode = LabelEncoder().fit(data["Species"])
data["Species"] = encode.transform(data["Species"])
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


# Select the independent variables (features)

In [31]:
# Features are every column except the last one (the target).
last_position = data.shape[1] - 1
X = data.iloc[:, :last_position]
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


# Select the dependent variable (target)

In [32]:
# The target is the last column of the frame.
target_position = data.shape[1] - 1
y = data.iloc[:, target_position]
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Species, dtype: int32

# Split the dataset into train and test sets

In [33]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts
# Display the shape of each part in the order train/test features, train/test target.
tuple(part.shape for part in split_parts)

((100, 4), (50, 4), (100,), (50,))

# Apply standard scaling so that all features are on the same scale

In [34]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits so the test data does not
# influence the scaler.
sc_X = StandardScaler().fit(X_train)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)

# Train the logistic regression model

In [35]:
# Fit a logistic regression classifier on the scaled training split and
# predict class codes for the scaled test split.
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases - confirm against the installed version before upgrading.
classifier_options = {"solver": "lbfgs", "multi_class": 'auto'}
model = LogisticRegression(**classifier_options)
model.fit(X_train, y_train)
predict = model.predict(X_test)
predict

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 1, 2, 1, 2])

In [36]:
# Map the predicted integer codes back to the original species names.
predicted_species = encode.inverse_transform(predict)
print('Predicted Values on Test Data', predicted_species)

Predicted Values on Test Data ['Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-virginica' 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica'
 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-setosa' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor'
 'Iris-virginica']


# Accuracy on test dataset

In [37]:
# Fraction of test rows whose predicted class matches the true class.
test_accuracy = accuracy_score(y_test, predict)
print('Accuracy Score on test data', test_accuracy)

Accuracy Score on test data 0.98


# Cross validation on Train dataset

In [38]:
# Cross-validate the classifier on the (already scaled) training split.
# The fold count lives in one variable that drives both the `cv` argument and
# the printed label; the label previously said "10-fold" while cv was 5.
n_folds = 5
accuracies = cross_val_score(model, X=X_train, y=y_train, cv=n_folds)
# Report the mean fold accuracy as a percentage.
print("{}-fold Cross Validation on Train Dataset,Accuracy is ".format(n_folds), accuracies.mean()*100)

10-fold Cross Validation on Train Dataset,Accuracy is  92.94736842105264


# Cross validation on Test dataset

In [40]:
# Cross-validate the classifier on the (already scaled) test split.
# cross_val_score fits fresh clones of `model` on folds of the data it is
# given, so this does not score the model trained above on the training split.
# The fold count lives in one variable that drives both the `cv` argument and
# the printed label; the label previously said "10-fold" while cv was 5.
n_folds = 5
accuracies = cross_val_score(model, X=X_test, y=y_test, cv=n_folds)
# Report the mean fold accuracy as a percentage.
print("{}-fold Cross Validation on Test Dataset,Accuracy is ".format(n_folds), accuracies.mean()*100)

10-fold Cross Validation on Test Dataset,Accuracy is  97.77777777777779
