In [39]:
import pandas as pd
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

df = pd.read_csv("../Data_files/iris.csv")


In [40]:
encoder = LabelEncoder()
df['Species'] = encoder.fit_transform(df['Species'])

In [41]:
clf = SGDClassifier(max_iter=1000, tol=1e-3)

In [42]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [43]:
sgd = SGDClassifier(loss='log', random_state=42)
sgd.fit(X_train, y_train)
svc = SVC(kernel='linear', random_state=42)
svc.fit(X_train, y_train)
y1_pred = svc.predict(X_test)


In [44]:
y_pred = sgd.predict(X_test)


In [45]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='weighted'))
print("Recall:", recall_score(y_test, y_pred, average='weighted'))
print("F1 score:", f1_score(y_test, y_pred, average='weighted'))


Accuracy: 0.7
Precision: 0.5153110047846889
Recall: 0.7
F1 score: 0.5863492063492063


  _warn_prf(average, modifier, msg_start, len(result))


In [46]:
scores = cross_val_score(sgd, X, y, cv=5)
print("Cross validation scores:", scores)
print("Average score:", scores.mean())
print(f'\nconfusion matrix: \n {confusion_matrix(y_test, y_pred)}')



Cross validation scores: [0.36666667 0.43333333 0.33333333 0.63333333 0.33333333]
Average score: 0.42000000000000004

confusion matrix: 
 [[10  0  0]
 [ 1  0  8]
 [ 0  0 11]]


In [47]:
# for support vector machines

print("Accuracy:", accuracy_score(y_test, y1_pred))
print("Precision:", precision_score(y_test, y1_pred, average='weighted'))
print("Recall:", recall_score(y_test, y1_pred, average='weighted'))
print("F1 score:", f1_score(y_test, y1_pred, average='weighted'))

scores = cross_val_score(svc, X, y, cv=5)
print("Cross validation scores:", scores)
print("Average score:", scores.mean())

print(f'\nconfusion matrix: \n {confusion_matrix(y_test, y1_pred)}')

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 score: 1.0
Cross validation scores: [0.66666667 1.         1.         1.         0.7       ]
Average score: 0.8733333333333333

confusion matrix: 
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [48]:
print('The confusion matrix for both logistic regression and support vector machines shows that setosa has perfect classification and remaining two species versicolor and virginica are confused with each other.')

The confusion matrix for both logistic regression and support vector machines shows that setosa has perfect classification and remaining two species versicolor and virginica are confused with each other.
