In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score 


In [10]:
# Read the Iris measurements from Iris.csv into a DataFrame
df = pd.read_csv(filepath_or_buffer='Iris.csv')

# Preview the top five records to check the columns that were loaded
df.head(n=5)


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [11]:
# Identify the independent variables X: the four flower measurements.
# (The dependent variable y is built in the next cell, once the label is encoded.)
# 'Species' is the label itself, and 'Id' is only a row identifier: it carries no
# botanical information and, if the file is ordered by species, would leak the
# label to the model -- so both are excluded from the feature matrix.
X = df.drop(columns=['Id', 'Species'])

In [12]:
# Encode the dependent variable 'Species' as a binary target:
# 0 = Iris-setosa, 1 = any other species (Iris-versicolor or Iris-virginica)
species_mapping = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 1}

# Series.map performs an explicit dictionary lookup. Series.replace with a
# str -> int mapping relies on implicit object-dtype downcasting, which newer
# pandas versions deprecate, and it silently keeps unmapped labels as strings.
encoded_species = df['Species'].map(species_mapping)

# map() yields NaN for any label missing from the mapping; fail loudly here
# instead of passing a corrupted target on to the model.
assert encoded_species.notna().all(), "unexpected value in the 'Species' column"
df['target'] = encoded_species.astype(int)

# Drop the original 'Species' column now that it is encoded in 'target'
df.drop(columns=['Species'], inplace=True)

# Separate the independent variables (x) and the dependent variable (y)
# NOTE(review): x still contains the 'Id' column, and the split cell below
# uses X rather than x -- confirm whether x is needed at all.
x = df.drop(columns=['target'])
y = df['target']

# Show the DataFrame with the target column encoded as 0 and 1
print(df.head())

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  target
0   1            5.1           3.5            1.4           0.2       0
1   2            4.9           3.0            1.4           0.2       0
2   3            4.7           3.2            1.3           0.2       0
3   4            4.6           3.1            1.5           0.2       0
4   5            5.0           3.6            1.4           0.2       0


In [14]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [15]:
# Instantiate a logistic regression classifier with default settings and train
# it on the training split (fit returns the fitted estimator itself)
model = LogisticRegression().fit(X_train, y_train)

# Predict class labels for the held-out test split
y_pred = model.predict(X_test)


In [16]:
# Build the confusion matrix for the test split
# (rows are the true labels, columns are the predicted labels)
confusion_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confusion_mat)


[[10  0]
 [ 0 20]]


Analyze the confusion matrix to make predictions about precision and recall
Since 'Iris-setosa' is encoded as class 0 and 'Iris-versicolor'/'Iris-virginica' as class 1, and this analysis treats 'Iris-setosa' as the positive class, we are mainly interested in the values in the first row of the confusion matrix.
Each row corresponds to an actual class and each column to a predicted class: the first row represents the 'Iris-setosa' class, and the second row represents the 'Iris-versicolor'/'Iris-virginica' class.

True Positives (TP) = 10 (correctly predicted 'Iris-setosa')
True Negatives (TN) = 20 (correctly predicted 'Iris-versicolor'/'Iris-virginica')
False Positives (FP) = 0 (incorrectly predicted 'Iris-versicolor'/'Iris-virginica' as 'Iris-setosa')
False Negatives (FN) = 0 (incorrectly predicted 'Iris-setosa' as 'Iris-versicolor'/'Iris-virginica')

Precision:
Precision = TP / (TP + FP) = 10 / (10 + 0) = 1.0

Recall:
Recall = TP / (TP + FN) = 10 / (10 + 0) = 1.0

Precision: The model has perfect precision because there are no false positives (FP=0). So when the model predicts a data point as 'Iris-setosa' (class 0), it is always correct. Precision is equally perfect for the 'Iris-versicolor'/'Iris-virginica' class (20 / (20 + 0) = 1.0), so neither class has higher precision than the other.

Recall: The model has perfect recall because there are no false negatives (FN=0). This means that the model correctly identifies all the 'Iris-setosa' samples, and there are no instances of 'Iris-setosa' being incorrectly classified as 'Iris-versicolor'/'Iris-virginica'. Recall is equally perfect for the 'Iris-versicolor'/'Iris-virginica' class (20 / (20 + 0) = 1.0), so neither class has higher recall than the other.

In [17]:
# Derive accuracy, precision, and recall by hand from the confusion matrix,
# taking class 1 as the positive class
true_neg = confusion_mat[0, 0]
false_pos = confusion_mat[0, 1]
false_neg = confusion_mat[1, 0]
true_pos = confusion_mat[1, 1]

# accuracy: correct predictions over all predictions
accuracy = (true_neg + true_pos) / confusion_mat.sum()
# precision: of everything predicted as class 1, the share that truly is class 1
precision = true_pos / (false_pos + true_pos)
# recall: of everything that truly is class 1, the share predicted as class 1
recall = true_pos / (false_neg + true_pos)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)


Accuracy: 1.0
Precision: 1.0
Recall: 1.0


In [18]:
# Confirm the hand-computed accuracy, precision, and recall against scikit-learn.
# precision_score / recall_score score the positive class, which defaults to
# label 1 -- the same class the manual formulas above treat as positive.
sk_accuracy = accuracy_score(y_test, y_pred)
sk_precision = precision_score(y_test, y_pred)
sk_recall = recall_score(y_test, y_pred)

# Compare explicitly rather than by eye; a mismatch stops the notebook here.
# (accuracy / precision / recall still hold the hand-computed values at this point.)
assert np.isclose(accuracy, sk_accuracy), "manual accuracy differs from scikit-learn"
assert np.isclose(precision, sk_precision), "manual precision differs from scikit-learn"
assert np.isclose(recall, sk_recall), "manual recall differs from scikit-learn"

# Rebind the original names to the scikit-learn values, which the next cell prints
accuracy, precision, recall = sk_accuracy, sk_precision, sk_recall


In [19]:
# Report the confusion matrix followed by each metric on its own line
print("Confusion Matrix:")
print(confusion_mat)
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
):
    print(metric_label, metric_value)

Confusion Matrix:
[[10  0]
 [ 0 20]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0


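Therefore, my own code is correct: the hand-computed accuracy, precision, and recall match the values returned by scikit-learn. Note that both the manual code and scikit-learn's defaults treat class 1 ('Iris-versicolor'/'Iris-virginica') as the positive class, whereas the written analysis above used 'Iris-setosa' (class 0); because the confusion matrix has no errors, both views give 1.0.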