Importing Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Importing File

In [3]:
# The path is written as a raw string (r"...") so that the backslashes of this
# Windows path are kept literally instead of being interpreted as escape
# sequences such as \n (newline) or \t (tab).
# NOTE(review): this is an absolute, machine-specific location; the notebook
# only runs where this exact file exists.
iris_csv_path = r"C:\Users\sidli\Downloads\Python_Projects\Machine_Learning_Practice\Data\Iris.csv"
Iris_data = pd.read_csv(iris_csv_path)

# Preview the first five rows of the loaded table.
print(Iris_data.head())

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa


Summary Statistics and Visualization of Dataset

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# NOTE(review): the Id column is summarised too; it looks like a plain row
# identifier, so its statistics are presumably not meaningful - confirm.
Iris_data.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [None]:
# Pairwise scatter plots of the measurement columns, coloured and marked by species.
# iloc[:, 1:] skips the first column (Id) so it does not appear as a plotted variable.
sns.pairplot(
    data=Iris_data.iloc[:, 1:],
    hue='Species',
    markers=["o", "s", "D"],
)
plt.show()

Separating Input and Output Columns

In [None]:
# Work on the underlying NumPy array of the whole table.
# NOTE(review): the table mixes numeric and text columns, so this array is
# presumably object-dtype rather than float - confirm if a numeric dtype matters.
Iris_values = Iris_data.values

# Positional layout of the table: 0 = Id, 1-4 = measurements, 5 = Species.
feature_columns = slice(1, 5)
species_column = 5

# Inputs: sepal length, sepal width, petal length, petal width.
X = Iris_values[:, feature_columns]
# Target: the species label of each row.
Y = Iris_values[:, species_column]

In [None]:
# Inspect the feature matrix without dumping every row: the shape confirms the
# number of samples/features, and the first five rows show the layout.
print("X shape:", X.shape)
print(X[:5])

In [None]:
# Inspect the label vector without dumping every entry: the shape should match
# the number of rows in X, and the first five labels show the value format.
print("Y shape:", Y.shape)
print(Y[:5])

Splitting the Data into Training and Testing

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing.
# - random_state fixes the shuffle so every run (and every model below) sees the
#   same split; without it the reported metrics change on each execution.
# - stratify=Y keeps the species proportions the same in the train and test sets.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)

# Labels of the held-out samples, for reference when reading the predictions below.
print(Y_test)

Model 1: SVM Algorithm (Study)

In [None]:
# Model 1: Support Vector Machine with a linear kernel
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC

# Fit on the training split, then predict the held-out samples.
model_svc = SVC(kernel='linear')
model_svc.fit(X_train, Y_train)
predictions_svc = model_svc.predict(X_test)

# Overall accuracy on the test split.
svc_accuracy = accuracy_score(Y_test, predictions_svc)
print("SVM Model Accuracy:", svc_accuracy)

# Per-class precision / recall / F1.
svc_report = classification_report(Y_test, predictions_svc)
print("SVM Classification Report:\n", svc_report)

# Counts of true vs predicted classes.
svc_confusion = confusion_matrix(Y_test, predictions_svc)
print("SVM Confusion Matrix:\n", svc_confusion)

# Side-by-side listing of the true label and the prediction for each test sample.
for actual, predicted in zip(Y_test, predictions_svc):
    print(f"Labelled Value: {actual}, Predicted Value: {predicted}")

Model 2: Logistic Regression (Study)

In [None]:
# Model 2: Logistic Regression
from sklearn.linear_model import LogisticRegression

# The solver is allowed up to 200 iterations.
model_lr = LogisticRegression(max_iter=200)
model_lr.fit(X_train, Y_train)
predictions_lr = model_lr.predict(X_test)

# Overall accuracy on the test split.
lr_accuracy = accuracy_score(Y_test, predictions_lr)
print("Logistic Regression Model Accuracy:", lr_accuracy)

# Per-class precision / recall / F1.
lr_report = classification_report(Y_test, predictions_lr)
print("Logistic Regression Classification Report:\n", lr_report)

# Counts of true vs predicted classes.
lr_confusion = confusion_matrix(Y_test, predictions_lr)
print("Logistic Regression Confusion Matrix:\n", lr_confusion)

# Side-by-side listing of the true label and the prediction for each test sample.
for actual, predicted in zip(Y_test, predictions_lr):
    print(f"Labelled Value: {actual}, Predicted Value: {predicted}")

Model 3: Decision Tree (Study)

In [None]:
# Model 3: Decision Tree
from sklearn.tree import DecisionTreeClassifier

# random_state pins the tie-breaking/feature-shuffling inside the tree builder so
# the fitted tree (and therefore the metrics below) is the same on every run.
model_dt = DecisionTreeClassifier(random_state=42)
model_dt.fit(X_train, Y_train)  # Train on the training split
predictions_dt = model_dt.predict(X_test)  # Predict the held-out samples

# Overall accuracy on the test split.
print("Decision Tree Model Accuracy:", accuracy_score(Y_test, predictions_dt))
# Per-class precision / recall / F1.
print("Decision Tree Classification Report:\n", classification_report(Y_test, predictions_dt))
# Counts of true vs predicted classes.
print("Decision Tree Confusion Matrix:\n", confusion_matrix(Y_test, predictions_dt))

# Side-by-side listing of the true label and the prediction for each test sample.
for actual, predicted in zip(Y_test, predictions_dt):
    print(f"Labelled Value: {actual}, Predicted Value: {predicted}")

Prediction on Random Samples (SVM Model)

In [None]:
# Demo: pick three rows and compare the SVM's predictions with the true labels.
# A seeded generator keeps the selection identical on every run; the unseeded
# global NumPy RNG would pick different rows on each execution.
# NOTE: rows are drawn from the full dataset X, so some of them may belong to the
# training split - this is an illustration, not an unbiased evaluation.
sample_rng = np.random.default_rng(42)
sample_indices = sample_rng.choice(X.shape[0], size=3, replace=False)
random_samples = X[sample_indices]

prediction = model_svc.predict(random_samples)  # Predict the species of the sampled rows
print("Prediction of Species: ", prediction)
print("Actual Species: ", Y[sample_indices])  # Ground truth for the same rows

Model 4: Random Forest Classifier (Study)

In [None]:
# Model 4: Random Forest
from sklearn.ensemble import RandomForestClassifier

# 100 trees; random_state pins the bootstrap sampling and feature selection so the
# fitted forest (and therefore the metrics below) is the same on every run.
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train, Y_train)  # Train on the training split
predictions_rf = model_rf.predict(X_test)  # Predict the held-out samples

# Overall accuracy on the test split.
print("Random Forest Model Accuracy:", accuracy_score(Y_test, predictions_rf))
# Per-class precision / recall / F1.
print("Random Forest Classification Report:\n", classification_report(Y_test, predictions_rf))
# Counts of true vs predicted classes.
print("Random Forest Confusion Matrix:\n", confusion_matrix(Y_test, predictions_rf))

# Side-by-side listing of the true label and the prediction for each test sample.
for actual, predicted in zip(Y_test, predictions_rf):
    print(f"Labelled Value: {actual}, Predicted Value: {predicted}")