In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import joblib  # For saving and loading the model
import pickle  # For saving and loading the model

# One seed shared by the split and the classifier so that a fresh
# Restart & Run All reproduces the same model and the same metrics.
RANDOM_SEED = 42

# Load the dataset (expects iris.csv in the working directory)
data = pd.read_csv('iris.csv')

# Split the data into features (every column except the label) and target
X = data.drop(columns='species')
y = data['species']

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=RANDOM_SEED
)

# Create a decision tree classifier.
# The tree is seeded too: scikit-learn shuffles the candidate features at each
# split, so an unseeded tree may differ from run to run when splits tie.
clf = DecisionTreeClassifier(random_state=RANDOM_SEED)

# Train the decision tree model
clf.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = clf.predict(X_test)

# Evaluate the model's performance; 'weighted' averages the per-class scores
# weighted by each class's support in y_test
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, average='weighted')
recall = metrics.recall_score(y_test, y_pred, average='weighted')
f1_score = metrics.f1_score(y_test, y_pred, average='weighted')

# Print evaluation metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1_score:.2f}")

Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00


In [2]:
# Step 1: Serialize the trained classifier and write it to a .pkl file
model_filename = 'decision_tree_model_iris.pkl'
with open(model_filename, mode='wb') as file:
    # dumps() + write() produces the same bytes on disk as pickle.dump()
    file.write(pickle.dumps(clf))
print(f"Model saved as {model_filename}")

Model saved as decision_tree_model_iris.pkl


In [3]:
# Display the loaded DataFrame as this cell's output
data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [4]:

# Ad-hoc check: build a one-row sample, reload the pickled model, classify it
new_samples = pd.DataFrame([
    {
        'sepal_length': 5.9,
        'sepal_width': 3.0,
        'petal_length': 5.1,
        'petal_width': 1.8,
    }
])

# Read the model back from the file written under model_filename
with open(model_filename, mode='rb') as file:
    model = pickle.load(file)

# Predict the class for the new data; the last expression shows the single label
prediction = model.predict(new_samples)
prediction[0]

'virginica'

In [5]:
# Step 2: Create a function to load the model and predict
def predict_species(new_data, model_path=None):
    """Load a pickled model from disk and return its predictions.

    Parameters
    ----------
    new_data
        Input handed unchanged to the loaded model's ``predict`` method.
    model_path : str or path-like, optional
        Location of the pickle file to load. When omitted, the notebook-level
        ``model_filename`` variable is used, so existing one-argument calls
        behave exactly as before.

    Returns
    -------
    Whatever ``model.predict(new_data)`` returns.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserializing, so only
    point this function at model files you created or otherwise trust.
    """
    if model_path is None:
        # Fall back to the notebook global so earlier call sites keep working
        model_path = model_filename
    # Load the saved model
    with open(model_path, 'rb') as file:
        model = pickle.load(file)
    # Predict the class for the new data
    return model.predict(new_data)

# Try the helper on a single hand-written measurement (one row, four features)
new_samples = pd.DataFrame(
    [[5.0, 3.6, 1.4, 0.2]],
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
)

# The helper returns one prediction per row; report the first (only) one
predictions = predict_species(new_samples)
print("Predicted species for new samples:", predictions[0])

Predicted species for new samples: setosa


In [6]:
# Display the current sample frame as this cell's output
new_samples

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.0,3.6,1.4,0.2
