<a href="https://colab.research.google.com/github/mnDylan/iris-classification-project/blob/main/Iris_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1 // Install library**

In [None]:
!pip install firebase-admin

# **2 // Import Libraries**

In [None]:
import firebase_admin
from firebase_admin import credentials, db, firestore
import json
from google.colab import files

from sklearn.datasets import load_iris
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# **3 // Upload Credentials JSON File**

In [None]:
# Ask the user for the Firebase credentials JSON through Colab's upload helper.
# The result is keyed by file name (the next cell reads the first key).
uploaded = files.upload()

In [None]:
# An empty result (e.g. the upload dialog was dismissed) would otherwise surface
# as a bare IndexError, so fail with an actionable message instead.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the upload cell and select the credentials JSON.")

# Name of the first uploaded file; it is passed to credentials.Certificate() below.
filename = next(iter(uploaded))

# **4 // Initialize Firebase App**

In [None]:
import firebase_admin
from firebase_admin import credentials

# Service-account credentials read from the uploaded JSON file.
cred = credentials.Certificate(filename)

# initialize_app() raises ValueError when the default app already exists, which
# made this cell fail whenever it was re-run in a live kernel. Reuse the
# existing default app in that case.
# NOTE: to switch to a different credentials file, restart the runtime first.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)


# **5 // Initialize Firestore client**

In [None]:
# Firestore client shared by every upload / read / delete helper in this notebook.
firestore_db = firestore.client()

# **6 // Load Iris Dataset**


In [None]:
# Build one DataFrame from scikit-learn's Iris bunch: the measurement columns,
# the numeric class code, and the species label looked up from that code.
iris = load_iris()
iris_df = (
    pd.DataFrame(data=iris.data, columns=iris.feature_names)
    .assign(target=iris.target)
    .assign(target_name=lambda frame: frame['target'].map(lambda code: iris.target_names[code]))
)

print(f"Dataset shape: {iris_df.shape}")
iris_df.head()


# **7 // Insert | Delete Data Into Firebase**

## *7.1 // Insert Data*

In [None]:
def upload_iris_to_firestore(df=None, collection_name='iris_dataset', batch_size=100):
    """Write every row of an Iris DataFrame to Firestore using batched writes.

    Each row becomes the document ``sample_<index label>`` in ``collection_name``.
    Values are converted to native Python types before being written.

    Args:
        df: Frame with the columns 'sepal length (cm)', 'sepal width (cm)',
            'petal length (cm)', 'petal width (cm)', 'target' and 'target_name'.
            Defaults to the notebook-level ``iris_df``.
        collection_name: Target Firestore collection.
        batch_size: Number of documents committed per batch; must be >= 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if df is None:
        # Looked up at call time, so the default follows the notebook variable.
        df = iris_df

    collection = firestore_db.collection(collection_name)
    total_records = len(df)

    for start in range(0, total_records, batch_size):
        batch = firestore_db.batch()

        for idx, row in df.iloc[start:start + batch_size].iterrows():
            # Transform numpy types into Python native types
            data = {
                'sepal_length_cm': float(row['sepal length (cm)']),
                'sepal_width_cm': float(row['sepal width (cm)']),
                'petal_length_cm': float(row['petal length (cm)']),
                'petal_width_cm': float(row['petal width (cm)']),
                'target': int(row['target']),
                'target_name': str(row['target_name']),
                'sample_id': int(idx)
            }
            batch.set(collection.document(f'sample_{idx}'), data)

        # Commit batch
        batch.commit()
        print(f"Uploaded batch {start//batch_size + 1}: records {start+1}-{min(start+batch_size, total_records)}")

    print(f"\nSuccessfully uploaded {total_records} records to Firestore collection '{collection_name}'")

# Run the upload. A failure is reported here instead of being re-raised, so the
# verification calls further down in this cell still execute.
try:
  upload_iris_to_firestore()
  print('\n Upload completed successfully')
except Exception as e:
  print(f" Error during upload: {e}")

# Check the uploaded data
def verify_upload():
    """Print the ID and field dictionary of up to five 'iris_dataset' documents."""
    divider = "-" * 50
    snapshots = firestore_db.collection('iris_dataset').limit(5).stream()

    print("\nVerifying upload - First 5 documents:")
    for snapshot in snapshots:
        print(
            f"Document ID: {snapshot.id}",
            f"Data: {snapshot.to_dict()}",
            divider,
            sep="\n",
        )

# Run the spot-check defined above: prints the first few stored documents.
verify_upload()

# Uploaded data statistics
def get_collection_stats(collection_name='iris_dataset'):
    """Print the document count of a collection and its per-species breakdown.

    Documents without a 'target_name' field are counted under 'unknown'.
    Species are listed in the order they are first encountered in the stream.

    Args:
        collection_name: Firestore collection to summarise.
    """
    from collections import Counter

    # Tally while streaming, so the snapshots never have to be held in a list.
    target_counts = Counter(
        doc.to_dict().get('target_name', 'unknown')
        for doc in firestore_db.collection(collection_name).stream()
    )

    print("\nCollection Statistics:")
    print(f"Total documents: {sum(target_counts.values())}")

    print("Distribution by species:")
    for species, count in target_counts.items():
        print(f"  {species}: {count}")

# Run the statistics helper defined above: prints total and per-species counts.
get_collection_stats()

## *7.2 // Delete Data*

In [None]:
def delete_data_firestore(batch_size=100):
    """
    Deletes all documents in the 'iris_dataset' Firestore collection.
    Use with caution as this operation is irreversible.

    Documents are fetched and removed one page at a time until a query comes
    back empty, so collections larger than one page are emptied completely.

    Args:
        batch_size: Number of documents fetched per page; must be >= 1.

    Returns:
        The number of documents deleted, or 0 if an error occurred.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    try:
        # Reference to the collection
        iris_ref = firestore_db.collection('iris_dataset')

        deleted_count = 0
        while True:
            # Materialise the page before deleting, so the result set is not
            # modified while it is still being streamed.
            page = list(iris_ref.limit(batch_size).stream())
            if not page:
                break

            for doc in page:
                doc.reference.delete()
                deleted_count += 1

        print(f"Successfully deleted {deleted_count} documents.")
        return deleted_count

    except Exception as e:
        print(f"Error deleting documents: {e}")
        return 0


# **8 // Load Iris Data From Firestore**

In [None]:
def load_iris_from_firestore():
  """Read every document of the 'iris_dataset' collection into a DataFrame (one row per document)."""
  snapshots = firestore_db.collection('iris_dataset').stream()

  # One dict of fields per document; pandas turns the list of dicts into rows.
  return pd.DataFrame([snapshot.to_dict() for snapshot in snapshots])

# Load the iris dataset from Firestore
load_pd_iris = load_iris_from_firestore()

# Display the first few rows of the dataframe.
# Only the last expression of a cell is rendered automatically, so a bare
# `.head()` in the middle of the cell shows nothing; call display() explicitly.
display(load_pd_iris.head())

# Filter rows where target equals 1
load_pd_iris[load_pd_iris['target'] == 1]

# **9 // Visualize Iris Dataset**

In [None]:
# Inspect the column names of the frame loaded from Firestore.
load_pd_iris.columns

# **10 // Prepare Data for Training**

In [None]:
# Step 1: Load Data from Firestore
# Kept under its own name: rebinding `iris_df` here would replace the
# scikit-learn frame (with its original column names) that
# upload_iris_to_firestore() reads, so re-running the upload cell afterwards
# would fail on the missing columns.
firestore_iris_df = load_iris_from_firestore()

# Print the columns of the loaded DataFrame to verify
print("Columns after loading from Firestore:", firestore_iris_df.columns)

# An empty collection would otherwise fail below with an unhelpful KeyError.
if firestore_iris_df.empty:
    raise ValueError("No documents found in the 'iris_dataset' collection; run the upload cell (7.1) first.")

# Step 2: Prepare Data
# X is used for features, y for the target variable
X = firestore_iris_df[['sepal_length_cm', 'sepal_width_cm','petal_length_cm','petal_width_cm']]
y = firestore_iris_df['target_name']

# Encode target_name (string) to number
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split data into training and testing sets (80/20, fixed seed for a repeatable split)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

print("\nData prepared and split successfully.")
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

# **11 // Initialize and Train Decision Tree**

In [None]:
# Training Model
# Fit a decision tree on the training split; random_state is pinned to 42 so
# repeated runs use the same seed.
clf = DecisionTreeClassifier(random_state = 42)
clf.fit(X_train, y_train)


In [None]:
# Predict the held-out split
y_pred = clf.predict(X_test)

# Score the predictions: overall accuracy, then the per-class report with the
# encoder's class labels as row names.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred, target_names=le.classes_)

print("\n Accuracy:", test_accuracy)
print("\n Classification Report:")
print(report_text)

In [None]:
# Visualize Decision Tree
from sklearn.tree import plot_tree

# Some scikit-learn releases validate `feature_names` / `class_names` as plain
# lists and reject a pandas Index or numpy array, so convert both explicitly.
fig, ax = plt.subplots(figsize=(16, 10))
plot_tree(
    clf,
    feature_names=list(X.columns),
    class_names=[str(name) for name in le.classes_],
    filled=True,
    ax=ax,
)
ax.set_title("Decision Tree for Iris Dataset")
plt.show()

# **12 // Predict Function**

In [None]:
def predict_sample(sepal_length_cm, sepal_width_cm, petal_length_cm, petal_width_cm):
  """Predict the species for one set of flower measurements and print it.

  Uses the notebook-level classifier ``clf`` and label encoder ``le``.

  Args:
      sepal_length_cm: Sepal length.
      sepal_width_cm: Sepal width.
      petal_length_cm: Petal length.
      petal_width_cm: Petal width.

  Returns:
      The predicted species label, decoded with ``le``.
  """
  # Create 2D input (one row) for scikit-learn
  values = [[sepal_length_cm, sepal_width_cm, petal_length_cm, petal_width_cm]]

  # A model fitted on a DataFrame records its column names; predicting from a
  # bare list then triggers scikit-learn's feature-name mismatch warning.
  # Present the sample under the same names when they are available.
  fitted_names = getattr(clf, 'feature_names_in_', None)
  if fitted_names is not None and len(fitted_names) == len(values[0]):
    sample = pd.DataFrame(values, columns=list(fitted_names))
  else:
    sample = values

  # Predict Label
  pred_label = clf.predict(sample)[0]

  # Convert Number Label to Species
  pred_species = le.inverse_transform([pred_label])[0]

  print("Prediction:")
  print(f" Species: {pred_species}")
  return pred_species


In [None]:
# Interactive check: read the four measurements from the user, in order, then
# classify them. Each answer must parse as a float.
sepal_length_cm, sepal_width_cm, petal_length_cm, petal_width_cm = [
    float(input(prompt))
    for prompt in (
        "Enter sepal length:",
        "Enter sepal width:",
        "Enter petal length:",
        "Enter petal width:",
    )
]

predict_sample(sepal_length_cm, sepal_width_cm, petal_length_cm, petal_width_cm)