In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
def preprocess_data(df):
    """Return a copy of ``df`` in which every missing value is an empty string.

    Blank strings keep the later text concatenation / TF-IDF step from
    tripping over NaN entries. The frame passed in is not modified.
    """
    return df.fillna(value="")

In [None]:
# Read the plant dataset from CSV and immediately blank out missing values
wildlife_df = preprocess_data(pd.read_csv('plants.csv'))

# Preview the first rows as the cell output
wildlife_df.head()

In [None]:
# Join the descriptive columns, in this order and separated by single spaces,
# into one text field that the similarity search runs against
feature_columns = ["city", "forest", "Season", "active"]
combined_text = wildlife_df[feature_columns[0]]
for column in feature_columns[1:]:
    combined_text = combined_text + " " + wildlife_df[column]
wildlife_df["combined_features"] = combined_text

# Fit the TF-IDF vectorizer on the combined text (cast to NumPy unicode strings first)
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(
    wildlife_df["combined_features"].values.astype('U')
)

In [None]:
# Persist the dataframe, the fitted vectorizer and the TF-IDF matrix together
# as one tuple so they can be restored with a single pickle.load call
model_bundle = (wildlife_df, vectorizer, tfidf_matrix)
with open("plant_model.pkl", "wb") as model_file:
    pickle.dump(model_bundle, model_file)

In [None]:
def get_top_plants(location, season, visit_time, top_n=5):
    """Return an HTML table of the plants most similar to a visit description.

    The bundle stored in ``plant_model.pkl`` (dataframe, fitted vectorizer,
    TF-IDF matrix) is loaded on every call. The three text arguments are
    joined into one query, vectorized with the stored vectorizer, and every
    dataset row is ranked by cosine similarity to that query.

    Parameters
    ----------
    location, season, visit_time : str
        Free-text fragments joined with single spaces to form the query.
    top_n : int, default 5
        Maximum number of rows to return. Must be at least 1.

    Returns
    -------
    str
        HTML for a table with CSS class ``styled-table`` holding the best
        matches, followed by a ``<p>`` element reporting the mean similarity
        of those matches as a percentage.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1.
    """
    # Validate up front: ranking with a negative slice (x[-top_n:]) returns
    # every row for top_n=0 and an arbitrary subset for negative values.
    if top_n < 1:
        raise ValueError(f"top_n must be a positive integer, got {top_n!r}")

    # NOTE: pickle.load can execute arbitrary code. Only use this with a
    # plant_model.pkl that was produced by this notebook / a trusted source.
    with open("plant_model.pkl", "rb") as model_file:
        wildlife_df, vectorizer, tfidf_matrix = pickle.load(model_file)

    # Build the query the same way the training text was built (space-joined)
    user_query = location + " " + season + " " + visit_time
    user_tfidf = vectorizer.transform([user_query])

    # One similarity score per dataset row
    similarity_scores = cosine_similarity(user_tfidf, tfidf_matrix).flatten()

    # Highest scores first. A stable sort keeps tied rows in dataset order, so
    # repeated calls give the same ranking; slicing from the front also caps
    # the result at the number of available rows.
    top_indices = (-similarity_scores).argsort(kind="stable")[:top_n]

    # Details of the top matching plants
    result_df = wildlife_df.iloc[top_indices][
        [
            "name",
            "city",
            "forest",
            "Season",
            "active",
            "Region",
            "Unique Fact",
            "Endemic Status",
            "Conservation Status",
        ]
    ]

    # The figure labelled "Accuracy" in the output is the mean cosine
    # similarity of the returned rows expressed as a percentage; it is not a
    # measured classification accuracy.
    accuracy = similarity_scores[top_indices].mean() * 100

    # Cell contents are emitted without HTML escaping (escape=False)
    table_html = result_df.to_html(index=False, escape=False, classes="styled-table")

    # Append the similarity summary below the table
    table_html += f"<p><strong>Accuracy of top {top_n} matches: {accuracy:.2f}%</strong></p>"

    return table_html


In [None]:
# Ask for the three pieces of visit information, in order: place, season, time of day.
# NOTE(review): "mansoon" in the season prompt looks like a typo for "monsoon"; it is left
# unchanged because the spelling used in the dataset's Season column is not visible here — confirm.
user_location, user_season, user_visit_time = [
    input(prompt)
    for prompt in (
        "Enter the city/state you are visiting : ",
        "Enter the season of your visit (summer,winter,mansoon) : ",
        "Enter the time of day you are visiting (morning, afternoon, night) : ",
    )
]

In [None]:
# Rank the plants against the answers collected above (default number of results)
top_plants_html = get_top_plants(
    location=user_location,
    season=user_season,
    visit_time=user_visit_time,
)

In [None]:
# Stylesheet for the "styled-table" class used by the recommendations table
html_style = """
<style>
.styled-table {
    width: 100%;
    border-collapse: collapse;
    font-family: Arial, sans-serif;
}
.styled-table th {
    background-color: #4CAF50;
    color: white;
    padding: 10px;
    text-align: left;
    font-size: 16px;
}
.styled-table td {
    padding: 8px;
    border-bottom: 1px solid #ddd;
}
.styled-table tr:nth-child(even) {
    background-color: #f2f2f2;
}
.styled-table tr:nth-child(odd) {
    background-color: #ffffff;
}
</style>
"""

# Emit the stylesheet immediately followed by the table markup.
# print() writes the raw HTML source as text; it does not render it.
print(html_style, top_plants_html, sep="")