# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder


# Load the tour dataset. Without it there is nothing to train on, so the
# problem is reported in the UI and this script run is halted.
try:
    df = pd.read_csv("Cleaned_Tour (1).csv")
except FileNotFoundError:
    st.error("Dataset not found. Please upload 'Cleaned_Tour (1).csv'.")
    st.stop()

# Convert the Yes/No flag columns to binary: 1 for "Yes", 0 for anything else
# (including missing values). A vectorised comparison is used instead of
# DataFrame.applymap, which is deprecated since pandas 2.1.
categorical_cols = ["adventure", "nature", "hill_station", "water_activites", "religious"]
df[categorical_cols] = df[categorical_cols].eq("Yes").astype(int)

# Encode the categorical 'places' column as integer labels. The fitted encoder
# is kept at module level so the UI can list the known places and encode the
# user's selection the same way.
encoder = LabelEncoder()
df["places"] = encoder.fit_transform(df["places"])

# Define features and target. The column order here is the order the model is
# trained on; prediction inputs are aligned to it via X_train.columns.
X = df[
    [
        "adventure",
        "nature",
        "places",
        "discount_percentage",
        "original_price",
        "hill_station",
        "water_activites",
        "religious",
    ]
]
y = df["discount_price"]

# Train the model on an 80/20 split; the fixed random_state keeps the split and
# the forest reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Streamlit App
st.title("Discount Price Predictor")

# User Input: one widget per model feature. The Yes/No choices are converted
# to 1/0 below, mirroring how the training columns were encoded.
place = st.selectbox("Places", encoder.classes_)
adventure = st.selectbox("Adventure", ["Yes", "No"])
nature = st.selectbox("Nature", ["Yes", "No"])
hill_station = st.selectbox("Hill Station", ["Yes", "No"])
water_activites = st.selectbox("Water Activities", ["Yes", "No"])
religious = st.selectbox("Religious", ["Yes", "No"])
discount_percentage = st.number_input("Discount Percentage", min_value=0.0, max_value=100.0, step=0.1)
original_price = st.number_input("Original Price", min_value=1.0, step=0.1)

# Everything that depends on the validated inputs must stay inside the button
# handler: the script is re-executed on every interaction, and
# `place_encoded` only exists once the button has been clicked.
if st.button("Predict Discount Price"):
    # Validate place encoding
    if place not in encoder.classes_:
        st.error("Selected place is not in training data.")
        st.stop()
    place_encoded = encoder.transform([place])[0]

    # Validate discount logic (defensive; the widget already bounds the value)
    if not 0 <= discount_percentage <= 100:
        st.error("Discount percentage must be between 0 and 100.")
        st.stop()

    # Prepare input data as a single-row frame. `columns=X_train.columns`
    # forces the same feature names and order the model was fitted with.
    input_data = pd.DataFrame(
        {
            "places": [place_encoded],
            "adventure": [1 if adventure == "Yes" else 0],
            "nature": [1 if nature == "Yes" else 0],
            "discount_percentage": [discount_percentage],
            "original_price": [original_price],
            "hill_station": [1 if hill_station == "Yes" else 0],
            "water_activites": [1 if water_activites == "Yes" else 0],
            "religious": [1 if religious == "Yes" else 0],
        },
        columns=X_train.columns,
    )

    # Predict
    prediction = model.predict(input_data)
    st.success(f"Predicted Discount Price: ₹{round(prediction[0], 2)}")
