# In [None]:
import streamlit as st
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

# Page-level settings: browser tab title and the wide layout.
st.set_page_config(
    page_title="Uber Fare Prediction",
    layout="wide",
)

def load_data():
    """Read the cleaned ride data from ``./data/cleaned_data.csv``.

    Returns:
        The DataFrame parsed from the CSV file. If reading fails for any
        reason, the error is reported through ``st.error`` and an empty
        DataFrame is returned instead.
    """
    try:
        frame = pd.read_csv("./data/cleaned_data.csv")
    except Exception as e:
        st.error(f"Error loading data: {e}")
        # An empty frame is the failure signal for the caller.
        return pd.DataFrame()
    return frame

df = load_data()


def _load_pickle(path):
    """Unpickle and return the object stored in the file at *path*.

    If the file cannot be opened or its contents cannot be unpickled, the
    problem is reported with ``st.error`` and the current script run is
    halted with ``st.stop()`` instead of surfacing a raw traceback. This
    keeps the error reporting in line with ``load_data``.

    NOTE: ``pickle.load`` can execute arbitrary code while loading; the
    artefact files must come from a trusted training run.
    """
    try:
        with open(path, 'rb') as file:
            return pickle.load(file)
    except (OSError, pickle.UnpicklingError, EOFError) as e:
        st.error(f"Error loading {path}: {e}")
        st.stop()


# Load the scaler
scaler = _load_pickle('scaler.pkl')

# Load the trained model
model = _load_pickle('gradientboost_model.pkl')

# Page header: two columns in a 4:1 width ratio, title first, logo second.
title_col, logo_col = st.columns([4, 1])
title_col.title("Uber Fare Prediction App")
logo_col.image("logo.png", width=100)

if not df.empty:
    # Sidebar inputs
    st.sidebar.image("Uber fare.jpg", width=300)
    st.sidebar.header("Please enter your details")

    # Get user input for longitude, latitude, passenger count, year, and trip distance.
    # Like the passenger-count and year inputs, the coordinate and distance
    # inputs are bounded so that impossible values cannot be submitted:
    # longitude in [-180, 180], latitude in [-90, 90], distance >= 0.
    pickup_longitude = st.sidebar.number_input(
        'Pickup Longitude', min_value=-180.0, max_value=180.0, value=0.0)
    pickup_latitude = st.sidebar.number_input(
        'Pickup Latitude', min_value=-90.0, max_value=90.0, value=0.0)
    dropoff_longitude = st.sidebar.number_input(
        'Dropoff Longitude', min_value=-180.0, max_value=180.0, value=0.0)
    dropoff_latitude = st.sidebar.number_input(
        'Dropoff Latitude', min_value=-90.0, max_value=90.0, value=0.0)
    passenger_count = st.sidebar.number_input('Passenger Count', min_value=1, max_value=10, value=1)
    year = st.sidebar.number_input('Year', min_value=2000, max_value=2100, value=2024)
    trip_distance_km = st.sidebar.number_input('Trip Distance (km)', min_value=0.0, value=0.0)

    # Dropdowns for Hour, Day, and Month
    hour = st.sidebar.selectbox('Hour', list(range(24)))
    day = st.sidebar.selectbox('Day of the Week', list(range(1, 8)))  # Assuming 1=Monday, 7=Sunday
    month = st.sidebar.selectbox('Month', list(range(1, 13)))

    # Encode the periodic calendar inputs (hour, day, month) as sine/cosine pairs
    def _cyclical(value, period):
        """Return the ``(sin, cos)`` of *value* mapped onto a cycle of length *period*."""
        angle = 2 * np.pi * value / period
        return np.sin(angle), np.cos(angle)

    hour_sin, hour_cos = _cyclical(hour, 24)
    day_sin, day_cos = _cyclical(day, 7)
    month_sin, month_cos = _cyclical(month, 12)

    # Single-row feature matrix built from the sidebar inputs and the
    # encoded calendar features, in the order the scaler is given them.
    input_data = np.array([[
        pickup_longitude, pickup_latitude, dropoff_longitude, dropoff_latitude,
        passenger_count, year, trip_distance_km,
        month_sin, month_cos, hour_sin, hour_cos, day_sin, day_cos,
    ]])

    # Scale the row with the loaded scaler, then predict with the loaded model
    prediction = model.predict(scaler.transform(input_data))

    # Show the predicted fare
    st.header("Prediction")
    st.write(f'The predicted fare is: ${prediction[0]:.2f}')

    st.header("Predicted vs Actual Fare")

    # Draw a few actual fares at random from the dataset for comparison.
    # The sample size is capped at the number of rows available, because
    # sampling more rows than exist (without replacement) raises ValueError.
    sample_size = min(5, len(df))
    actual_fares = df['fare_amount'].sample(sample_size).values

    # Bar heights: the single prediction followed by the sampled actual fares
    fare_values = np.append(prediction, actual_fares)
    fare_labels = ['Predicted'] + [f'Actual {i}' for i in range(1, len(actual_fares) + 1)]

    # Create a bar chart
    fig = go.Figure(data=[
        go.Bar(name='Fare', x=fare_labels, y=fare_values)
    ])

    fig.update_layout(title='Predicted vs Actual Fare Comparison',
                      xaxis_title='Fare Type',
                      yaxis_title='Fare Amount',
                      bargap=0.2)

    st.plotly_chart(fig)

    # Main content
    col1, col2 = st.columns(2)

    with col1:
        # Number of rides per passenger count
        st.header("Passenger count details")
        # Named distinctly so the sidebar's `passenger_count` input is not overwritten
        passenger_counts = df['passenger_count'].value_counts()
        fig1 = px.bar(passenger_counts, x=passenger_counts.index, y=passenger_counts.values,
                      labels={'x': 'Passenger'})
        st.plotly_chart(fig1)

    with col2:
        # Peak hour of the day
        st.header("Peak Hour")
        hourly_counts = df['Hour'].value_counts().sort_index()
        # Draw on an explicit figure/axes pair rather than pyplot's global
        # "current figure", and close it once rendered so figures do not
        # accumulate across script reruns.
        peak_fig, peak_ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=hourly_counts.index, y=hourly_counts.values, palette="viridis", ax=peak_ax)
        peak_ax.set_xlabel('Hour of the Day')
        peak_ax.set_ylabel('Number of Rides')
        peak_ax.set_title('Number of Rides per Hour (Peak Hours)')
        st.pyplot(peak_fig)
        plt.close(peak_fig)

    # Headline figures computed over the whole dataset, one per column
    st.subheader("Key Insights")
    revenue_col, distance_col, fare_col = st.columns(3)

    # Sum of all fares
    total_revenue = df['fare_amount'].sum()
    revenue_col.metric("Total Revenue", f"${total_revenue:.2f}")

    # Mean trip distance
    trip_distance = df['trip_distance_km'].mean()
    distance_col.metric("Average Trip Distance", f"{trip_distance:.2f} km")

    # Mean fare
    avg_price = df['fare_amount'].mean()
    fare_col.metric("Average Fare", f"${avg_price:.2f}")

    # Scatter map of the pickup coordinates in the dataset
    st.subheader("Visualization of places")

    pickup_map = px.scatter_mapbox(
        df,
        lat="pickup_latitude",
        lon="pickup_longitude",
        zoom=3,
        height=400,
    ).update_layout(mapbox_style="open-street-map")  # base map style

    # Render the map
    st.plotly_chart(pickup_map)

    # Full dataset as an interactive table
    st.subheader("Raw Data")
    st.dataframe(df)

else:
    # Reached when `df` is empty, e.g. when load_data() could not read the
    # CSV and returned an empty DataFrame; nothing else is rendered.
    st.warning("No data available to display.")
