In [2]:
# ! pip install streamlit

In [None]:
import streamlit as st
import pickle
import pandas as pd
import numpy as np
import plotly as px
import joblib
import sklearn

In [None]:
# Report the versions of the libraries this notebook depends on, so the
# environment that produced the outputs below can be reproduced.
# `px` is the top-level `plotly` package here (see the import cell).
print("numpy:", np.__version__)
print("pandas:", pd.__version__)
print("plotly:", px.__version__)
print("Sk:", sklearn.__version__)
print("St:", st.__version__)
# This line reports joblib, not Streamlit; the label now says so.
print("joblib:", joblib.__version__)

numpy: 1.24.4
pandas: 2.2.2
plotly: 5.24.1
Sk: 1.2.2
St: 1.44.1
Streamlit version: 1.4.2


In [None]:
%%writefile requirements.txt
# Pinned dependencies for deploying app1.py.
# NOTE(review): these pins differ from the versions printed earlier in this
# notebook (streamlit 1.44.1, numpy 1.24.4, scikit-learn 1.2.2) - confirm which
# set matches the environment the pickled model was created in.
streamlit==1.32.2
pandas==2.2.2
numpy==1.26.4
xgboost==2.0.3
plotly==5.21.0
scikit-learn==1.4.2

In [None]:
# One hand-written itinerary used further down to smoke-test the model.
# Times and duration are plain integers (the Streamlit app below feeds the
# same fields as minutes since midnight / minutes).
sample_flight = {
    'flight_month': 12,
    'day_of_week': 2,
    'airline': 'Vistara',
    'class': 'economy',
    'departure_city': 'Mumbai',
    'departure_time': 585,
    'arrival_city': 'Hyderabad',
    'arrival_time': 119,
    'duration': 180,
}
columns = list(sample_flight.keys())
data = [list(sample_flight.values())]
df1 = pd.DataFrame(data, columns=columns)

In [None]:
# ! pip install numpy==1.24.4 scikit-learn==1.2.2

In [None]:
# Read the pickled model from disk ...
# NOTE: pickle.load can execute arbitrary code; only use files you trust.
with open('/content/model(XGB).pikle', mode='rb') as pickled_model_file:
    model = pickle.load(pickled_model_file)

# ... and write it back out in joblib format (the cell displays the list of
# file names that joblib.dump returns).
joblib.dump(model, "xgb_pipeline.joblib")

['xgb_pipeline.joblib']

In [None]:
def load_model():
    """Load and return the model stored in 'xgb_pipeline.joblib'.

    The path is relative, so the file is looked up in the current working
    directory.
    """
    return joblib.load('xgb_pipeline.joblib')

def predict(model, df):
    """Return the input features with the model's predictions appended.

    Args:
        model: Object exposing ``predict(df)``; it is expected to return one
            value per row of ``df``.
        df: pandas DataFrame of input features. It is not modified.

    Returns:
        A new DataFrame with the columns of ``df`` followed by a
        ``prediction`` column, indexed like ``df``.
    """
    price_prediction = model.predict(df)
    # Build the prediction frame on df's own index: pd.concat(axis=1) aligns
    # on index labels, so a fresh 0..n-1 index would produce extra rows filled
    # with NaN whenever df is filtered, sampled or otherwise re-indexed.
    prediction_df = pd.DataFrame(
        price_prediction, columns=['prediction'], index=df.index
    )
    final_df = pd.concat([df, prediction_df], axis=1)
    return final_df

# Smoke test: score the single hand-written itinerary in `df1` with the
# joblib-serialised model. The result is stored in the global `df`.
if __name__ == "__main__":
    model_xgb = load_model()
    df = predict(model=model_xgb, df=df1)

In [None]:
df

Unnamed: 0,flight_month,day_of_week,airline,class,departure_city,departure_time,arrival_city,arrival_time,duration,prediction
0,12,2,Vistara,economy,Mumbai,585,Hyderabad,119,180,5281.726074


In [None]:
# Rebind `df` to the pickled dataset; this replaces the prediction frame that
# was stored under the same name above.
# NOTE: pickle.load can execute arbitrary code; only use files you trust.
with open('/content/data.df(1)', mode='rb') as dataset_file:
    df = pickle.load(dataset_file)

In [None]:
df

Unnamed: 0,flight_month,day_of_week,airline,class,departure_city,departure_time,arrival_city,arrival_time,duration
0,6,0,SpiceJet,economy,Delhi,1135,Mumbai,1265,130
1,6,0,SpiceJet,economy,Delhi,380,Mumbai,520,140
2,6,0,AirAsia,economy,Delhi,265,Mumbai,395,130
3,6,0,Vistara,economy,Delhi,620,Mumbai,755,135
4,6,0,Vistara,economy,Delhi,530,Mumbai,670,140
...,...,...,...,...,...,...,...,...,...
298128,8,6,Vistara,business,Chennai,585,Hyderabad,1190,605
298129,8,6,Vistara,business,Chennai,750,Hyderabad,1375,625
298130,8,6,Vistara,business,Chennai,425,Hyderabad,1255,830
298131,8,6,Vistara,business,Chennai,420,Hyderabad,1020,600


In [None]:
%%writefile app1.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import datetime
import pickle
import joblib
import xgboost
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Streamlit re-executes this whole script on every widget interaction, so the
# unpickling is wrapped in cached loaders and done once per server process
# instead of on every rerun.
# NOTE(security): pickle.load can execute arbitrary code - only point these
# loaders at files from a trusted source.
@st.cache_resource
def _load_pipeline(path='/content/model(XGB).pkl'):
    """Unpickle and return the XGBoost model pipeline stored at ``path``."""
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


@st.cache_data
def _load_dataframe(path='/content/df'):
    """Unpickle and return the preprocessed dataframe stored at ``path``."""
    with open(path, 'rb') as data_file:
        return pickle.load(data_file)


# Load the XGBoost model pipeline
pipeline = _load_pipeline()

# Load preprocessed dataframe (used by the Evaluation page)
df = _load_dataframe()
# Sidebar navigation: the chosen entry decides which page section below runs.
with st.sidebar:
    st.title("Flight Price Prediction System")
    selection = st.radio("Go to", ["Home", "Price Prediction", "Evaluation"])

# Home page: static introduction plus a bulleted feature overview.
if selection == "Home":
    st.title("Welcome to the Flight Price Prediction System")
    st.write("This application predicts flight prices based on various features and provides insights through data analysis and visualization.")

    st.markdown("## Features:")
    # Each bullet is rendered with its own st.markdown call, in this order.
    feature_bullets = (
        "- **Price Prediction:** Predict flight prices based on user-provided details like airline, route, and travel class.",
        "- **Data Cleaning & Preprocessing:** Handles missing values, transforms date and duration fields, and encodes categorical features.",
        "- **Exploratory Data Analysis (EDA):** Analyze trends such as price distribution across airlines, stops, and duration.",
        "- **Model Training & Evaluation:** Uses machine learning models like Random Forest and XGBoost with proper cross-validation.",
        "- **Interactive Deployment:** Powered by Streamlit, allowing real-time prediction and evaluation.",
    )
    for bullet in feature_bullets:
        st.markdown(bullet)

# Price Prediction Page: collect one itinerary from the user and show the
# price the model pipeline predicts for it.
if selection == "Price Prediction":
    # Feature order used by the sample frame and the dataset in the notebook;
    # the input frame is built in the same order so estimators that check or
    # rely on column order see what they expect.
    FEATURE_COLUMNS = [
        'flight_month', 'day_of_week', 'airline', 'class', 'departure_city',
        'departure_time', 'arrival_city', 'arrival_time', 'duration',
    ]
    CITIES = ['Delhi', 'Mumbai', 'Bangalore', 'Kolkata', 'Hyderabad', 'Chennai']
    MINUTES_PER_DAY = 24 * 60

    st.title("Price Prediction 📈")
    st.write("Enter your flight details to predict the estimated flight price.")
    st.header('Enter your inputs:')

    # Inputs for price prediction
    Departure_city = st.selectbox('Departure City', CITIES)
    Arrival_city = st.selectbox('Arrival City', CITIES)
    Class = st.selectbox('Class', ['economy', 'business'])
    Airline = st.selectbox('Airline', ['SpiceJet', 'AirAsia', 'Vistara', 'GO FIRST', 'Indigo', 'Air India', 'Trujet', 'StarAir'])
    flight_month = st.number_input('Flight Month', min_value=1, max_value=12, step=1)
    Day_of_week = st.number_input('Day of Week', min_value=0, max_value=6, step=1)

    # Clock times are converted to minutes since midnight.
    user_time = st.time_input("Departure Time", value=datetime.time(0, 0))
    Departure_Time = user_time.hour * 60 + user_time.minute

    arrival_time_input = st.time_input("Arrival Time", value=datetime.time(0, 0))
    Arrival_Time = arrival_time_input.hour * 60 + arrival_time_input.minute

    # Duration in minutes; the modulo wraps flights that arrive after midnight.
    Duration = (Arrival_Time - Departure_Time) % MINUTES_PER_DAY

    # Prediction Button
    if st.button('Predict'):
        # The widget defaults (same city, both times 00:00) describe a flight
        # that cannot exist; reject such input instead of showing a price.
        if Departure_city == Arrival_city:
            st.error("Departure and arrival city must be different.")
        elif Duration == 0:
            st.error("Departure and arrival time must be different.")
        else:
            input_data = {
                'departure_city': Departure_city,
                'arrival_city': Arrival_city,
                'class': Class,
                'airline': Airline,
                'flight_month': flight_month,
                'day_of_week': Day_of_week,
                'departure_time': Departure_Time,
                'arrival_time': Arrival_Time,
                'duration': Duration}
            # `columns=` reorders the dict keys into FEATURE_COLUMNS order.
            input_df = pd.DataFrame([input_data], columns=FEATURE_COLUMNS)
            prediction = pipeline.predict(input_df)[0]
            st.write(f"Estimated Flight Price: ₹{prediction:.2f}")
# Evaluation page: score the loaded dataframe with the pipeline and report
# regression metrics plus an actual-vs-predicted scatter plot.
if selection == "Evaluation":
    st.title("📊 Model Performance Evaluation")

    has_actual_prices = 'price' in df.columns
    if not has_actual_prices:
        st.error("Dataset does not contain actual prices for evaluation.")
    else:
        st.subheader("🔍 Evaluation Metrics")
        st.write("The model has been evaluated using standard regression metrics to assess its prediction performance on unseen data.")

        # Every column except the target is passed to the pipeline.
        y_true = df['price']
        X = df.drop(columns=['price'])
        y_pred = pipeline.predict(X)

        mae = mean_absolute_error(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_true, y_pred)

        # (label, formatted value, tooltip) for each metric card, in display order.
        metric_cards = [
            ("📌 Mean Absolute Error (MAE)", f"{mae:.2f}",
             "Average of the absolute errors between actual and predicted prices."),
            ("📌 Mean Squared Error (MSE)", f"{mse:.2f}",
             "Average of squared differences between actual and predicted prices."),
            ("📌 Root Mean Squared Error (RMSE)", f"{rmse:.2f}",
             "Square root of MSE. Lower values indicate better model performance."),
            ("📌 R² Score", f"{r2:.4f}",
             "Proportion of the variance in the dependent variable that is predictable from the features."),
        ]
        for card_label, card_value, card_help in metric_cards:
            st.metric(card_label, card_value, help=card_help)

        st.subheader("📈 Actual vs Predicted Price Comparison")
        axis_labels = {'x': 'Actual Price', 'y': 'Predicted Price'}
        fig = px.scatter(
            x=y_true,
            y=y_pred,
            labels=axis_labels,
            title="Scatter Plot: Actual vs Predicted Flight Prices",
            template="plotly_white",
            color_discrete_sequence=["#636EFA"],
        )
        fig.update_traces(marker={'size': 5, 'opacity': 0.6})
        fig.update_layout(
            height=500,
            margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
            title_x=0.5,
        )
        st.plotly_chart(fig, use_container_width=True)

        st.success("Model evaluation completed successfully. The metrics and plot above provide insights into how well the model is performing.")

Overwriting app1.py


In [None]:
# Install the localtunnel npm package; the launch cell below runs it via
# `npx localtunnel` to expose the Streamlit port.
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K
up to date, audited 23 packages in 1s
[1G[0K⠇[1G[0K
[1G[0K⠇[1G[0K3 packages are looking for funding
[1G[0K⠇[1G[0K  run `npm fund` for details
[1G[0K⠇[1G[0K
2 [31m[1mhigh[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
[1G[0K⠇[1G[0K

In [None]:
# Start the Streamlit app in the background, open a localtunnel to port 8501
# (also in the background), then print this machine's public IPv4 address.
# NOTE(review): localtunnel presumably asks for that IP as the tunnel
# password on first visit - confirm.
!streamlit run app1.py & npx localtunnel --port 8501 & curl ipv4.icanhazip.com

34.16.219.21

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.16.219.21:8501[0m
[0m
your url is: https://true-doodles-buy.loca.lt
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model