In [1]:
# Install required packages
!pip install streamlit catboost scikit-learn --quiet
!pip install pyngrok --quiet


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m67.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m113.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
# Import required modules
import streamlit as st
import joblib
import numpy as np
from pyngrok import ngrok


In [4]:
# Load the trained CatBoost model from the working directory.
# joblib.load unpickles the file, so only load model files you trust.
MODEL_PATH = "catboost_model.pkl"
model = joblib.load(MODEL_PATH)


In [5]:
# Input widgets. Categorical fields are entered directly as integer codes.
age = st.slider("Age", min_value=17, max_value=75, value=30)
workclass = st.selectbox("Workclass", list(range(8)))  # Encoded values
fnlwgt = st.number_input(
    "Fnlwgt (weight factor)", min_value=10000, max_value=1000000, value=100000
)
educational_num = st.slider("Educational-num", min_value=1, max_value=16, value=10)
marital_status = st.selectbox("Marital Status", list(range(6)))  # Encoded
occupation = st.selectbox("Occupation", list(range(15)))  # Encoded
relationship = st.selectbox("Relationship", list(range(6)))  # Encoded
race = st.selectbox("Race", list(range(5)))  # Encoded
gender = st.selectbox("Gender", list(range(2)))  # Male:1, Female:0
capital_gain = st.number_input("Capital Gain", min_value=0, max_value=99999, value=0)
capital_loss = st.number_input("Capital Loss", min_value=0, max_value=99999, value=0)
hours_per_week = st.slider("Hours Per Week", min_value=1, max_value=100, value=40)
native_country = st.selectbox("Native Country", list(range(42)))  # Encoded

# Run the model only when the button is pressed.
if st.button("Predict Income"):
    # One row, thirteen features, in the order the widgets were declared.
    feature_row = [
        age, workclass, fnlwgt, educational_num,
        marital_status, occupation, relationship,
        race, gender, capital_gain, capital_loss,
        hours_per_week, native_country,
    ]
    input_data = np.array([feature_row])

    # predict() is given a single-row batch; take its first element.
    prediction = model.predict(input_data)[0]

    st.markdown("### 🧾 Prediction Result:")
    if prediction == 1:
        st.success("✅ Predicted: Income > 50K")
    else:
        st.warning("🔻 Predicted: Income ≤ 50K")

# Launch the app using ngrok (for Colab)
def launch(script="app.py", port=8501):
    """Start a Streamlit script in the background and expose it through ngrok.

    Args:
        script: Path of the Streamlit script to run.
        port: Local port Streamlit is asked to serve on; ngrok tunnels to
            the same port.

    Returns:
        The tunnel object returned by ``ngrok.connect``, so the caller can
        read its ``public_url`` or disconnect it later.

    Raises:
        FileNotFoundError: If the ``streamlit`` executable is not on PATH.
    """
    import subprocess

    # An argument list avoids shell parsing of `script`; Popen returns
    # immediately, leaving Streamlit running in the background.
    subprocess.Popen(["streamlit", "run", script, "--server.port", str(port)])

    tunnel = ngrok.connect(port)
    # Print the URL itself rather than the tunnel object's repr.
    print(f"Streamlit App URL: {tunnel.public_url}")
    return tunnel

# Save the Streamlit script as app.py (the %%writefile cell below does this), then call launch()

2025-07-29 14:11:58.490 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-07-29 14:11:58.498 Session state does not function when running a script without `streamlit run`


In [6]:
%%writefile app.py
# Import required modules
import streamlit as st
import joblib
import numpy as np

# Page configuration: browser-tab title and icon, wide layout, sidebar open.
page_settings = dict(
    page_title="Income Prediction Dashboard",
    page_icon="💰",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**page_settings)

# Custom CSS for better styling.
# Injects one <style> element into the page through st.markdown. The classes
# defined here are applied by the raw-HTML snippets rendered further down:
#   .main-header / .sub-header  - page title and subtitle
#   .success-prediction         - result card for a "> $50,000" prediction
#   .warning-prediction         - result card for a "≤ $50,000" prediction
#   .info-card                  - input-summary cards
#   .stButton > button          - restyles the button (gradient, hover lift)
# NOTE(review): .prediction-container is defined but not referenced by any
# markup in this script.
st.markdown("""
<style>
    .main-header {
        font-size: 3rem;
        font-weight: bold;
        text-align: center;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        margin-bottom: 2rem;
    }

    .sub-header {
        text-align: center;
        color: #666;
        font-size: 1.2rem;
        margin-bottom: 2rem;
    }

    .prediction-container {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        color: white;
        text-align: center;
        box-shadow: 0 10px 30px rgba(0,0,0,0.1);
        margin: 2rem 0;
    }

    .success-prediction {
        background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
        padding: 2rem;
        border-radius: 15px;
        color: white;
        text-align: center;
        box-shadow: 0 10px 30px rgba(0,0,0,0.1);
        margin: 2rem 0;
    }

    .warning-prediction {
        background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
        padding: 2rem;
        border-radius: 15px;
        color: #333;
        text-align: center;
        box-shadow: 0 10px 30px rgba(0,0,0,0.1);
        margin: 2rem 0;
    }

    .info-card {
        background: #f8f9fa;
        padding: 1.5rem;
        border-radius: 10px;
        border-left: 4px solid #667eea;
        margin: 1rem 0;
    }

    .stButton > button {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        padding: 0.75rem 2rem;
        border-radius: 25px;
        font-weight: bold;
        font-size: 1.1rem;
        width: 100%;
        transition: all 0.3s ease;
    }

    .stButton > button:hover {
        transform: translateY(-2px);
        box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
    }
</style>
""", unsafe_allow_html=True)

# Load the trained CatBoost model
@st.cache_resource(show_spinner=False)
def load_model(path="catboost_model.pkl"):
    """Unpickle the trained model once and reuse it across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching the model file would be read from disk each time.

    Args:
        path: Location of the joblib-serialised model file.

    Returns:
        The deserialised model object.

    Raises:
        FileNotFoundError: If ``path`` does not exist (not cached, so a
            later rerun retries the load).
    """
    # joblib.load unpickles the file, so only load model files you trust.
    return joblib.load(path)


try:
    model = load_model()
    model_loaded = True
except FileNotFoundError:
    # Keep `model` bound so later references fail predictably; the UI reports
    # the missing file and stops before any prediction is attempted.
    model = None
    model_loaded = False

# Define mappings for categorical features.
# Each dict maps the label shown in a select box to the integer code placed in
# the model's input row.
# NOTE(review): presumably these codes must equal the label encoding applied
# when the model was trained; that encoder is not visible here, so none of the
# values below can be confirmed from this file - verify against the training
# code (ideally persist the fitted encoders and load them instead).

# Codes 1-8 are used; 0 is never produced.
workclass_map = {
    "Private": 4, "Self-emp-inc": 5, "Self-emp-not-inc": 6, "Federal-Government": 1,
    "Local-Government": 2, "State-Government": 7, "Without-pay": 8, "Never-worked": 3
}
marital_status_map = {
    "Married-civ-spouse": 2, "Divorced": 0, "Never-married": 4, "Separated": 5,
    "Widowed": 6, "Married-spouse-absent": 1, "Married-AF-spouse": 3
}
# NOTE(review): code 3 is never produced and "Not-Applicable" maps to -1;
# confirm both against the training encoding.
occupation_map = {
    "Tech-support": 13, "Craft-repair": 4, "Other-service": 9, "Sales": 12,
    "Exec-managerial": 5, "Prof-specialty": 10, "Handlers-cleaners": 6,
    "Machine-op-inspct": 7, "Adm-clerical": 0, "Farming-fishing": 2,
    "Transport-moving": 14, "Priv-house-serv": 8, "Protective-serv": 11,
    "Armed-Forces": 1, "Not-Applicable": -1
}
relationship_map = {
    "Wife": 5, "Own-child": 3, "Husband": 0, "Not-in-family": 1, "Other-relative": 2,
    "Unmarried": 4
}
race_map = {
    "White": 4, "Asian-Pac-Islander": 1, "Amer-Indian-Eskimo": 0, "Other": 2,
    "Black": 3
}
gender_map = {
    "Female": 0, "Male": 1
}
# FIXME(review): several distinct countries share one code, so the model
# receives identical input for them:
#   9  -> Cuba, Dominican-Republic, Ecuador, El-Salvador
#   8  -> France, Columbia
#   15 -> Honduras, Holand-Netherlands
#   30 -> Poland, Portugal
# The correct codes cannot be derived from this file; fix from the training
# encoder.
native_country_map = {
    "United-States": 38, "Cambodia": 5, "England": 10, "Puerto-Rico": 31,
    "Canada": 6, "Germany": 11, "Outlying-US(Guam-USVI-etc)": 27, "India": 18,
    "Japan": 22, "Greece": 12, "South": 34, "China": 7, "Cuba": 9, "Iran": 20,
    "Honduras": 15, "Philippines": 29, "Italy": 21, "Poland": 30, "Jamaica": 23,
    "Vietnam": 40, "Mexico": 25, "Portugal": 30, "Ireland": 19, "France": 8,
    "Dominican-Republic": 9, "Ecuador": 9, "El-Salvador": 9, "Trinadad&Tobago": 37,
    "Haiti": 14, "Columbia": 8, "Guatemala": 13, "Nicaragua": 26, "Peru": 28,
    "Hungary": 16, "Hong": 17, "Thailand": 36, "Scotland": 33, "Yugoslavia": 41,
    "Taiwan": 35, "Holand-Netherlands": 15
}

# Main UI: title and subtitle banners, styled by the injected CSS classes.
title_html = '<div class="main-header">💰 Income Prediction Dashboard</div>'
subtitle_html = '<div class="sub-header">Predict whether annual income exceeds $50,000 using machine learning</div>'
for banner_html in (title_html, subtitle_html):
    st.markdown(banner_html, unsafe_allow_html=True)

# Without a model there is nothing to predict: report the problem and halt
# the script before any input widget is drawn.
if not model_loaded:
    missing_model_message = "⚠️ Error: catboost_model.pkl not found. Please make sure the model file is in the correct directory."
    st.error(missing_model_message)
    st.stop()

# Two equal-width columns: personal/family inputs left, work/education right.
personal_col, work_col = st.columns([1, 1])

# Option lists are the map keys, in declaration order.
gender_options = list(gender_map)
race_options = list(race_map)
country_options = list(native_country_map)
marital_options = list(marital_status_map)
relationship_options = list(relationship_map)
workclass_options = list(workclass_map)
occupation_options = list(occupation_map)

with personal_col:
    st.markdown("### 👤 Personal Information")
    age = st.slider("🎂 Age", min_value=17, max_value=75, value=30,
                    help="Age of the individual")
    gender_input = st.selectbox("⚧️ Gender", gender_options,
                                help="Gender of the individual")
    race_input = st.selectbox("🌍 Race", race_options, help="Race/ethnicity")
    # Pre-select "United-States" instead of the first entry.
    native_country_input = st.selectbox(
        "🏠 Native Country",
        country_options,
        index=country_options.index("United-States"),
    )

    st.markdown("### 👨‍👩‍👧‍👦 Family & Relationships")
    marital_status_input = st.selectbox("💍 Marital Status", marital_options)
    relationship_input = st.selectbox("👥 Relationship", relationship_options)

with work_col:
    st.markdown("### 💼 Work Information")
    # Pre-select "Private" instead of the first entry.
    workclass_input = st.selectbox(
        "🏢 Work Class",
        workclass_options,
        index=workclass_options.index("Private"),
    )
    occupation_input = st.selectbox("💼 Occupation", occupation_options)
    hours_per_week = st.slider("⏰ Hours Per Week", min_value=1, max_value=100, value=40,
                               help="Number of hours worked per week")

    st.markdown("### 🎓 Education & Demographics")
    educational_num = st.slider(
        "📚 Education Level", min_value=1, max_value=16, value=10,
        help="Number representing education level (1=Preschool, 16=Doctorate)",
    )
    fnlwgt = st.number_input(
        "⚖️ Final Weight", min_value=10000, max_value=1000000, value=100000,
        help="Demographic weighting factor",
    )

# Financial information in full width, one input per half.
st.markdown("### 💸 Financial Information")
gain_col, loss_col = st.columns(2)

with gain_col:
    capital_gain = st.number_input(
        "📈 Capital Gain ($)", min_value=0, max_value=99999, value=0,
        help="Income from investment sources",
    )

with loss_col:
    capital_loss = st.number_input(
        "📉 Capital Loss ($)", min_value=0, max_value=99999, value=0,
        help="Losses from investment sources",
    )

# Translate each selected label into its integer code via the matching map.
# Personal details
gender_encoded, race_encoded, native_country_encoded = (
    gender_map[gender_input],
    race_map[race_input],
    native_country_map[native_country_input],
)
# Family
marital_status_encoded, relationship_encoded = (
    marital_status_map[marital_status_input],
    relationship_map[relationship_input],
)
# Work
workclass_encoded, occupation_encoded = (
    workclass_map[workclass_input],
    occupation_map[occupation_input],
)

# Place the predict button in the wide middle column of a 1:2:1 layout.
st.markdown("---")
_, button_col, _ = st.columns([1, 2, 1])
with button_col:
    predict_button = st.button("🔮 Predict Income Level")

if predict_button:
    with st.spinner("🔄 Analyzing data and making prediction..."):
        # Single-row batch of the thirteen features.
        # NOTE(review): presumably this order matches the training columns - verify.
        feature_row = [
            age, workclass_encoded, fnlwgt, educational_num,
            marital_status_encoded, occupation_encoded, relationship_encoded,
            race_encoded, gender_encoded, capital_gain, capital_loss,
            hours_per_week, native_country_encoded,
        ]
        input_data = np.array([feature_row])

        # predict() gets one row; its first element is this row's label.
        prediction = model.predict(input_data)[0]

        # Result card: label 1 renders the "> $50,000" card, anything else
        # the "≤ $50,000" card.
        if prediction == 1:
            st.markdown("""
            <div class="success-prediction">
                <h2>🎉 High Income Prediction</h2>
                <h3>Annual Income: > $50,000</h3>
                <p>Based on the provided information, the model predicts this individual likely earns more than $50,000 annually.</p>
            </div>
            """, unsafe_allow_html=True)
        else:
            st.markdown("""
            <div class="warning-prediction">
                <h2>💼 Moderate Income Prediction</h2>
                <h3>Annual Income: ≤ $50,000</h3>
                <p>Based on the provided information, the model predicts this individual likely earns $50,000 or less annually.</p>
            </div>
            """, unsafe_allow_html=True)

        # Echo the submitted values back as four cards in two columns.
        st.markdown("### 📋 Input Summary")
        left_summary, right_summary = st.columns(2)

        with left_summary:
            st.markdown(f"""
            <div class="info-card">
                <strong>Personal Details:</strong><br>
                Age: {age} years<br>
                Gender: {gender_input}<br>
                Race: {race_input}<br>
                Native Country: {native_country_input}
            </div>
            """, unsafe_allow_html=True)

            st.markdown(f"""
            <div class="info-card">
                <strong>Work Information:</strong><br>
                Work Class: {workclass_input}<br>
                Occupation: {occupation_input}<br>
                Hours/Week: {hours_per_week}
            </div>
            """, unsafe_allow_html=True)

        with right_summary:
            st.markdown(f"""
            <div class="info-card">
                <strong>Family Status:</strong><br>
                Marital Status: {marital_status_input}<br>
                Relationship: {relationship_input}
            </div>
            """, unsafe_allow_html=True)

            st.markdown(f"""
            <div class="info-card">
                <strong>Financial & Education:</strong><br>
                Education Level: {educational_num}<br>
                Capital Gain: ${capital_gain:,}<br>
                Capital Loss: ${capital_loss:,}
            </div>
            """, unsafe_allow_html=True)

# Footer: divider, then a centred attribution and disclaimer.
st.markdown("---")
footer_html = """
<div style="text-align: center; color: #666; padding: 2rem;">
    <p>🤖 Powered by CatBoost Machine Learning Model</p>
    <p><em>This prediction is for informational purposes only and should not be used for actual financial decisions.</em></p>
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)

Writing app.py


In [7]:
# Register the ngrok authtoken without storing it in the notebook.
# Never hardcode the token here: anything saved in a notebook is shared with
# it, and a token that was ever committed must be revoked in the ngrok
# dashboard and replaced.
import os
from getpass import getpass

from pyngrok import ngrok

# Prefer the NGROK_AUTHTOKEN environment variable; otherwise prompt for the
# token (getpass does not echo the input into the cell output).
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [8]:
from pyngrok import ngrok
!streamlit run app.py &> /dev/null &
public_url = ngrok.connect(8501)
print(f"🔗 Click this to open your app:\n{public_url}")

🔗 Click this to open your app:
NgrokTunnel: "https://9ac32eaf6ac0.ngrok-free.app" -> "http://localhost:8501"
