In [4]:
# Attach Google Drive to this runtime's filesystem under /content/drive.

from google.colab import drive
drive.mount(mountpoint='/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Load the pickled model stored at '/content/sample_data/lasso_model.pkl'.
# SECURITY: pickle.load can execute arbitrary code while deserializing, so
# only load this file if it comes from a trusted source.

import pickle
with open('/content/sample_data/lasso_model.pkl', 'rb') as file:
  lasso_model = pickle.load(file)
# A bare expression on the last line makes the notebook display the loaded object.
lasso_model


In [8]:
# Install Gradio
!pip install gradio

Collecting gradio
  Downloading gradio-5.26.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [9]:
import gradio as gr
import numpy as np
import pickle

# Load the model
# SECURITY: pickle.load can run arbitrary code during deserialization; only
# load a model file obtained from a trusted source.
with open('/content/sample_data/lasso_model.pkl', 'rb') as file:
    lasso_model = pickle.load(file)

# Choices offered for each categorical question. Every choice below also
# becomes one "<question>_<choice>" column in final_feature_list.
country_options = [
    'Argentina', 'Australia', 'Bangladesh', 'Belgium', 'Brazil', 'Canada',
    'China', 'Colombia', 'France', 'Germany', 'India', 'Ireland', 'Italy',
    'Japan', 'Kenya', 'Mexico', 'Netherlands', 'Nigeria', 'Pakistan',
    'Poland', 'Portugal', 'Russia', 'South Africa', 'South Korea', 'Spain',
    'Turkey', 'United Arab Emirates',
    'United Kingdom of Great Britain and Northern Ireland',
    'United States of America',
    'Other',
]

title_options = [
    'Data Analyst (Business, Marketing, Financial, Quantitative, etc)',
    'Data Scientist',
    'Data Engineer',
    'Machine Learning/ MLops Engineer',
    'Manager (Program, Project, Operations, Executive-level, etc)',
    'Software Engineer',
    'Other',
]

industry_options = [
    'Accounting/Finance',
    'Broadcasting/Communications',
    'Computers/Technology',
    'Energy/Mining',
    'Government/Public Service',
    'Medical/Pharmaceutical',
    'Online Service/Internet-based Services',
    'Retail/Sales',
    'Other',
]

ml_experience_options = [
    'No (we do not use ML methods)',
    'We use ML methods for generating insights (but do not put working models into production)',
    'We recently started using ML methods (i.e., models in production for less than 2 years)',
    'We are exploring ML methods (and may one day put a model into production)',
    'We have well established ML methods (i.e., models in production for more than 2 years)',
]

spending_options = [
    '$1-$99',
    '$100-$999',
    '$1000-$9,999',
    '$10,000-$99,999',
    '$100,000 or more ($USD)',
]

# Column names in the order the model input vector is laid out: the two
# numeric columns first, then one column per choice for each question, with
# the questions in the sequence listed here.
# NOTE(review): this order presumably has to mirror the columns the model was
# fitted on — confirm against the training pipeline.
final_feature_list = [
    'For how many years have you used machine learning methods?',
    'What is the size of the company where you are employed?',
    *(f"In which country do you currently reside?_{place}" for place in country_options),
    *(f"Select the title most similar to your current role (or most recent title if retired): - Selected Choice_{role}" for role in title_options),
    *(f"In what industry is your current employer/contract (or your most recent employer if retired)? - Selected Choice_{sector}" for sector in industry_options),
    *(f"Does your current employer incorporate machine learning methods into their business?_{usage}" for usage in ml_experience_options),
    *(f"Approximately how much money have you spent on machine learning and/or cloud computing services at home or at work in the past 5 years (approximate $USD)?\n (approximate $USD)?_{bracket}" for bracket in spending_options),
]

# Prediction function
def predict_salary(years_exp, company_size, country, title, industry, ml_level, spending_bracket):
    """Build the model input vector from the form values and return the prediction as text.

    The vector has ``lasso_model.n_features_in_`` entries. Positions 0 and 1
    receive ``years_exp`` and ``company_size`` unchanged; each categorical
    answer sets to 1 the position of its "<question>_<choice>" name in
    ``final_feature_list``.

    NOTE(review): the numeric inputs are passed to the model as raw numbers —
    confirm the model was fitted on the same encoding of these two columns.

    Args:
        years_exp: Numeric value written to feature position 0.
        company_size: Numeric value written to feature position 1.
        country: Selected country choice.
        title: Selected job-title choice.
        industry: Selected industry choice.
        ml_level: Selected employer-ML-usage choice.
        spending_bracket: Selected spending-bracket choice.

    Returns:
        str: The prediction formatted like ``"$12,345.67"`` (negative
        predictions are shown as ``$0.00``), or ``"Error: <message>"`` when an
        input is missing/unknown or anything else fails. Exceptions are never
        propagated to the caller.
    """
    try:
        # A missing value must stop the prediction: an empty number field or
        # an unselected dropdown arrives as None, and predicting anyway would
        # show a salary computed from an incomplete feature vector.
        numeric_inputs = (
            ("Years of ML Experience", years_exp),
            ("Company Size (Employees)", company_size),
        )
        categorical_inputs = (
            ("Country of Residence",
             "In which country do you currently reside?_{}",
             country),
            ("Current Job Title",
             "Select the title most similar to your current role (or most recent title if retired): - Selected Choice_{}",
             title),
            ("Employer Industry",
             "In what industry is your current employer/contract (or your most recent employer if retired)? - Selected Choice_{}",
             industry),
            ("ML Production Experience",
             "Does your current employer incorporate machine learning methods into their business?_{}",
             ml_level),
            ("ML/Cloud Spending (5 Years)",
             "Approximately how much money have you spent on machine learning and/or cloud computing services at home or at work in the past 5 years (approximate $USD)?\n (approximate $USD)?_{}",
             spending_bracket),
        )
        for label, value in numeric_inputs:
            if value is None:
                raise ValueError(f"{label} is required")
        for label, _template, value in categorical_inputs:
            if value is None:
                raise ValueError(f"{label} is required")

        features = np.zeros(lasso_model.n_features_in_)
        features[0] = years_exp
        features[1] = company_size

        for label, template, value in categorical_inputs:
            full_feature = template.format(value)
            try:
                column = final_feature_list.index(full_feature)
            except ValueError:
                # Report the bad choice instead of silently leaving the whole
                # one-hot group at zero.
                raise ValueError(f"Unknown {label}: {value!r}") from None
            features[column] = 1

        prediction = lasso_model.predict([features])[0]
        # Clamp at zero so a negative model output is never shown as a salary.
        return f"${max(prediction, 0):,.2f}"
    except Exception as exc:
        # The return value is what the UI displays, so failures are reported
        # as text rather than raised.
        return f"Error: {exc}"

# Assemble the page: heading text, then one column holding the seven inputs,
# the result box and the trigger button, in that top-to-bottom order.
with gr.Blocks(title="Data Scientist Salary Projector") as demo:
    gr.Markdown(value="# 💼 Data Scientist Salary Projector")
    gr.Markdown(value="Estimate your salary using experience, job title, and industry context.")

    with gr.Column():
        years_exp = gr.Slider(minimum=0, maximum=50, step=1, label="Years of ML Experience")
        company_size = gr.Number(label="Company Size (Employees)")
        country = gr.Dropdown(label="Country of Residence", choices=country_options)
        title = gr.Dropdown(label="Current Job Title", choices=title_options)
        industry = gr.Dropdown(label="Employer Industry", choices=industry_options)
        ml_level = gr.Dropdown(label="ML Production Experience", choices=ml_experience_options)
        spending = gr.Dropdown(label="ML/Cloud Spending (5 Years)", choices=spending_options)

        output = gr.Textbox(label="Predicted Salary")

        submit = gr.Button(value="Calculate Salary")

    # Clicking the button sends the inputs, in predict_salary's parameter
    # order, to the function and shows its returned string in the result box.
    submit.click(
        fn=predict_salary,
        inputs=[years_exp, company_size, country, title, industry, ml_level, spending],
        outputs=output,
    )

# Start the app server for the interface defined above.
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2b37c9460bb393e49c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


