#### <font color = 'green'> IT Professionals and Developers Salary Prediction Model Industrialization</font>

###### Import necessary libraries

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import pickle

###### Define the path to the folders containing the preprocessing and model artifacts

In [2]:
# Display the current working directory; the relative folder paths used in
# this notebook are resolved against it.
os.getcwd()

'C:\\Users\\Olanrewaju Adegoke\\Desktop\\TechTern\\mywork\\Stackoverflow_Salary_Project\\notebooks'

In [3]:
# Project folders, written relative to the working directory:
# raw/processed data, pickled model artifacts, and the notebooks themselves.
data_path, model_path, note_path = (
    '../data',
    '../models',
    '../notebooks',
)

###### Function that loads the model and preprocessing artifacts

In [4]:
def load_saved_model_and_preprocessing_artifact(
    artifact_dir=None,
    filename='model_preprocessing_steps_stackoverflow.pkl',
):
    """Load the pickled bundle holding the model and its preprocessing steps.

    The file is opened through an explicit path instead of changing the
    process working directory, so a failed load can no longer leave the
    notebook stranded in the models folder.

    Parameters
    ----------
    artifact_dir : str or os.PathLike, optional
        Folder containing the pickle file. When omitted, the notebook-level
        ``model_path`` is used.
    filename : str, optional
        Name of the pickle file inside ``artifact_dir``.

    Returns
    -------
    object
        The unpickled object. The rest of this notebook indexes it with the
        keys ``'model'``, ``'scaler'``, ``'min_max'`` and ``'onehot'``.

    Raises
    ------
    FileNotFoundError
        If the pickle file does not exist at the resolved location.
    """
    if artifact_dir is None:
        artifact_dir = model_path
    artifact_file = os.path.join(artifact_dir, filename)

    # SECURITY: unpickling can execute arbitrary code. Only load artifact
    # files that were produced by a trusted training run.
    with open(artifact_file, 'rb') as file:
        inference = pickle.load(file)
    return inference

###### Load the artifact bundle and unpack each of its components

In [5]:
# Load the pickled bundle, then make sure the working directory is the
# notebooks folder again.
inference = load_saved_model_and_preprocessing_artifact()
os.chdir(note_path)

# Give every component of the bundle its own notebook-level name.
inference_model, inference_scaler, inference_minmax, inference_onehot = (
    inference[key] for key in ('model', 'scaler', 'min_max', 'onehot')
)

###### The prediction and inference pipeline

In [6]:
# Alias used by the prediction function defined in this cell. The scaler,
# min-max scaler and one-hot encoder are read directly from the notebook-level
# names ``inference_scaler``, ``inference_minmax`` and ``inference_onehot``;
# the bare expression statements that used to reference them here had no
# effect and were removed.
model = inference_model

def prep_prediction_inference(
    highest_education, certifications, job_type, country,
    age_range, prog_lang, database, cloud_platform, webframe,
    tech_tool, collab_tool, aisearchtool, aidevtool,
    employment_status, work_option, org_size, industry,
    years_of_coding, years_of_pro_coding, years_of_work_exp
):
    """Preprocess one respondent profile and predict its salary.

    The three ``years_*`` inputs are converted to numbers, passed through
    ``inference_scaler`` and then ``inference_minmax``; the remaining inputs
    are coerced to strings and passed through ``inference_onehot``. Both
    parts are concatenated (numeric columns first) and handed to ``model``.

    Parameters
    ----------
    highest_education, certifications, job_type, country, age_range,
    prog_lang, database, cloud_platform, webframe, tech_tool, collab_tool,
    aisearchtool, aidevtool, employment_status, work_option, org_size,
    industry
        Categorical inputs; each value is converted with ``str`` before
        encoding.
    years_of_coding, years_of_pro_coding, years_of_work_exp
        Numeric inputs (numbers or numeric strings).

    Returns
    -------
    pandas.DataFrame
        One row with a single ``'estimated_salary'`` column, rounded to zero
        decimals.

    Raises
    ------
    ValueError
        If a numeric input is missing (``None``) or cannot be parsed as a
        number.
    """
    categorical_inputs = {
        'highest_education': highest_education,
        'certifications': certifications,
        'job_type': job_type,
        'country': country,
        'age_range': age_range,
        'prog_lang': prog_lang,
        'database': database,
        'cloud_platform': cloud_platform,
        'webframe': webframe,
        'tech_tool': tech_tool,
        'collab_tool': collab_tool,
        'aisearchtool': aisearchtool,
        'aidevtool': aidevtool,
        'employment_status': employment_status,
        'work_option': work_option,
        'org_size': org_size,
        'industry': industry,
    }
    numeric_inputs = {
        'years_of_coding': years_of_coding,
        'years_of_pro_coding': years_of_pro_coding,
        'years_of_work_exp': years_of_work_exp,
    }

    # An empty numeric field arrives as None; report it by name instead of
    # letting it surface as an opaque parsing error.
    missing = [name for name, value in numeric_inputs.items() if value is None]
    if missing:
        raise ValueError(
            'Missing numeric input(s): ' + ', '.join(missing)
        )

    # Build the two single-row frames by column name rather than by dtype
    # inference, so numbers never take a detour through a string array.
    # Categorical values are stringified to keep the encoder input identical
    # to what the previous string-array construction produced.
    cat_data = pd.DataFrame(
        {name: [str(value)] for name, value in categorical_inputs.items()},
        dtype=object,
    )
    num_data = pd.DataFrame({
        name: pd.to_numeric(pd.Series([value]), downcast='float')
        for name, value in numeric_inputs.items()
    })

    # Numeric branch: standard scaler first, then min-max scaler.
    num = inference_scaler.transform(num_data)
    num = inference_minmax.transform(num)
    num_df = pd.DataFrame(num, columns=list(num_data.columns))

    # Categorical branch. NOTE(review): the encoder's transform is expected
    # to return a DataFrame, since reset_index is called on the result.
    cat = inference_onehot.transform(cat_data)
    cat_df = cat.reset_index(drop=True)

    features = pd.concat([num_df, cat_df], axis=1)
    predictions = model.predict(features)
    salary = pd.DataFrame({'estimated_salary': [predictions[0]]})

    return salary.round()

###### Install gradio GUI interface

###### Launch the gradio interface for industrializing the model

In [7]:
# Example respondents passed to the Gradio interface as ready-made inputs.
# Every row holds 20 values in this order:
#   highest_education, certifications, job_type, country,
#   age_range, prog_lang, database, cloud_platform, webframe,
#   tech_tool, collab_tool, aisearchtool, aidevtool,
#   employment_status, work_option, org_size, industry,
#   years_of_coding, years_of_pro_coding, years_of_work_exp
sample = [
    [
        'below bachelor', 'Others', 'Others', 'UK',
        'Under_18', 'Go', 'MySQL', 'Others', 'Others',
        'Others', 'Others', 'Bing_AI', 'Others',
        'Student, full-time', 'Hybrid', '20 to 99 employees', 'Other',
        7.0, 11.0, 11.0,
    ],
    [
        'Bachelor', 'Others', 'Others', 'Others',
        '25-34', 'Java', 'Firebase Realtime Database', 'Others', 'Others',
        'Others', 'Android Studio', 'ChatGPT', 'Others',
        'Employed, full-time', 'Hybrid', '20 to 99 employees', 'Other',
        5.0, 4.0, 11.0,
    ],
    [
        'below bachelor', 'Others', 'Others', 'UK',
        'Under_18', 'Others', 'Others', 'Others', 'Others',
        'Others', 'Android Studio', 'Others', 'Others',
        'Student, full-time', 'Hybrid', '20 to 99 employees', 'Other',
        4.0, 11.0, 11.0,
    ],
]

In [8]:
import gradio as gr

sample_data = sample

# Widget labels, listed in the positional order of the parameters of
# prep_prediction_inference: 17 free-text fields followed by 3 numeric fields.
text_labels = [
    'highest_education', 'certifications', 'job_type', 'country',
    'age_range', 'prog_lang', 'database', 'cloud_platform', 'webframe',
    'tech_tool', 'collab_tool', 'aisearchtool', 'aidevtool',
    'employment_status', 'work_option', 'org_size', 'industry',
]
number_labels = ['years_of_coding', 'years_of_pro_coding', 'years_of_work_exp']

inputs = (
    [gr.Textbox(label=label) for label in text_labels]
    + [gr.Number(label=label) for label in number_labels]
)

# Single-column table that shows the DataFrame returned by the prediction
# function.
outputs = [
    gr.Dataframe(
        row_count=(1, "dynamic"),
        col_count=(1, "fixed"),
        label="Developers and Tech Professionals Salary",
        headers=['estimated_salary'],
    )
]

title = 'StackOverFlow Developers and Tech Professionals Salary Prediction Model by Olanrewaju Adegoke'
description = 'You can supply the inputs or select the sample data points to determine the salaries of your desired role.'

interface = gr.Interface(
    fn=prep_prediction_inference,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=sample_data,
)

# NOTE: share=True also publishes a temporary public URL for this app, so
# anyone holding the link can reach it. Use launch() without share for
# local-only access.
interface.launch(share=True)

  from .autonotebook import tqdm as notebook_tqdm


Running on local URL:  http://127.0.0.1:7864


--------


Running on public URL: https://e5c6120ec6b2b5b1a3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


