## In this notebook the work done was collecting user inputs for Age, Gender, Education Level, Job Title, and Years of Experience and adding them as a new row to the main training DataFrame. Preprocessing was then carried out, including label and one-hot encoding, on the combined DataFrame. Finally, predictions were made using the trained model on the user input represented by the last row

## Guidelines for Input:

### Age: (21 - 62)
### Gender: (Male/Female/Other)
### Education Level: (High School/Bachelor's Degree/Master's Degree/PhD)
### Job Title: Please ensure the job title input is from the provided job Title list above.
### Years of Experience: (0 - 34)

In [28]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pandas as pd
import joblib

In [29]:
clean_database = "C:/Users/HP/Documents/Documents/E process/dataanalysisstuff/salary-prediction/database/Clean Database.csv"
model_data = pd.read_csv(clean_database)
job_titles = model_data["Job Title"].unique().tolist()
job_titles

['Junior Sales Representative',
 'Receptionist',
 'Sales Associate',
 'Front End Developer',
 'Software Engineer',
 'Back end Developer',
 'Junior Sales Associate',
 'Junior HR Generalist',
 'Senior Project Engineer',
 'Software Developer',
 'Sales Representative',
 'Software Engineer Manager',
 'Sales Manager',
 'Junior HR Coordinator',
 'Front end Developer',
 'Sales Executive',
 'Full Stack Engineer',
 'Data Analyst',
 'Junior Marketing Analyst',
 'Junior Web Developer',
 'Delivery Driver',
 'Junior Developer',
 'Web Developer',
 'Marketing Analyst',
 'Financial Analyst',
 'Juniour HR Coordinator',
 'Product Manager',
 'Digital Marketing Manager',
 'Customer Service Representative',
 'Junior Software Developer',
 'Marketing Manager',
 'Marketing Coordinator',
 'Human Resources Coordinator',
 'Junior Software Engineer',
 'Junior Marketing Specialist',
 'Help Desk Analyst',
 'Data Entry Clerk',
 'Graphic Designer',
 'Senior Product Marketing Manager',
 'Product Designer',
 'Social Med

### Loading the Decision Tree Model

In [41]:
model = joblib.load("C:/Users/HP/Documents/Documents/E process/dataanalysisstuff/salary-prediction/Models/Decision_Tree_model.pkl ")

### Input Inquiry and Verification

In [30]:
genders= ["Male", "Female", "Other"]
education_level_options = ["High School", "Bachelor's Degree", "Master's Degree", "PhD"]

def input_features():
    age = 0
    while True:
        try:
            age = float(input("Enter Age (21 - 62): "))
        except ValueError:
            print("Invalid input.")
        if age < 21 or age > 62:
            print("Age must be a number between 21 and 62.")
        else:
            break


    gender = None
    while True:
        gender = str(input("Enter Gender (Male/ Female/ Other): ")).capitalize()
        if gender not in genders:
            print("Invalid input, gender must be Male, Female, or Other")
        else:
            break

    education_lvl = None
    while True:
        education_lvl = str(input("Enter Education Level (High School/ Bachelor's Degree/ Master's Degree/ PhD): "))
        if education_lvl not in education_level_options:
            print("Invalid input. Education Level must be one of the specified options.")
        else:
            break

    job_title = None
    while True:
        job_title = str(input("Enter Job Title (Must be a valid title): "))
        if job_title not in job_titles:
            print("Invalid input. Job Title must be one of the specified options.")
        else:
            break
    
    years = 0
    while True:
        try:
            years = float(input("Enter Years of Experience: "))
        except ValueError:
            print("Invalid input.")
            continue
        if years < 0 or years > 34:
            print("Years of Experience must be between 0 and 34 years.")
        else:
            break
    
    return age, gender, education_lvl, job_title, years


In [32]:
age, gender, education_lvl, job_title, years = input_features()

Enter Age (21 - 62): 30
Enter Gender (Male/ Female/ Other): male
Enter Education Level (High School/ Bachelor's Degree/ Master's Degree/ PhD): PhD
Enter Job Title (Must be a valid title): CEO
Enter Years of Experience: 3


### Put the input in a dataframe

In [33]:
salary_df = model_data.drop(["Salary", "Unnamed: 0"], axis = 1)

input_dict = {
    "Age": [age],
    "Gender": [gender],
    "Education Level": [education_lvl],
    "Job Title": [job_title],
    "Years of Experience": [years]
}

input_df = pd.DataFrame(input_dict)

input_df


Unnamed: 0,Age,Gender,Education Level,Job Title,Years of Experience
0,30.0,Male,PhD,CEO,3.0


### appending input data to training data

In [34]:
input_row = input_df.iloc[0]
salary_df = salary_df.append(input_row, ignore_index= True)

  salary_df = salary_df.append(input_row, ignore_index= True)


In [35]:
input_row.to_frame().T

Unnamed: 0,Age,Gender,Education Level,Job Title,Years of Experience
0,30.0,Male,PhD,CEO,3.0


In [36]:
salary_df.tail(1)

Unnamed: 0,Age,Gender,Education Level,Job Title,Years of Experience
1787,30.0,Male,PhD,CEO,3.0


### Preprocessing the data

In [39]:
label_encoder = LabelEncoder()
salary_df["Education Level"] = label_encoder.fit_transform(salary_df["Education Level"])

onehot_encoder = OneHotEncoder(drop="first", sparse_output=False)
onehot_encoded = onehot_encoder.fit_transform(salary_df[["Gender", "Job Title"]])
# Create a DataFrame from the one-hot encoded array
onehot_df = pd.DataFrame(onehot_encoded, columns=onehot_encoder.get_feature_names_out(["Gender", "Job Title"]))
# Concatenate the one-hot encoded DataFrame with the rest of the features
salary_df = pd.concat([salary_df, onehot_df], axis=1)
# Drop the original "Gender" and "Job Title" columns
salary_df = salary_df.drop(["Gender", "Job Title"], axis=1)

### Predicting Salary

In [42]:
salary_prediction = model.predict(salary_df.tail(1))
print(f"Age: {int(age)}\nGender: {gender}\nEducation Level: {education_lvl}\nJob Title: {job_title}\nYears of Experience: {int(years)} years\n\nPredicted salary: {int(salary_prediction)}")


AttributeError: 'numpy.ndarray' object has no attribute 'predict'

In [43]:
type(model)

numpy.ndarray