# Salary Prediction from LinkedIn Job Postings - Evaluate Stats Model

In [1]:
import pandas as pd, numpy as np
import salary
from sklearn.base import BaseEstimator

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd, numpy as np


## Evaluate Mean and Median Salary by Norm Title Models

In [4]:
class MeanSalaryEstimator(BaseEstimator):
    """Baseline regressor that predicts the mean training salary per ``norm_title``.

    Titles that have no usable per-title mean (not seen during ``fit``, or
    seen only with missing salaries) fall back to the mean salary of the
    whole training set.
    """

    def fit(self, X: pd.DataFrame, y):
        """Learn the per-title mean salary and the overall mean salary.

        Parameters
        ----------
        X : pd.DataFrame
            Training features; only the ``norm_title`` column is read.
        y : array-like
            Salary for each row of ``X``.

        Returns
        -------
        self
            The fitted estimator, so calls can be chained.
        """
        # Only the title column is needed; avoid copying the full feature frame.
        training = X[['norm_title']].assign(salary=y)
        self._salaries_by_title = training.groupby('norm_title')['salary'].mean()
        self._global_salary = training['salary'].mean()
        return self

    def predict(self, X):
        """Predict one salary per row of ``X`` from its ``norm_title``.

        Parameters
        ----------
        X : pd.DataFrame
            Features; only the ``norm_title`` column is read.

        Returns
        -------
        np.ndarray
            Float array with one prediction per row of ``X``.
        """
        # Cast to object so a categorical column is mapped value-by-value and
        # the result is a plain float Series that ``fillna`` can fill.
        titles = X['norm_title'].astype(object)
        # Vectorized lookup: titles without a per-title mean come back as NaN
        # and are replaced by the global mean.
        predicted = titles.map(self._salaries_by_title).fillna(self._global_salary)
        return predicted.to_numpy(dtype=float)

# Evaluate the mean-salary baseline with the project helper
# `salary.train_evaluate_model`; the metrics printed below are its output.
# NOTE(review): `best_model` is reassigned by the median-estimator cell further
# down, so after a full run it no longer refers to this model.
best_model = salary.train_evaluate_model(MeanSalaryEstimator())

Mean CV train R2: 0.1790
Mean CV test R2: 0.1755
Mean CV train RMSE: 55169.0417
Mean CV test RMSE: 55286.8541
Mean CV train MAE: 36937.7517
Mean CV test MAE: 37006.5690
On average, our predicted salaries are $37006.57 off the true salaries


In [5]:
class MedianSalaryEstimator(BaseEstimator):
    """Baseline regressor that predicts the median training salary per ``norm_title``.

    Titles that have no usable per-title median (not seen during ``fit``, or
    seen only with missing salaries) fall back to the median salary of the
    whole training set.
    """

    def fit(self, X: pd.DataFrame, y):
        """Learn the per-title median salary and the overall median salary.

        Parameters
        ----------
        X : pd.DataFrame
            Training features; only the ``norm_title`` column is read.
        y : array-like
            Salary for each row of ``X``.

        Returns
        -------
        self
            The fitted estimator, so calls can be chained.
        """
        # Only the title column is needed; avoid copying the full feature frame.
        training = X[['norm_title']].assign(salary=y)
        self._salaries_by_title = training.groupby('norm_title')['salary'].median()
        self._global_salary = training['salary'].median()
        return self

    def predict(self, X):
        """Predict one salary per row of ``X`` from its ``norm_title``.

        Parameters
        ----------
        X : pd.DataFrame
            Features; only the ``norm_title`` column is read.

        Returns
        -------
        np.ndarray
            Float array with one prediction per row of ``X``.
        """
        # Cast to object so a categorical column is mapped value-by-value and
        # the result is a plain float Series that ``fillna`` can fill.
        titles = X['norm_title'].astype(object)
        # Vectorized lookup: titles without a per-title median come back as NaN
        # and are replaced by the global median.
        predicted = titles.map(self._salaries_by_title).fillna(self._global_salary)
        return predicted.to_numpy(dtype=float)

# Evaluate the median-salary baseline with the project helper
# `salary.train_evaluate_model`; the metrics printed below are its output.
# NOTE(review): this overwrites the `best_model` assigned by the mean-estimator
# cell above.
best_model = salary.train_evaluate_model(MedianSalaryEstimator())

Mean CV train R2: 0.1307
Mean CV test R2: 0.1285
Mean CV train RMSE: 56766.5715
Mean CV test RMSE: 56841.4859
Mean CV train MAE: 35312.9150
Mean CV test MAE: 35420.3336
On average, our predicted salaries are $35420.33 off the true salaries
