In [1]:
import pickle
import numpy as np
import pandas as pd
from pandas.errors import SettingWithCopyWarning

from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=SettingWithCopyWarning)

# **Loading the Models**

In [2]:
# Location of the pickled batting model, relative to the notebook's working directory.
batting_filepath = '../model-files/trained_batting_model.pkl'
# NOTE: pickle.load can run arbitrary code while deserializing - only load files from a trusted source.
with open(batting_filepath, 'rb') as f:
    # predict_salary later calls .predict() on this object for role == 'batting'.
    batting_model = pickle.load(f)

In [3]:
# Location of the pickled bowling model, relative to the notebook's working directory.
bowling_filepath = '../model-files/trained_bowling_model.pkl'
# NOTE: pickle.load can run arbitrary code while deserializing - only load files from a trusted source.
with open(bowling_filepath, 'rb') as f:
    # predict_salary later calls .predict() on this object for role == 'bowling'.
    bowling_model = pickle.load(f)

In [4]:
# Location of the pickled batting scaler, relative to the notebook's working directory.
batting_scaler_filepath = '../model-files/trained_batting_scaler.pkl'
# NOTE: pickle.load can run arbitrary code while deserializing - only load files from a trusted source.
with open(batting_scaler_filepath, 'rb') as f:
    # Used below through .transform() (feature scaling) and .inverse_transform() (un-scaling the prediction).
    batting_scaler = pickle.load(f)

In [5]:
# Location of the pickled bowling scaler, relative to the notebook's working directory.
bowling_scaler_filepath = '../model-files/trained_bowling_scaler.pkl'
# NOTE: pickle.load can run arbitrary code while deserializing - only load files from a trusted source.
with open(bowling_scaler_filepath, 'rb') as f:
    # Used below through .transform() (feature scaling) and .inverse_transform() (un-scaling the prediction).
    bowling_scaler = pickle.load(f)

In [6]:
# Location of the pickled batting feature selector, relative to the notebook's working directory.
batting_selector_filepath = '../model-files/trained_batting_selector.pkl'
# NOTE: pickle.load can run arbitrary code while deserializing - only load files from a trusted source.
with open(batting_selector_filepath, 'rb') as f:
    # PreprocessingPipeline calls .transform() on this object as its final step.
    batting_selector = pickle.load(f)

In [7]:
# Location of the pickled bowling feature selector, relative to the notebook's working directory.
bowling_selector_filepath = '../model-files/trained_bowling_selector.pkl'
# NOTE: pickle.load can run arbitrary code while deserializing - only load files from a trusted source.
with open(bowling_selector_filepath, 'rb') as f:
    # PreprocessingPipeline calls .transform() on this object as its final step.
    bowling_selector = pickle.load(f)

In [8]:
# Pickled DataFrames whose column layout predict_salary uses as the template for new input rows.
# NOTE: pd.read_pickle unpickles the file, so the same trust requirement as pickle.load applies.
batting_df = pd.read_pickle('../FinalDFs/PostEDA/batting_df.pkl')
bowling_df = pd.read_pickle('../FinalDFs/PostEDA/bowling_df.pkl')

# **Making the Predict Function**

In [9]:
class InputNewData:
    '''Builds one-row DataFrames whose columns mirror those of a template DataFrame.'''

    def __init__(self, data_template):
        # Kept as given; only its column labels are read when a row is built.
        self.data_template = data_template

    def add_new_row(self, data_values):
        '''Return a single-row DataFrame holding data_values under the template's column labels.

        data_values must supply one value per template column, in the template's column order.
        '''
        template_columns = list(self.data_template.columns)
        return pd.DataFrame(data=[data_values], columns=template_columns)

In [10]:
class PreprocessingPipeline:
    '''Turns raw player rows into the feature matrix that is handed to the model.

    preprocess_data() scales the numeric columns, one-hot encodes 'Country'
    and 'Team', drops 'Player' and 'NY_SalaryUSD', moves the dummy columns to
    the end in the order given by role_col_list, and finally applies the
    feature selector.

    Parameters
    ----------
    new_data_df : pandas.DataFrame
        Rows to preprocess. Must contain 'Player', 'Country', 'Team' and
        'NY_SalaryUSD'. The frame is never modified by this class.
    role_scaler : object with ``transform(X)``
        Applied to the numeric columns (presumably a fitted scikit-learn
        scaler - confirm against the training notebook).
    role_selector : object with ``transform(X)``
        Applied to the fully assembled frame as the last step.
    role_col_list : sequence of str
        Names of the 'Country_*' / 'Team_*' dummy columns, in the order they
        must appear at the end of the frame.
    '''

    # Numeric columns that are passed through without scaling (presumably the
    # scaler was fitted without them - confirm against the training notebook).
    _UNSCALED_COLUMNS = ('Season', 'Role', 'changed_teams')

    def __init__(self, new_data_df, role_scaler, role_selector, role_col_list):
        self.new_data_df = new_data_df
        self.scaler = role_scaler
        self.selector = role_selector
        self.role_col_list = role_col_list

    def preprocess_data(self):
        '''Run every preprocessing step and return the selector's output.

        Safe to call repeatedly: the stored input frame is left untouched, so
        each call starts from the same raw values.
        '''
        scaled_df, _ = self._scale_numeric_columns(self.new_data_df)
        encoded_df = self._get_categorical_dummies(scaled_df)
        features_df = encoded_df.drop(columns=['Player', 'NY_SalaryUSD'])
        ordered_df = self._reorder_columns(features_df)
        return self.selector.transform(ordered_df)

    def _scale_numeric_columns(self, data_df):
        '''Return (copy of data_df with scaled numeric columns, just those scaled columns).

        Both returned frames are independent of data_df and of each other, so
        a caller may assign into either one without side effects.
        '''
        numeric_columns = [
            col for col in data_df.select_dtypes(include=[np.number]).columns
            if col not in self._UNSCALED_COLUMNS
        ]
        # Work on a copy: writing the scaled values back into data_df would
        # change the caller's frame and double-scale it on a second call.
        scaled_df = data_df.copy()
        scaled_df[numeric_columns] = self.scaler.transform(data_df[numeric_columns])
        return scaled_df, scaled_df[numeric_columns].copy()

    def _get_categorical_dummies(self, data_df):
        '''One-hot encode 'Country' and 'Team', adding all-zero columns for dummies absent from this data.'''
        dummy_df = pd.get_dummies(data_df, columns=['Country', 'Team'])
        # NOTE(review): a Country/Team value that produces a dummy column not
        # listed in role_col_list stays in the frame as an extra column.
        for col in self.role_col_list:
            if col not in dummy_df.columns:
                dummy_df[col] = 0
        return dummy_df

    def _reorder_columns(self, data_df):
        '''Keep the non-dummy columns in place and append the dummies in role_col_list order.'''
        desired_order = list(self.role_col_list)
        dummy_names = set(desired_order)
        remaining_cols = [col for col in data_df.columns if col not in dummy_names]
        return data_df[remaining_cols + desired_order]

In [11]:
# Dummy (one-hot) column names per role. The order matters: PreprocessingPipeline
# appends the columns in exactly this order before the feature selector runs.
_BATTING_DUMMY_COLUMNS = [
    'Country_Afghanistan', 'Country_Australia', 'Country_Bangladesh',
    'Country_England', 'Country_India', 'Country_Netherlands',
    'Country_New Zealand', 'Country_Pakistan', 'Country_South Africa',
    'Country_Sri Lanka', 'Country_Uncapped', 'Country_West Indies',
    'Team_Chennai Super Kings', 'Team_Deccan Chargers',
    'Team_Delhi Capitals', 'Team_Delhi Daredevils', 'Team_Gujarat Lions',
    'Team_Kings XI Punjab', 'Team_Kochi Tuskers Kerala',
    'Team_Kolkata Knight Riders', 'Team_Mumbai Indians',
    'Team_Pune Warriors', 'Team_Rajasthan Royals',
    'Team_Rising Pune Supergiants', 'Team_Royal Challengers Bangalore',
    'Team_Sunrisers Hyderabad',
]
_BOWLING_DUMMY_COLUMNS = [
    'Country_Afghanistan', 'Country_Australia',
    'Country_Bangladesh', 'Country_England', 'Country_India',
    'Country_Nepal', 'Country_Netherlands', 'Country_New Zealand',
    'Country_Pakistan', 'Country_South Africa', 'Country_Sri Lanka',
    'Country_Uncapped', 'Country_West Indies', 'Team_Chennai Super Kings',
    'Team_Deccan Chargers', 'Team_Delhi Capitals', 'Team_Delhi Daredevils',
    'Team_Gujarat Lions', 'Team_Kings XI Punjab',
    'Team_Kochi Tuskers Kerala', 'Team_Kolkata Knight Riders',
    'Team_Mumbai Indians', 'Team_Pune Warriors', 'Team_Rajasthan Royals',
    'Team_Rising Pune Supergiants', 'Team_Royal Challengers Bangalore',
    'Team_Sunrisers Hyderabad',
]


def predict_salary(data, role):
    '''Predict the 'NY_SalaryUSD' value for one player row.

    Parameters
    ----------
    data : sequence
        One value per column of the role's template DataFrame (batting_df or
        bowling_df), in that frame's column order. The 'NY_SalaryUSD' entry
        is only a numeric placeholder; it is dropped before the model runs.
    role : str
        'batting' or 'bowling'; selects the template, scaler, selector and model.

    Returns
    -------
    The model's prediction mapped back through the scaler's inverse_transform,
    i.e. expressed in the scale of the 'NY_SalaryUSD' column.

    Raises
    ------
    ValueError
        If role is neither 'batting' nor 'bowling'.
    '''
    if role == 'batting':
        data_template = batting_df
        role_scaler = batting_scaler
        role_selector = batting_selector
        model = batting_model
        dummy_columns = _BATTING_DUMMY_COLUMNS
    elif role == 'bowling':
        data_template = bowling_df
        role_scaler = bowling_scaler
        role_selector = bowling_selector
        model = bowling_model
        dummy_columns = _BOWLING_DUMMY_COLUMNS
    else:
        # Fail here with a clear message instead of printing and then crashing
        # further down on an unbound local.
        raise ValueError(f'Role cannot be {role}. It can only be "bowling" or "batting".')

    # Only the template's column labels are read, so no copy of it is needed.
    new_data_df = InputNewData(data_template=data_template).add_new_row(data)

    preprocessor = PreprocessingPipeline(
        # Give the pipeline its own copy so new_data_df keeps the raw values
        # regardless of how the pipeline treats its input.
        new_data_df=new_data_df.copy(),
        role_scaler=role_scaler,
        role_col_list=list(dummy_columns),
        role_selector=role_selector
    )
    model_input = preprocessor.preprocess_data()
    y_pred = model.predict(model_input)

    # Rebuild a frame with the scaler's column layout (scaled exactly once),
    # put the scaled prediction in the target column and undo the scaling.
    _, scaler_input = preprocessor._scale_numeric_columns(new_data_df)
    scaler_input = scaler_input.copy()
    scaler_input['NY_SalaryUSD'] = y_pred[0]
    # Look the target up by name rather than assuming it is the last column.
    target_pos = scaler_input.columns.get_loc('NY_SalaryUSD')
    pred_salary = role_scaler.inverse_transform(scaler_input)[0][target_pos]
    return pred_salary

# **Test Predictions**

In [27]:
# One batting row for a smoke test. predict_salary places these values under
# batting_df's columns, so the order here has to match that frame's columns.
sample_bat_data = [
    'Shubman Gill', 'India', 0, 2023, 'Kolkata Knight Riders', 963501.60,
    17, 5, 564, 890, 59.33, 157.80,
    4, 3, 85, 33, 0, 0,
]

In [28]:
# Run the batting prediction end to end on the sample row.
sample_bat_salary = predict_salary(data=sample_bat_data, role='batting')

In [29]:
# Show the batting prediction as this cell's output.
sample_bat_salary

977323.0334347271

In [21]:
# One bowling row for a smoke test. predict_salary places these values under
# bowling_df's columns, so the order here has to match that frame's columns.
sample_bowl_data = [
    'Umran Malik', 'India', 0, 2022, 'Sunrisers Hyderabad', 481914.00,
    14, 2, 295, 444, 22, 20.18, 9.03, 13.40,
    1, 1, 20, 20, 40, 0, 0,
]

In [22]:
# Run the bowling prediction end to end on the sample row.
sample_bowl_salary = predict_salary(data=sample_bowl_data, role='bowling')

In [23]:
# Show the bowling prediction as this cell's output.
sample_bowl_salary

483044.7507882039