<a href="https://colab.research.google.com/github/sibot89/Absenteeism-Prediction/blob/main/Absenteeism_Predict_with_new_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin
from google.colab import files

In [9]:
class CustomScaler(BaseEstimator, TransformerMixin):
  """Standardize a chosen subset of DataFrame columns, leaving the rest as-is.

  Wraps a ``StandardScaler`` that is fitted on, and applied to, only the
  columns named in ``columns``. All other columns pass through ``transform``
  unchanged, and the original column order is restored in the result.

  Parameters
  ----------
  columns : list
      Labels of the columns to standardize.
  copy, with_mean, with_std : bool
      Forwarded to ``StandardScaler``.
  """

  def __init__(self, columns, copy=True, with_mean=True, with_std=True):
    # StandardScaler's parameters are keyword-only in current scikit-learn;
    # passing them positionally raises TypeError.
    self.scaler = StandardScaler(copy=copy, with_mean=with_mean,
                                 with_std=with_std)
    self.columns = columns
    # Keep the constructor arguments as attributes so that
    # BaseEstimator.get_params() (and therefore repr/clone) can read them.
    self.copy = copy
    self.with_mean = with_mean
    self.with_std = with_std
    self.mean = None
    self.var = None

  def fit(self, X, y=None):
    """Fit the wrapped scaler on ``X[self.columns]`` and return ``self``.

    Also records the per-column mean and population variance (``ddof=0``)
    of the selected columns in ``self.mean`` and ``self.var``.
    """
    selected = X[self.columns]
    self.scaler.fit(selected, y)
    # Explicit per-column reductions: np.mean/np.var on a DataFrame forward
    # axis=None, whose meaning is not stable across pandas versions.
    self.mean = selected.mean()
    self.var = selected.var(ddof=0)
    return self

  def transform(self, X, y=None, copy=None):
    """Return ``X`` with the selected columns standardized.

    ``y`` and ``copy`` are accepted for API compatibility and are not used.
    """
    init_col_order = X.columns
    # Reuse X's index so the column-wise concat below lines rows up even
    # when X does not carry a default 0..n-1 index.
    X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]),
                            columns=self.columns, index=X.index)
    X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
    return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]


In [10]:
class predict_new_data():
  """Load a pickled model and scaler, preprocess new data and predict on it.

  Parameters
  ----------
  model_file, scaler_file : str, path-like or binary file object
      Where to read the pickled model and the pickled scaler from.

  Attributes
  ----------
  reg : unpickled model; ``predict`` and ``predict_proba`` are called on it.
  scaler : unpickled scaler; ``transform`` is called on it.
  data : scaled features produced by ``load_and_clean_data`` (None before).
  """

  def __init__(self, model_file, scaler_file):
    # google.colab's files.download() pushes a file to the browser and does
    # not return a readable handle, so the pickles are read from the
    # locations given by the caller instead.
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # model/scaler files that come from a trusted source.
    self.reg = self._load_pickle(model_file)
    self.scaler = self._load_pickle(scaler_file)
    self.data = None

  @staticmethod
  def _load_pickle(source):
    """Unpickle from an open binary file object or from a filesystem path."""
    if hasattr(source, 'read'):
      return pickle.load(source)
    with open(source, 'rb') as handle:
      return pickle.load(handle)

  def load_and_clean_data(self, data_file):
    """Read a CSV file and turn it into the feature table fed to the model.

    Stores the raw frame in ``self.df_with_predictions``, the unscaled
    features in ``self.preprocessed_data`` and the scaler's output in
    ``self.data``.

    The CSV must contain the columns 'ID', 'Reason for Absence', 'Date',
    'Transportation Expense', 'Age', 'Body Mass Index', 'Education',
    'Children' and 'Pets'; any other columns are ignored.
    """
    df = pd.read_csv(data_file, delimiter=',')
    self.df_with_predictions = df.copy()
    df = df.drop(['ID'], axis=1)

    # Collapse the reason codes into four 0/1 group indicators. Code 0 maps
    # to all zeros (it was the dropped baseline dummy). Comparing ranges
    # directly, rather than slicing dummy columns, keeps the result
    # independent of which codes happen to occur in the new data.
    reason = df['Reason for Absence']
    df = df.drop(['Reason for Absence'], axis=1)
    df['Reason1'] = reason.between(1, 14).astype(int)
    df['Reason2'] = reason.between(15, 17).astype(int)
    df['Reason3'] = reason.between(18, 21).astype(int)
    df['Reason4'] = (reason >= 22).astype(int)

    # NOTE(review): no explicit date format is given; confirm the CSV's
    # day/month order parses the same way as the training data did.
    dates = pd.to_datetime(df['Date'])
    df['Month'] = dates.dt.month
    # Monday=0 ... Sunday=6 -- presumably the convention used in training;
    # TODO confirm.
    df['Day of the Week'] = dates.dt.weekday

    # Binary education flag: level 1 -> 0, levels 2-4 -> 1.
    df['Education'] = df['Education'].map({1: 0, 2: 1, 3: 1, 4: 1})

    # Final feature set and order. The target, the daily work load average
    # and the distance to work are deliberately left out.
    feature_columns = ['Reason1', 'Reason2', 'Reason3', 'Reason4', 'Month',
                       'Day of the Week', 'Transportation Expense', 'Age',
                       'Body Mass Index', 'Education', 'Children', 'Pets']
    df = df[feature_columns]

    self.preprocessed_data = df.copy()

    self.data = self.scaler.transform(df)

  def predicted_probability(self):
    """Return column 1 of ``predict_proba``, or None if no data is loaded."""
    if self.data is not None:
      return self.reg.predict_proba(self.data)[:, 1]

  def predicted_output_category(self):
    """Return the model's ``predict`` output, or None if no data is loaded."""
    if self.data is not None:
      return self.reg.predict(self.data)

  def predicted_outputs(self):
    """Return the preprocessed frame with prediction columns added.

    Adds 'Probability' and 'Prediction' columns to
    ``self.preprocessed_data`` in place and returns that frame; returns
    None if no data is loaded.
    """
    if self.data is not None:
      self.preprocessed_data['Probability'] = self.reg.predict_proba(self.data)[:, 1]
      self.preprocessed_data['Prediction'] = self.reg.predict(self.data)
      return self.preprocessed_data



