# Data loading and defining

In [None]:
#import necessary libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

In [None]:
# Load the dispatch summary CSV; the path is relative to the notebook's working directory.
data=pd.read_csv('datasets copies/DispatchSummary.csv')

In [None]:
# Inspect the column labels exactly as they were read from the CSV.
data.columns

In [None]:
# Overwrite the column labels with nine short snake_case names.
# The assignment is positional, so the order here must match the CSV's column order.
data.columns=['financial_year', 'sales', 'man_power', 'available_hours', 'inc_smh', 'efficiency', 'smh', 'booked', 'smh_lacs']

In [None]:
# Confirm the renamed column labels.
data.columns

In [None]:
# Output column name -> source column in `data`. Three columns are renamed on
# the way (inc_smh, smh, smh_lacs); the rest keep their name.
column_sources = {
    'financial_year': 'financial_year',
    'sales': 'sales',
    'man_power': 'man_power',
    'efficiency': 'efficiency',
    'booked': 'booked',
    'dispatch_smh': 'inc_smh',
    'inhouse_smh': 'smh',
    'outsourced_smh': 'smh_lacs',
}

# Only the first nine rows are kept here; later cells read rows 9-14 of `data`
# separately as the period to predict.
dataDic = {target: data[source][0:9] for target, source in column_sources.items()}

print(dataDic)

In [None]:
# Assemble the sliced columns into the frame that the training arrays below are taken from.
df=pd.DataFrame(dataDic)
print(df)

# Prediction of efficiency based on man power

In [None]:
# Feature: man power as a single-column 2-D array (the layout estimators expect for X).
manpower_train = df['man_power'].to_numpy().reshape(-1, 1)
# Target: efficiency as a 1-D array aligned row-for-row with the feature.
efficiency_train = df['efficiency'].to_numpy()

**Various Models**

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR


In [None]:
def models_for_efficiency(model_name, model):
  """Fit `model` on man power vs. efficiency and report its predictions.

  Works on the notebook-level `manpower_train` / `efficiency_train` arrays and
  on rows 9-14 of `data`. `plt` and `mean_squared_error` must already be
  imported when this function is called.

  Args:
    model_name: Heading printed above the report.
    model: Estimator exposing `fit(X, y)` and `predict(X)`; it is fitted in place.

  Returns:
    The predictions for `data['man_power'][9:15]`, in row order.
  """
  print('\n',model_name, sep='')

  #model building
  model.fit(manpower_train, efficiency_train)

  #prediction on train data
  efficiency_train_predict=model.predict(manpower_train)
  print('On train dataset (Actual vs Predict)')
  print('Actual\tPredict')
  for actual, predicted in zip(efficiency_train, efficiency_train_predict):
    print(f'{actual}\t{predicted}')

  #mse (computed on the training rows, so it measures fit, not generalisation)
  mse=mean_squared_error(efficiency_train, efficiency_train_predict)
  print(f'\nMSE: {mse:.2f}\n')

  #plot
  plt.plot(manpower_train, efficiency_train, color='blue', label='Actual')
  plt.plot(manpower_train, efficiency_train_predict, color='red', label='Predict')
  plt.xlabel('man power')
  plt.ylabel('efficiency')
  plt.legend()  # the labels above are only drawn when a legend is requested
  plt.show()

  #prediction on unseen data
  manpower_test=data['man_power'][9:15].values.reshape(-1,1)
  financial_year_test=data['financial_year'][9:15].reset_index(drop=True)

  efficiency_predict_test=model.predict(manpower_test)
  print('\nPrediction on future data')
  print('Financial year\tMan power\tEfficiency')
  for year, manpower_row, predicted in zip(financial_year_test, manpower_test, efficiency_predict_test):
    print(year,'\t',*manpower_row,'\t\t',predicted)
  return efficiency_predict_test


In [None]:
# Candidate regressors keyed by the heading printed for each report.
# random_state is pinned on the tree-based models so that re-running the
# notebook reproduces the same numbers.
models={
    'LINEAR REGRESSION':LinearRegression(),
    'DECISION TREE REGRESSION':DecisionTreeRegressor(random_state=42),
    'RANDOM FOREST REGRESSION':RandomForestRegressor(n_estimators=100, max_depth=4, random_state=42),
    'GRADIENT BOOSTING REGRESSION':GradientBoostingRegressor(n_estimators=100, max_depth=4, random_state=42),
    'SUPPORT VECTOR REGRESSION':SVR(kernel='linear', C=1.0, epsilon=0.1)
}

# One entry per model, in dict order. Later cells index this list by position
# (index 4 is the SVR entry), so the order of `models` matters.
efficiency_predictions=[]
for model_name, model in models.items():
  efficiency_predictions.append(models_for_efficiency(model_name, model))

**Polynomial Regression**

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Degree-3 polynomial expansion of man power. The fitted transformer is kept
# in `poly` because the future rows are expanded with it later.
poly = PolynomialFeatures(degree=3).fit(manpower_train)
manpower_train_poly = poly.transform(manpower_train)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Linear regression on the polynomial features, i.e. polynomial regression.
efficiency_model_poly = LinearRegression()
efficiency_model_poly.fit(manpower_train_poly, efficiency_train)

#prediction on train dataset
efficiency_train_predict_poly=efficiency_model_poly.predict(manpower_train_poly)
print('On train dataset (Actual vs Predict)')
print('Actual\tPredict')
for actual, predicted in zip(efficiency_train, efficiency_train_predict_poly):
    print(f'{actual}\t{predicted}')

#mean square error (on the training rows)
print('\nMean squared error: ', mean_squared_error(efficiency_train, efficiency_train_predict_poly), '\n')

#plot
plt.plot(manpower_train, efficiency_train, color='blue', label='Actual')
plt.plot(manpower_train, efficiency_train_predict_poly, color='red', label='Predict')
plt.xlabel('man power')
plt.ylabel('efficiency')
plt.legend()  # the labels above are only drawn when a legend is requested
plt.show()

#prediction on unseen data
# manpower_test and financial_year_test are also read by later cells.
manpower_test=data['man_power'][9:15].values.reshape(-1,1)
manpower_test_poly=poly.transform(manpower_test)
financial_year_test=data['financial_year'][9:15].reset_index()

efficiency_predicted_poly=efficiency_model_poly.predict(manpower_test_poly)

print('Prediction on future data')
print('Financial year\tMan power\tEfficiency')
for i in range(len(manpower_test)):
    print(financial_year_test['financial_year'][i],'\t',*manpower_test[i],'\t',efficiency_predicted_poly[i])

# NOTE: this append is not idempotent; re-running the cell adds another copy
# to the end of efficiency_predictions.
efficiency_predictions.append(efficiency_predicted_poly)

**Comparison of All Models**

In [None]:
# Axis ticks, legend entries and colours; all three are matched by position
# to the entries of efficiency_predictions.
future_years = [
    '2024-25', '2025-26', '2026-27',
    '2027-28', '2028-29', '2029-30',
]
labels = [
    'Linear Regression',
    'Decision Tree Regression',
    'Random Forest Regression',
    'Gradient Boosting Regression',
    'Support Vector Regression',
    'Polynomial Regression',
]
colors = ['blue', 'black', 'purple', 'brown', 'red', 'olive']

# Plotting Future Predictions
fig, ax = plt.subplots(figsize=(12, 6))
for idx, (label, color) in enumerate(zip(labels, colors)):
    ax.plot(future_years, efficiency_predictions[idx], label=label, marker='o', color=color)
ax.set_xlabel('Financial Year')
ax.set_ylabel('Predicted Efficiency')
ax.set_title('Predicted Efficiency for Future Years')
ax.grid()
ax.legend()
plt.show()


**Hyperparameter Tuning of SVR**

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import numpy as np

# Scale the feature, then fit an SVR. The step names ('scaler', 'svr') are the
# prefixes used by the parameter grid below.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svr', SVR()),
])

# Every combination of these values is evaluated.
param_grid = {
    'svr__C': [0.1, 1, 10, 100],             # Regularization parameter
    'svr__epsilon': [0.01, 0.1, 1],          # Epsilon in the epsilon-SVR model
    'svr__kernel': ['linear', 'rbf', 'poly'],  # Kernel type
}

# 5-fold cross-validation; scores are negated MSE, so higher is better.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=2,
)
grid_search.fit(manpower_train, efficiency_train)

In [None]:
# Pipeline with the winning parameter combination.
best_model = grid_search.best_estimator_

# best_score_ is a negated MSE (see the scoring argument), so flip the sign back.
print("Best parameters found: ", grid_search.best_params_)
print("Lowest MSE found: ", -grid_search.best_score_)

# MSE of the winning pipeline on the training rows themselves.
efficiency_pred_best = best_model.predict(manpower_train)
mse_best = mean_squared_error(y_true=efficiency_train, y_pred=efficiency_pred_best)
print(f'Best Model MSE: {mse_best:.2f}')

**Final Model: Support Vector Regression**

In [None]:
fin_year = data['financial_year']

# Historical efficiency followed by the forecast stored at index 4 of
# efficiency_predictions (the SVR entry). The combined values land in column 0;
# reset_index renumbers the rows from 0 and keeps the old labels in 'index'.
efficiency = pd.concat(
    [df['efficiency'], pd.DataFrame(efficiency_predictions[4])],
    axis=0,
).reset_index()

#display
print('Financial year\tEfficiency')
for row in range(len(fin_year)):
    print(fin_year[row],'\t',efficiency.loc[row, 0])

#plot
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(fin_year, efficiency[0], marker='o')
ax.set_xlabel('Financial year')
ax.set_ylabel('Efficiency')
ax.set_title('Efficiency over years')
ax.grid()
plt.show()

**Calculation of future values**

In [None]:
booked_test=data['booked'][9:15].reset_index()

# No other cell defines efficiency_svr_test. The final model chosen above is
# the SVR, whose forecasts sit at index 4 of efficiency_predictions.
efficiency_svr_test=efficiency_predictions[4]

# In-house SMH = booked * efficiency / 100.
inhouse_smh_predicted = (booked_test['booked'] * efficiency_svr_test) / 100
inhouse_smh_predicted = inhouse_smh_predicted.reset_index()
inhouse_smh_predicted.columns=['index', 'smh']

In [None]:
# SVR forecasts (index 4 of efficiency_predictions), looked up here so that
# the table does not depend on a name that no cell defines.
svr_efficiency=efficiency_predictions[4]

print('Prediction on future data')
print('Financial year\tMan power\tEfficiency\tInhouse SMH\t\tBooked')
for i in range(len(manpower_test)):
    print(financial_year_test['financial_year'][i],'\t',*manpower_test[i],'\t',svr_efficiency[i],'\t',inhouse_smh_predicted['smh'][i],'\t',booked_test['booked'][i])


**Overall Data**

In [None]:
manpower = data['man_power']

# Historical in-house SMH stacked on top of the forecast under a single 'smh'
# column; reset_index renumbers the rows from 0.
inhouse_smh = pd.concat(
    [
        df[['inhouse_smh']].rename(columns={'inhouse_smh': 'smh'}),
        inhouse_smh_predicted[['smh']],
    ],
    axis=0,
).reset_index()

print('Financial year\tMan Power\tEfficiency\tInnhouse SMH')
for row in range(len(manpower)):
    print(fin_year[row], '\t\t', manpower[row], '\t\t', efficiency.loc[row, 0], '\t\t\t', inhouse_smh.loc[row, 'smh'], sep='')

In [None]:
# In-house SMH by financial year (history followed by the forecast).
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(fin_year, inhouse_smh['smh'], marker='o')
ax.set_xlabel('Financial Year')
ax.set_ylabel('Inhouse SMH')
ax.set_title('Inhouse SMH over years')
ax.grid()
plt.show()

In [None]:
# In-house SMH against man power, points joined in row order.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(manpower, inhouse_smh['smh'], marker='o')
ax.set_xlabel('Manpower')
ax.set_ylabel('Inhouse SMH')
ax.set_title('Inhouse SMH over Man power')
ax.grid()
plt.show()

# EDA

In [None]:
import seaborn as sns
# Pairwise relationships between sales and the three SMH columns of the training frame.
sns.pairplot(df[['sales','dispatch_smh','inhouse_smh','outsourced_smh']])


# Prediction of dispatch smh based on the sales 

In [None]:
# Feature: sales as a single-column 2-D array.
sales_train = df['sales'].to_numpy().reshape(-1, 1)
# Target: dispatch SMH as a 1-D array aligned with the feature rows.
dispatch_smh_train = df['dispatch_smh'].to_numpy()

**Various Models**

In [None]:
def models_for_dispatch_smh(model_name, model):
  """Fit `model` on sales vs. dispatch SMH and report its predictions.

  Works on the notebook-level `sales_train` / `dispatch_smh_train` arrays and
  on rows 9-14 of `data`.

  Args:
    model_name: Heading printed above the report.
    model: Estimator exposing `fit(X, y)` and `predict(X)`; it is fitted in place.

  Returns:
    The predictions for `data['sales'][9:15]`, in row order (returned for
    parity with `models_for_efficiency`; callers may ignore it).
  """
  print('\n',model_name, sep='')

  #model building
  model.fit(sales_train, dispatch_smh_train)

  #prediction on train data
  dispatch_smh_train_predict=model.predict(sales_train)
  print('On train dataset (Actual vs Predict)')
  print('Actual\t\tPredict')
  for actual, predicted in zip(dispatch_smh_train, dispatch_smh_train_predict):
    print(f'{actual}\t{predicted}')

  #mse (computed on the training rows)
  mse=mean_squared_error(dispatch_smh_train, dispatch_smh_train_predict)
  print(f'\nMSE: {mse:.2f}\n')

  #plot
  plt.plot(sales_train, dispatch_smh_train, color='blue', label='Actual')
  plt.plot(sales_train, dispatch_smh_train_predict, color='red', label='Predict')
  plt.xlabel('sales')
  plt.ylabel('dispatch smh')
  plt.legend()  # the labels above are only drawn when a legend is requested
  plt.show()

  #prediction on unseen data
  sales_test=data['sales'][9:15].values.reshape(-1,1)

  dispatch_smh_predict_test=model.predict(sales_test)
  print('\nPrediction on future data')
  print('Sales\t\tDispatch SMH')
  for sales_row, predicted in zip(sales_test, dispatch_smh_predict_test):
    print(*sales_row,'\t\t',predicted)
  return dispatch_smh_predict_test

# Fresh estimator instances for the dispatch-SMH target.
# random_state is pinned on the tree-based models so that re-running the
# notebook reproduces the same numbers.
models={
    'LINEAR REGRESSION':LinearRegression(),
    'DECISION TREE REGRESSION':DecisionTreeRegressor(random_state=42),
    'RANDOM FOREST REGRESSION':RandomForestRegressor(n_estimators=100, max_depth=4, random_state=42),
    'GRADIENT BOOSTING REGRESSION':GradientBoostingRegressor(n_estimators=100, max_depth=4, random_state=42),
    'SUPPORT VECTOR REGRESSION':SVR(kernel='linear', C=1.0, epsilon=0.1)
}

for model_name, model in models.items():
  models_for_dispatch_smh(model_name, model)



**Final Model: Polynomial Regression**

In [None]:
# Degree-2 polynomial expansion of sales. The fitted transformer is reused for
# the future rows below.
poly_dispatch=PolynomialFeatures(degree=2)
sales_train_poly=poly_dispatch.fit_transform(sales_train)

dispatch_model_poly=LinearRegression()
dispatch_model_poly.fit(sales_train_poly,dispatch_smh_train)

#dispatch predicted on training sales data
dispatch_train_predict_poly=dispatch_model_poly.predict(sales_train_poly)
print('Predicted dispatch values for training sales data:')
print(dispatch_train_predict_poly)

#mean squared error (on the training rows)
mean_squared_error_dispatch_poly=mean_squared_error(dispatch_smh_train,dispatch_train_predict_poly)
print('Mean_squared_error=',mean_squared_error_dispatch_poly)

plt.plot(sales_train,dispatch_smh_train, color='blue', label='Actual')
plt.plot(sales_train,dispatch_train_predict_poly, color='red', label='predicted')
plt.xlabel('sales')
plt.ylabel('dispatch_smh')
plt.legend()  # the labels above are only drawn when a legend is requested
plt.show()


#dispatch prediction for future-unseen sales data
sales_test=data['sales'][9:15].values.reshape(-1,1)
sales_test_poly=poly_dispatch.transform(sales_test)

# Read by later cells as the dispatch SMH forecast.
dispatch_smh_predicted_poly=dispatch_model_poly.predict(sales_test_poly)

print("Dispatch smh predicted for future-unseen data:")
print('Sales\t\tDispatch_smh')
for sales_row, predicted in zip(sales_test, dispatch_smh_predicted_poly):
    print(*sales_row,'\t',predicted)


In [None]:
sales = data['sales']

# Historical dispatch SMH followed by the polynomial forecast. The combined
# values land in column 0; reset_index renumbers the rows from 0.
dispatch_smh = pd.concat(
    [df['dispatch_smh'], pd.DataFrame(dispatch_smh_predicted_poly)],
    axis=0,
).reset_index()

print('Financial Year\tSales\tDispatch SMH')
for row in range(len(sales)):
    print(fin_year[row],'\t',sales[row],'\t',dispatch_smh.loc[row, 0])

In [None]:
# Dispatch SMH by financial year (history followed by the forecast).
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(fin_year, dispatch_smh[0], marker='o')
ax.set_xlabel('Financial Years')
ax.set_ylabel('Dispatch SMH')
ax.set_title('Dispatch SMH over years')
ax.grid()
plt.show()

In [None]:
# Dispatch SMH against sales, points joined in row order.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(sales, dispatch_smh[0], marker='o')
ax.set_xlabel('Sales')
ax.set_ylabel('Dispatch SMH')
ax.set_title('Dispatch SMH over Sales')
ax.grid()
plt.show()

# Prediction of outsourced smh

In [None]:
# Feature: in-house SMH as a single-column 2-D array.
x_train = df['inhouse_smh'].to_numpy().reshape(-1, 1)
# Target: outsourced SMH as a 1-D array aligned with the feature rows.
outsourced_smh_train = df['outsourced_smh'].to_numpy()

**Possible dependencies of Outsourced SMH on other features of the data**

In [None]:
# Candidate feature combinations for predicting outsourced SMH, in COMBO order.
train_feature_sets = [
    ['inhouse_smh', 'dispatch_smh'],
    ['inhouse_smh', 'dispatch_smh', 'sales'],
    ['dispatch_smh', 'sales'],
    ['dispatch_smh'],
    ['sales'],
    ['inhouse_smh'],
    ['inhouse_smh', 'sales'],
]
x1_train, x2_train, x3_train, x4_train, x5_train, x6_train, x7_train = [
    df[columns] for columns in train_feature_sets
]

In [None]:
# NOTE: this rebinds `sales` (until now the full data['sales'] column) to the
# re-indexed future rows, so earlier cells that read `sales` should not be
# re-run after this one.
sales = data['sales'][9:15].reset_index()

# Future-period values of every feature, keyed by the training column name.
future_features = {
    'inhouse_smh': inhouse_smh_predicted['smh'],
    'dispatch_smh': dispatch_smh_predicted_poly,
    'sales': sales['sales'],
}

# Same combinations, in the same column order, as the x*_train frames.
test_feature_sets = [
    ['inhouse_smh', 'dispatch_smh'],
    ['inhouse_smh', 'dispatch_smh', 'sales'],
    ['dispatch_smh', 'sales'],
    ['dispatch_smh'],
    ['sales'],
    ['inhouse_smh'],
    ['inhouse_smh', 'sales'],
]
x1_test, x2_test, x3_test, x4_test, x5_test, x6_test, x7_test = [
    pd.DataFrame({name: future_features[name] for name in names})
    for names in test_feature_sets
]

In [None]:
def poly_reg(i, x_train, x_test):
    """Fit a degree-2 polynomial regression of outsourced SMH on one feature combination.

    The target is the notebook-level `outsourced_smh_train`. The function
    prints the train fit, its MSE and the future predictions, and draws an
    actual-vs-predicted plot.

    Args:
        i: Zero-based position of the combination; only used for the 'COMBO' heading.
        x_train: Training features, one row per entry of `outsourced_smh_train`.
        x_test: Future-period features with the same columns as `x_train`.

    Returns:
        The predicted outsourced SMH for `x_test`. This is returned for every
        combination (it used to be returned only for the sixth), so callers
        choose which result to keep.
    """
    print('COMBO', i+1)
    #model
    poly=PolynomialFeatures(degree=2)
    x_train_poly=poly.fit_transform(x_train)
    model=LinearRegression()
    model.fit(x_train_poly,outsourced_smh_train)

    #prediction on train data
    outsourced_smh_train_pred=model.predict(x_train_poly)
    print("predicted outsourced smh for training data:")
    print('Actual outsourced smh \t \t predicted outsourced smh')
    for actual, predicted in zip(outsourced_smh_train, outsourced_smh_train_pred):
        print(actual,'\t\t', predicted)

    #mse (on the training rows)
    mse=mean_squared_error(outsourced_smh_train, outsourced_smh_train_pred)
    print('mse:',mse)

    #plot
    positions=range(len(outsourced_smh_train))
    plt.figure(figsize=(10, 6))
    plt.plot(positions, outsourced_smh_train, color='blue', label='Actual')
    plt.plot(positions, outsourced_smh_train_pred, color='red', label='Predict')
    plt.title('Actual vs Predicted Outsourced SMH')
    plt.xlabel('Index')
    plt.ylabel('Outsourced SMH')
    plt.legend()
    plt.show()

    #prediction on unseen data
    x_test_poly=poly.transform(x_test)
    outsourced_smh_test_pred=model.predict(x_test_poly)
    print("Prediction on unseen data")
    for predicted in outsourced_smh_test_pred:
        print(predicted)
    print('\n\n')

    return outsourced_smh_test_pred
    
    
        
inputs_train = [x1_train, x2_train, x3_train, x4_train, x5_train, x6_train, x7_train]
inputs_test = [x1_test, x2_test, x3_test, x4_test, x5_test, x6_test, x7_test]

# Run every combination; only the sixth one's forecast is kept for later cells.
for i, (combo_train, combo_test) in enumerate(zip(inputs_train, inputs_test)):
    combo_prediction = poly_reg(i, combo_train, combo_test)
    if i + 1 == 6:
        outsourced_smh_pred_poly = combo_prediction

**Various Models on the basis of inhouse dependency**

In [None]:
def models_for_outsourced_smh(model_name, model):
  """Fit `model` on in-house SMH vs. outsourced SMH and report its predictions.

  Works on the notebook-level `x_train` / `outsourced_smh_train` arrays and on
  the forecast in `inhouse_smh_predicted['smh']`.

  Args:
    model_name: Heading printed above the report.
    model: Estimator exposing `fit(X, y)` and `predict(X)`; it is fitted in place.

  Returns:
    The predicted outsourced SMH for the forecast in-house SMH values
    (returned for parity with `models_for_efficiency`; callers may ignore it).
  """
  print('\n',model_name, sep='')

  #model building
  model.fit(x_train, outsourced_smh_train)

  #prediction on train data
  outsourced_smh_train_pred=model.predict(x_train)
  print('On train dataset (Actual vs Predict)')
  print('Actual\t\tPredict')
  for actual, predicted in zip(outsourced_smh_train, outsourced_smh_train_pred):
    print(f'{actual}\t{predicted}')

  #mse (on the training rows)
  mse=mean_squared_error(outsourced_smh_train, outsourced_smh_train_pred)
  print(f'\nMSE: {mse:.2f}\n')

  #plot
  positions=range(len(outsourced_smh_train))
  plt.figure(figsize=(10, 6))
  plt.plot(positions, outsourced_smh_train, color='blue', label='Actual')
  plt.plot(positions, outsourced_smh_train_pred, color='red', label='Predict')
  plt.title('Actual vs Predicted Outsourced SMH')
  plt.xlabel('Index')
  plt.ylabel('Outsourced SMH')
  plt.legend()
  plt.show()

  #prediction on unseen data
  # The model is fitted on a plain array, so predict on a plain array too.
  # Passing a named DataFrame makes scikit-learn warn about feature names it
  # never saw during fit.
  x_test=inhouse_smh_predicted['smh'].to_numpy().reshape(-1,1)

  outsourced_smh_test_pred=model.predict(x_test)

  print('\nPredicted outsourced smh for unseen data')
  print('Inhouse SMH\t\tOutsourced SMH')
  for inhouse_value, predicted in zip(x_test[:, 0], outsourced_smh_test_pred):
    print(inhouse_value,'\t',predicted)
  return outsourced_smh_test_pred

# Fresh estimator instances for the outsourced-SMH target.
# random_state is pinned on the tree-based models so that re-running the
# notebook reproduces the same numbers.
models={
    'LINEAR REGRESSION':LinearRegression(),
    'DECISION TREE REGRESSION':DecisionTreeRegressor(random_state=42),
    'RANDOM FOREST REGRESSION':RandomForestRegressor(n_estimators=100, max_depth=4, random_state=42),
    'GRADIENT BOOSTING REGRESSION':GradientBoostingRegressor(n_estimators=100, max_depth=4, random_state=42),
    'SUPPORT VECTOR REGRESSION':SVR(kernel='linear', C=1.0, epsilon=0.1)
}

for model_name, model in models.items():
  models_for_outsourced_smh(model_name, model)






**Final Model: Polynomial Regression with inhouse dependency**

In [None]:
# Historical outsourced SMH followed by the polynomial forecast kept from the
# combination loop. The combined values land in column 0; reset_index
# renumbers the rows from 0.
outsourced_smh = pd.concat(
    [df['outsourced_smh'], pd.DataFrame(outsourced_smh_pred_poly)],
    axis=0,
).reset_index()

print('Financial year\tInhouse SMH\tOutsoured SMH')
for row in range(len(fin_year)):
    print(fin_year[row], '\t',  inhouse_smh.loc[row, 'smh'], '\t', outsourced_smh.loc[row, 0])

In [None]:
# Outsourced SMH by financial year (history followed by the forecast).
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(fin_year, outsourced_smh[0], marker='o')
ax.set_xlabel('Financial Years')
ax.set_ylabel('Outsourced SMH')
ax.set_title('Outsourced SMH over years')
ax.grid()
plt.show()