# Data loading and Preprocessing

In [None]:
#import necessary libraries

import pandas as pd
import numpy as np

In [None]:
# Load the dispatch summary table; the path is relative, so it is resolved
# against the notebook's current working directory.
data=pd.read_csv('datasets copies/DispatchSummary.csv')

In [None]:
# Display the column labels of the freshly loaded frame.
data.columns

In [None]:
# Replace the file's own headers with short snake_case names. The assignment is
# positional, so the frame must hold exactly these nine columns in this order.
data.columns = [
    'financial_year',
    'sales',
    'man_power',
    'available_hours',
    'inc_smh',
    'efficiency',
    'smh',
    'booked',
    'smh_lacs',
]

In [None]:
# Display the column labels again to confirm the rename took effect.
data.columns

In [None]:
# Collect the first nine rows of the columns used for modelling, keyed by the
# names the rest of the notebook works with. Three columns are relabelled here:
# inc_smh -> dispatch_smh, smh -> inhouse_smh, smh_lacs -> outsourced_smh.
dataDic = {
    target: data[source][0:9]
    for target, source in [
        ('financial_year', 'financial_year'),
        ('sales', 'sales'),
        ('man_power', 'man_power'),
        ('efficiency', 'efficiency'),
        ('booked', 'booked'),
        ('dispatch_smh', 'inc_smh'),
        ('inhouse_smh', 'smh'),
        ('outsourced_smh', 'smh_lacs'),
    ]
}

print(dataDic)

In [None]:
# Assemble the selected nine-row slices into one frame; this is the training
# table the later modelling cells read from.
df=pd.DataFrame(dataDic)
print(df)

# Efficiency Prediction

In [None]:
# Number of rows that carry a non-missing efficiency value; the next cell uses
# k-1 as the label of the last known efficiency.
k=len(data['efficiency'].dropna())

In [None]:
def efficiency_custom(t,n,last_year):
    """Project efficiency forward with a fixed percentage increase per step.

    Parameters
    ----------
    t : int
        Number of future values to produce.
    n : int or float
        Growth applied at every step, in percent.
    last_year : number
        Most recent known efficiency; used as the starting point and not
        included in the result.

    Returns
    -------
    list
        ``t`` projected values, each one ``n`` percent above the previous.
    """
    projected = []
    current = last_year
    for _ in range(t):
        # Compound on the previous value rather than on the starting value.
        current = current + current*(n/100)
        projected.append(current)
    return projected

# Forecast horizon and growth rate are entered interactively.
# NOTE: later cells pair the forecast with rows 9:15 of `data`, so they expect
# six values to be requested here.
t=int(input('enter the number of future values to be calculated:'))
# Parse the rate as a float so fractional percentages (e.g. 2.5) are accepted;
# whole-number input yields the same projection as before.
n=float(input('Enter the rate of increase (%):'))
# Start from the most recent non-missing efficiency, selected by position so
# the lookup does not depend on index labels or on NaNs being only at the end.
last_year=data['efficiency'].dropna().iloc[-1]
fut_efficiency=efficiency_custom(t,n,last_year)

In [None]:
# Efficiency (observed, then projected) plotted against financial year.
import matplotlib.pyplot as plt

fin_year = data['financial_year']
manpower = data['man_power']
# Stack the observed efficiencies on top of the projected ones. The plots read
# the combined values from column 0 of the result.
# NOTE(review): that column label comes from concatenating a Series with a
# DataFrame - confirm it still holds on the installed pandas version.
efficiency = pd.concat(
    [df['efficiency'], pd.DataFrame(fut_efficiency)],
    axis=0,
).reset_index()

plt.figure(figsize=(12, 6))
plt.plot(fin_year, efficiency[0], marker='o')
plt.gca().set(xlabel='Financial year', ylabel='Efficiency', title='Efficiency over years')
plt.grid()
plt.show()



In [None]:

# Efficiency (observed, then projected) plotted against manpower.
plt.figure(figsize=(12, 6))
plt.plot(manpower, efficiency[0], marker='o')
plt.gca().set(xlabel='Manpower', ylabel='Efficiency', title='Efficiency over Man power')
plt.grid()
plt.show()

# Inhouse smh 

In [None]:
# Rows 9:15 of the source table are the forecast period; reset_index() gives
# them positions 0..5 and keeps the original row labels in an 'index' column.
financial_year_test = data['financial_year'][9:15].reset_index()
booked_test = data['booked'][9:15].reset_index()

# In-house SMH = booked * efficiency / 100, element by element. The multiply
# needs fut_efficiency to hold exactly one value per forecast row.
inhouse_smh_predicted = (
    ((booked_test['booked'] * fut_efficiency) / 100)
    .rename('smh')
    .reset_index()
)

In [None]:
# Tabulate the forecast period: one line per row, fields joined by ' \t '.
print('Prediction on future data')
print('Financial year\tEfficiency\tInhouse SMH\t\tBooked')
for i in range(len(booked_test)):
    print(
        financial_year_test['financial_year'][i],
        fut_efficiency[i],
        inhouse_smh_predicted['smh'][i],
        booked_test['booked'][i],
        sep=' \t ',
    )


**Overall Data**

In [None]:
fin_year = data['financial_year']
manpower = data['man_power']

# Observed in-house SMH followed by the forecast, both under a single 'smh'
# column; reset_index() then renumbers the combined rows from 0.
inhouse_smh = pd.concat(
    [
        df[['inhouse_smh']].rename(columns={'inhouse_smh': 'smh'}),
        inhouse_smh_predicted[['smh']],
    ],
    axis=0,
).reset_index()

print('Financial year\tMan Power\tEfficiency\tInnhouse SMH')
for i in range(len(manpower)):
    # Fields and their tab separators are concatenated with nothing in between.
    print(''.join(str(part) for part in (
        fin_year[i], '\t\t', manpower[i], '\t\t', efficiency[0][i], '\t\t\t', inhouse_smh['smh'][i],
    )))

In [None]:
# In-house SMH (observed, then forecast) plotted against financial year.
plt.figure(figsize=(12, 6))
plt.plot(fin_year, inhouse_smh['smh'], marker='o')
plt.gca().set(xlabel='Financial Year', ylabel='Inhouse SMH', title='Inhouse SMH over years')
plt.grid()
plt.show()

In [None]:
# Manpower as a single-column 2-D array and efficiency as a 1-D array, taken
# from the training table.
# NOTE(review): neither name is read by any later cell visible in this notebook.
efficiency_train = df['efficiency'].values
manpower_train = df[['man_power']].values

# Dispatch smh 

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Regression inputs for the dispatch model: sales as a single-column 2-D
# feature array, dispatch SMH as the 1-D target.
dispatch_smh_train = df['dispatch_smh'].values
sales_train = df[['sales']].values

**Final Model: Polynomial Regression**

In [None]:
# Degree-2 polynomial regression of dispatch SMH on sales.
poly_dispatch=PolynomialFeatures(degree=2)
sales_train_poly=poly_dispatch.fit_transform(sales_train)

dispatch_model_poly=LinearRegression()
dispatch_model_poly.fit(sales_train_poly,dispatch_smh_train)

# Dispatch predicted on the training sales data (in-sample fit).
dispatch_train_predict_poly=dispatch_model_poly.predict(sales_train_poly)
print('Predicted dispatch values for training sales data:')
print(dispatch_train_predict_poly)

# Mean squared error on the same rows the model was fitted on.
mean_squared_error_dispatch_poly=mean_squared_error(dispatch_smh_train,dispatch_train_predict_poly)
print('Mean_squared_error=',mean_squared_error_dispatch_poly)

plt.plot(sales_train,dispatch_smh_train, color='blue', label='Actual')
plt.plot(sales_train,dispatch_train_predict_poly, color='red', label='predicted')
plt.xlabel('sales')
plt.ylabel('dispatch_smh')
# The label= arguments above are only drawn once a legend is requested.
plt.legend()
plt.show()


# Dispatch prediction for the forecast rows (9:15), reusing the fitted
# transformer so the features are expanded the same way as in training.
sales_test=data['sales'][9:15].values.reshape(-1,1)
sales_test_poly=poly_dispatch.transform(sales_test)

dispatch_smh_predicted_poly=dispatch_model_poly.predict(sales_test_poly)

print("Dispatch smh predicted for future-unseen data:")
print('Sales\t\tDispatch_smh')
for i in range(len(sales_test)):
    # Each sales_test row is a one-element array; * unpacks it to the bare value.
    print(*sales_test[i],'\t',dispatch_smh_predicted_poly[i])


In [None]:
from decimal import Decimal, ROUND_DOWN

# Truncate (not round) every dispatch prediction to two decimals, in place.
# The Decimal is built from the float's string form rather than from the float
# itself: a raw float exposes its binary expansion (a stored 12.34 reads as
# 12.3399...), so re-running this cell would cut each value by another 0.01.
# The loop covers the whole array instead of assuming exactly six predictions.
for i in range(len(dispatch_smh_predicted_poly)):
    value=Decimal(str(dispatch_smh_predicted_poly[i]))
    dispatch_smh_predicted_poly[i]=value.quantize(Decimal('0.01'), rounding=ROUND_DOWN)
print(dispatch_smh_predicted_poly)

In [None]:
import pickle

# Persist the fitted polynomial transformer and the regression model.
# The context managers flush and close each file even if pickling raises;
# the bare open(...) calls left the handles open.
with open('dispatchPolyModel.pkl', 'wb') as poly_file:
    pickle.dump(poly_dispatch, poly_file)
with open('dispatchModel.pkl', 'wb') as model_file:
    pickle.dump(dispatch_model_poly, model_file)

In [None]:
sales = data['sales']
# Observed dispatch SMH followed by the model's forecast; the print loop and
# the plots read the combined values from column 0.
dispatch_smh = pd.concat(
    [df['dispatch_smh'], pd.DataFrame(dispatch_smh_predicted_poly)],
).reset_index()

print('Financial Year\tSales\tDispatch SMH')
for i in range(len(sales)):
    print(fin_year[i], sales[i], dispatch_smh[0][i], sep=' \t ')

In [None]:
# Forecast-period inputs (rows 9:15, original row labels kept) together with
# the predicted dispatch SMH, which is attached by position.
predefinedValues = pd.DataFrame({
    **{
        column: data[column][9:15]
        for column in ('financial_year', 'sales', 'booked', 'man_power')
    },
    'dispatch_smh': dispatch_smh_predicted_poly,
})

In [None]:
import pickle

# Persist the forecast-period table. The context manager closes the file even
# if pickling raises; the bare open(...) call left the handle open.
with open('dispatchPredictValues.pkl', 'wb') as values_file:
    pickle.dump(predefinedValues, values_file)

In [None]:
# Dispatch SMH (observed, then forecast) plotted against financial year.
plt.figure(figsize=(12, 6))
plt.plot(fin_year, dispatch_smh[0], marker='o')
plt.gca().set(xlabel='Financial Years', ylabel='Dispatch SMH', title='Dispatch SMH over years')
plt.grid()
plt.show()

In [None]:
# Dispatch SMH (observed, then forecast) plotted against sales.
plt.figure(figsize=(12, 6))
plt.plot(sales, dispatch_smh[0], marker='o')
plt.gca().set(xlabel='Sales', ylabel='Dispatch SMH', title='Dispatch SMH over Sales')
plt.grid()
plt.show()

# Outsourced smh

In [None]:
# Regression inputs for the outsourced model: in-house SMH as a single-column
# 2-D feature array, outsourced SMH as the 1-D target.
outsourced_smh_train = df['outsourced_smh'].values
x_train = df[['inhouse_smh']].values

In [None]:
# NOTE(review): this rebinds `sales` from the full Series used in the dispatch
# section to a six-row frame; no later cell visible here reads it, but
# re-running the dispatch cells afterwards would pick up this value.
sales = data['sales'][9:15].reset_index()
# Forecast in-house SMH, exposed under the feature name 'inhouse_smh'.
x_test = inhouse_smh_predicted[['smh']].rename(columns={'smh': 'inhouse_smh'})

In [None]:
# Degree-2 polynomial regression of outsourced SMH on in-house SMH.
poly=PolynomialFeatures(degree=2)
x_train_poly=poly.fit_transform(x_train)
model=LinearRegression()
model.fit(x_train_poly,outsourced_smh_train)

# Prediction on the training data (in-sample fit).
outsourced_smh_train_pred=model.predict(x_train_poly)
print("predicted outsourced smh for training data:")
print('Actual outsourced smh \t \t predicted outsourced smh')
for j in range(len(x_train)):
    print(outsourced_smh_train[j],'\t\t', outsourced_smh_train_pred[j])

# Mean squared error on the same rows the model was fitted on.
mse=mean_squared_error(outsourced_smh_train, outsourced_smh_train_pred)
print('mse:',mse)

# Actual vs. fitted values, plotted by row position.
plt.figure(figsize=(10, 6))
plt.plot(range(len(outsourced_smh_train)), outsourced_smh_train, color='blue', label='Actual')
plt.plot(range(len(outsourced_smh_train)), outsourced_smh_train_pred, color='red', label='Predict')
plt.title('Actual vs Predicted Outsourced SMH')
plt.xlabel('Index')
plt.ylabel('Outsourced SMH')
plt.legend()
plt.show()

# Prediction on the forecast rows. `poly` was fitted on a plain array, so the
# test frame is passed as its underlying array too; handing over the labelled
# DataFrame makes scikit-learn warn about a feature-name mismatch.
x_test_poly=poly.transform(x_test.values)
outsourced_smh_test_pred=model.predict(x_test_poly)
print("Prediction on unseen data")
for j in outsourced_smh_test_pred:
    print(j)

In [None]:
# Observed outsourced SMH followed by the model's forecast; the print loop and
# the plot read the combined values from column 0.
outsourced_smh = pd.concat(
    [df['outsourced_smh'], pd.DataFrame(outsourced_smh_test_pred)],
    axis=0,
).reset_index()

print('Financial year\tInhouse SMH\tOutsoured SMH')
for i in range(len(fin_year)):
    print(fin_year[i], inhouse_smh['smh'][i], outsourced_smh[0][i], sep=' \t ')

In [None]:
# Outsourced SMH (observed, then forecast) plotted against financial year.
plt.figure(figsize=(12, 6))
plt.plot(fin_year, outsourced_smh[0], marker='o')
plt.gca().set(xlabel='Financial Years', ylabel='Outsourced SMH', title='Outsourced SMH over years')
plt.grid()
plt.show()

In [None]:
# Summary table for the forecast period (rows 9:15), renumbered from 0 so the
# columns taken from `data` line up with the position-based prediction arrays.
predicted=pd.DataFrame({
    'financial_year' : data['financial_year'][9:15].reset_index(drop=True),
    'sales' : data['sales'][9:15].reset_index(drop=True),
    'man_power': data['man_power'][9:15].reset_index(drop=True),
    # fut_efficiency is the plain list returned by efficiency_custom; indexing
    # it with 'predicted_mean' raised a TypeError.
    'efficiency' : fut_efficiency,
    'dispatch_smh' : dispatch_smh_predicted_poly,
    'inhouse_smh' : inhouse_smh_predicted['smh'],
    'outsourced_smh' : outsourced_smh_test_pred

})

print(predicted)