## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.integrate import quad

## Postgres Configuration

In [2]:
# Execute the Postgres configuration notebook from this kernel.
# NOTE(review): `engine` is used by later cells but never defined in this
# notebook, so it is presumably created by config_psql.ipynb — TODO confirm.
%run config_psql.ipynb

## Settings Configuration

In [3]:
# Lift pandas' display truncation for both axes (None = no limit).
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

## Initializing parameters

In [4]:
# Shape parameters of the resource curve. curve_equation() and integrand()
# read `p` and `alpha` as module-level globals, so they must exist before any
# curve is evaluated. (A `global` statement is a no-op at module level, so
# none is needed here.)
p = 2.25    # half of the exponent applied to the normalised x position
alpha = 25  # scale inside the exponential term of the curve

In [7]:
# Pull the full resource-utilization view into a DataFrame.
resource_view_query = "SELECT * FROM dwh.vw_resource_utilization"
df = pd.read_sql_query(sql=resource_view_query, con=engine)
# df_sample = df[(df['match_id']=='433606') | (df['match_id']=='1384439')].reset_index(drop=True)
# df.info()

In [8]:
# df = df[(df['match_id']=='1359541')].reset_index(drop=True)

In [9]:
def curve_equation(x,W,r,w1):
    """Height of the resource curve at position x.

    With t = (x - w1) / (W - w1) and beta = 1 - t**(2*p), the value is
    r * sqrt(beta * exp(alpha * sqrt(beta))).

    `p` and `alpha` are NOT parameters: they are read from module-level
    globals at call time, so callers must set them before calling.

    Args:
        x: Position(s) on the x-axis; scalar or numpy array.
        W: Right end of the curve (where t reaches 1 and the value drops to 0).
        r: Multiplier applied to the curve.
        w1: Left end of the curve (where t is 0).

    Returns:
        Curve value(s), same shape as x.
    """
    normalised = (x - w1) / (W - w1)
    beta = 1 - normalised ** (2 * p)
    root_beta = np.sqrt(beta)
    return r * np.sqrt(beta * np.exp(alpha * root_beta))

# Define the function to integrate
def integrand(x,W,r,w1):
    """Function integrated by quad(): the resource-curve height at x.

    Delegates to curve_equation() so the curve has a single definition and the
    plotted curve and the integrated curve cannot drift apart. Like
    curve_equation(), the result depends on the module-level globals `p` and
    `alpha` at call time.

    Args:
        x: Integration variable.
        W: Right end of the curve.
        r: Multiplier applied to the curve.
        w1: Left end of the curve.

    Returns:
        curve_equation(x, W, r, w1).
    """
    return curve_equation(x, W, r, w1)

# Define the area function
def calculate_area(x1, x2, W, r, w1):
    """Area under integrand() between x1 and x2.

    Args:
        x1: Lower integration limit.
        x2: Upper integration limit.
        W, r, w1: Forwarded unchanged to integrand() as extra arguments.

    Returns:
        The integral estimate from scipy's quad (its error estimate is discarded).
    """
    quad_result = quad(integrand, x1, x2, args=(W, r, w1))
    return quad_result[0]

In [10]:
def pms(scale):
    """Build the pressure multiplier scale (pms) threshold table.

    The curve is evaluated with W=9, r=100, w1=0 and split into the nine unit
    segments [0,1] ... [8,9]. The [0,1] segment is counted twice in the total
    area and is used for both key 1 and key 2. Each key maps to
    int(scale * segment share in percent); keys 2-10 are additionally floored
    at 10, key 1 is not.

    The result depends on the module-level globals `p` and `alpha` at call
    time, because the integrand reads them.

    Args:
        scale: Multiplier applied to each segment's percentage share.

    Returns:
        dict mapping keys 1..10 to integer thresholds.
    """
    curve_args = (9, 100, 0)  # W, r, w1

    # The [0, 1] segment is needed three times (total, key 1, key 2):
    # integrate it once and reuse the value.
    first_area = calculate_area(0, 1, *curve_args)
    total_area = calculate_area(0, 9, *curve_args) + first_area

    dict_pms = {1: int(scale * first_area * 100 / total_area)}

    for i in range(0, 9):
        area_range = first_area if i == 0 else calculate_area(i, i + 1, *curve_args)
        area_perc = area_range * 100 / total_area
        dict_pms[i + 2] = max(10, int(scale * area_perc))

    return dict_pms

# ODI thresholds are derived from the curve; T20 thresholds are set by hand.
odi_pms = pms(scale=3)
# t20_pms = {1: 24, 2: 24, 3: 22, 4: 20, 5: 17, 6: 13, 7: 11, 8: 9, 9: 5, 10: 5}
t20_pms = dict(zip(range(1, 11), (19, 19, 18, 18, 17, 13, 11, 9, 5, 5)))
# print(t20_pms)

In [11]:
# Reset the curve parameters to their baseline values. curve_equation() and
# integrand() read `p` and `alpha` as module-level globals, so those two names
# (together with W, w1 and r) are deliberately reassigned for every row below.
W = 9
w1 = 0
r = 100
p = (W-w1)/4
alpha = r/4

# Pressure-multiplier rule per match type:
#   (threshold table, offset, slope, shift, cap)
#   pms = min(offset + exp(slope * (utilization / threshold - shift)), cap)
pms_rules = {
    'ODI': (odi_pms, 1, 3.5, 2, 2.5),
    'T20': (t20_pms, 0.7, 0.7, 2.72, 2),
}

for index, row in df.iterrows():
    # Report progress first so rows that exit early below are counted too.
    if (index%10000)==0:
        print("{} rows executed".format(index))

    r = row['avail_resource']
    if r<=0:
        # Nothing available: utilization is 0 and `pms` is left unset for this row.
        df.loc[index, 'exp_resource_utilization'] = 0
        continue
    b = row['balls_remaining']
    w1 = 10 - row['wickets_in_hand']
    W = 9 if w1 <= 7 else 11
    p = (W-w1)/4
    alpha = r/4
    w2 = w1+1

    # The 500-point curve sample that used to be built here was only consumed
    # by the commented-out plotting cell; recompute it there if it is re-enabled.
    area_range = calculate_area(w1, w2, W, r, w1)
    total_area = calculate_area(w1, W, W, r, w1)
    if w1==0:
        # Two batters are in at the start, so the first segment is counted
        # twice (the non-striker gets the same resource). With w1 == 0 that
        # segment is exactly `area_range`, so reuse it instead of re-integrating.
        total_area += area_range

    area_perc = area_range*100/total_area
    exp_resource_utilization = int(area_perc*b/100)
    df.loc[index, 'exp_resource_utilization'] = exp_resource_utilization

    rule = pms_rules.get(row['match_type'])
    if rule is None:
        # Match types other than ODI/T20 get no `pms` value.
        continue
    thresholds, offset, slope, shift, cap = rule
    batting_pos = row['batting_pos']
    # Short-circuit on the position check so the table is only indexed for positions 1-8.
    if (batting_pos <= 8) and (exp_resource_utilization > thresholds[batting_pos]):
        ratio = exp_resource_utilization/thresholds[batting_pos]
        df.loc[index, 'pms'] = min(offset + np.exp(slope*(ratio-shift)), cap)
    else:
        df.loc[index, 'pms'] = 1

0 rows executed


In [13]:
# Reload the stage table stg.scorecard_batting from the enriched frame `df`.
# engine.begin() commits the TRUNCATE when the block exits, and
# exec_driver_sql() accepts a plain SQL string, so this runs on SQLAlchemy 1.4
# and 2.x alike (2.x rejects Connection.execute() with a raw string and does
# not autocommit).
# NOTE: truncate and load run in separate transactions; if the load fails the
# stage table is left empty.
with engine.begin() as conn:
    conn.exec_driver_sql("TRUNCATE TABLE stg.scorecard_batting")

print("Loading data into stage table now")
count_rows = df.to_sql('scorecard_batting',
                     schema = 'stg',
                     con = engine,
                     if_exists='append',
                     method = 'multi',
                     index = False,
                     chunksize = 4096)

In [None]:
# # Plot the curve
# plt.figure(figsize=(6, 3))
# plt.plot(x_values, y_values, label='Curve')

# # Plot the x-axis
# plt.axhline(0, color='black',linewidth=0.5)

# plt.fill_between(x_values, 0, y_values, where=((x_values >= w1) & (x_values <= w2)), color='gray', alpha=0.5)

# # Add labels and legend
# plt.title('Curve and Area Calculation')
# plt.xlabel('x')
# plt.ylabel('y')
# plt.legend()

# # Show the plot
# plt.grid(True)
# plt.show()

# print("Area between w1 and w2:", area_range)
# print("Total area under the curve:", total_area)
# print("Area percent:", area_perc)