In [1]:
import pandas as pd
import numpy as np

def commission_calculation(csv):
    """Build the per-agent commission report and export it to Excel.

    The reference tables are read from fixed file names in the current
    working directory: ``policies.csv``, ``AvgPremiumPerPol Reward.csv``,
    ``SoldPremium Reward.csv``, ``Policy Count Reward.csv``,
    ``SoldPremium Commission Percenta.csv`` and ``persistency.xls``.

    Args:
        csv: Path (or anything ``pd.read_csv`` accepts) of the agent file.
            It is joined to the policies on ``PolicyType``; the joined data
            must provide ``AgentID``, ``AgentName``, ``PolicyStatus``,
            ``PolicyID`` and ``PremiumAmount``.

    Returns:
        A DataFrame with one row per (AgentID, AgentName) group of active
        policies that also has a row in the persistency sheet. The
        ``AgentID`` column is dropped before returning; ``Persistency`` is
        expressed as a whole-number percentage.

    Side effects:
        Prints the first rows of the result and writes the full result to
        ``final_commission.xlsx`` in the current working directory.
    """

    def to_number(series):
        """Drop every character except digits and dots and parse as float.

        Cells that end up empty after stripping are treated as 0.
        """
        return (
            series.astype(str)
            .str.strip()
            .str.replace(r"[^0-9.]", "", regex=True)
            .replace("", "0")
            .astype(float)
        )

    def first_match(value, table, threshold_col, result_col):
        """Return `result_col` of the first row whose threshold is below `value`.

        Rows are considered in file order and the comparison is strict
        (``value > threshold``). Returns None when no row qualifies.
        NOTE(review): because the first match wins, the reward tables are
        presumably ordered from the highest threshold down - confirm.
        """
        matches = table.loc[table[threshold_col] < value, result_col]
        return None if matches.empty else matches.iloc[0]

    def tier_number(value, table, threshold_col):
        """Return the tier as an int (second token of the "Tier" cell), 0 if none."""
        label = first_match(value, table, threshold_col, "Tier")
        # Always an int so both tier columns get the same dtype.
        return 0 if label is None else int(label.split()[1])

    # load csv, assumption that these are one time input
    df_pol = pd.read_csv('policies.csv')

    # The incentive table was reprocessed (with stakeholders) into one csv
    # per table; only those per-table files are used, so the combined
    # 'incentive-table.csv' is not loaded.
    df_AvgPremiumPerPol = pd.read_csv('AvgPremiumPerPol Reward.csv')
    df_AvgPremiumPerPol["AvgPremiumPerPol"] = to_number(df_AvgPremiumPerPol["AvgPremiumPerPol"])

    df_SoldPremium = pd.read_csv('SoldPremium Reward.csv')
    df_SoldPremium["SoldPremium"] = to_number(df_SoldPremium["SoldPremium"])

    df_Policy_Count = pd.read_csv('Policy Count Reward.csv')
    df_Policy_Count["IncentivePerPolicy"] = to_number(df_Policy_Count["IncentivePerPolicy"])

    df_SoldPremium_Commission = pd.read_csv('SoldPremium Commission Percenta.csv')
    commission_columns = [f"AvgPremiumPerPol_Reward_{i}" for i in range(1, 6)]
    for column in commission_columns:
        df_SoldPremium_Commission[column] = to_number(df_SoldPremium_Commission[column])

    # load xls, assumption that these are one time input
    df_persist = pd.read_excel('persistency.xls')

    # manual agent input file
    df_agts = pd.read_csv(csv)

    # Left join keeps every policy and attaches the agent columns.
    # NOTE(review): the join key is PolicyType - confirm that this is the
    # intended policy-to-agent relationship.
    df_pol_agts = pd.merge(df_pol, df_agts, on='PolicyType', how='left')

    # Get active policies only
    act_pol = df_pol_agts[df_pol_agts["PolicyStatus"] == "Active"]

    # Per agent: number of active policies and their total premium.
    agg_agt = (
        act_pol.groupby(["AgentID", "AgentName"])
        .agg(
            SoldPolicyCount=("PolicyID", "count"),
            SoldPremium=("PremiumAmount", "sum"),
        )
        .reset_index()
    )

    # calculate the average premium per policy
    agg_agt["AvgPremiumPerPol"] = agg_agt["SoldPremium"] / agg_agt["SoldPolicyCount"]

    # Inner join on AgentID: agents without a persistency row are dropped.
    agg_agt1 = pd.merge(agg_agt, df_persist, on='AgentID', how='inner')

    # persistency passed if >= 0.85
    agg_agt1["PassedPersistency"] = np.where(agg_agt1["Persistency"] >= 0.85, "Y", "N")

    # look up the per-policy incentive from the agent's sold policy count
    def policy_count_reward(sold_policy_count):
        reward = first_match(
            sold_policy_count, df_Policy_Count, "SoldPolicyCount", "IncentivePerPolicy"
        )
        return 0.0 if reward is None else reward

    agg_agt1["SoldPolicyCountIncentiveTier"] = agg_agt1["SoldPolicyCount"].apply(policy_count_reward)

    # total policy-count incentive = incentive per policy * policies sold
    agg_agt1["SoldPolicyCountIncentive"] = (
        agg_agt1["SoldPolicyCountIncentiveTier"] * agg_agt1["SoldPolicyCount"]
    )

    # look up the tier for the sold premium of the agent
    agg_agt1["SoldPremiumRewardTier"] = agg_agt1["SoldPremium"].apply(
        lambda premium: tier_number(premium, df_SoldPremium, "SoldPremium")
    )

    # look up the tier for the average premium per policy of the agent
    agg_agt1["AvgPremiumPerPol RewardTier"] = agg_agt1["AvgPremiumPerPol"].apply(
        lambda avg: tier_number(avg, df_AvgPremiumPerPol, "AvgPremiumPerPol")
    )

    # look up the matrix of both tiers to get the sold premium commission percentage
    def sold_premium_commission_percenta(row):
        premium_tier = int(row["SoldPremiumRewardTier"])
        avg_tier = int(row["AvgPremiumPerPol RewardTier"])

        # No commission unless the agent reached a tier on both axes.
        if premium_tier == 0 or avg_tier == 0:
            return 0.0

        # Tiers start at 1 while the matrix rows are labelled from 0.
        return df_SoldPremium_Commission.loc[
            premium_tier - 1, f"AvgPremiumPerPol_Reward_{avg_tier}"
        ]

    # The matrix holds percentages, hence the division by 100.
    agg_agt1["SoldCommissionPercentage"] = (
        agg_agt1.apply(sold_premium_commission_percenta, axis=1) / 100
    )

    # calculate the sold commission
    agg_agt1["SoldCommission"] = agg_agt1["SoldPremium"] * agg_agt1["SoldCommissionPercentage"]

    # calculate the final commission
    # NOTE(review): the total is scaled by the raw Persistency ratio and
    # PassedPersistency is not used here - confirm this is the intended rule.
    agg_agt1["FinalCommission"] = (
        agg_agt1["SoldCommission"] + agg_agt1["SoldPolicyCountIncentive"]
    ) * agg_agt1["Persistency"]

    # format as percentage value (done after FinalCommission, which needs the ratio)
    agg_agt1["Persistency"] = (agg_agt1["Persistency"] * 100).round(0).astype(int)

    # whole-number columns
    whole_number_columns = [
        "SoldPremium",
        "AvgPremiumPerPol",
        "SoldPolicyCountIncentiveTier",
        "SoldPremiumRewardTier",
    ]
    agg_agt1[whole_number_columns] = agg_agt1[whole_number_columns].round(0).astype(int)

    # money columns, two decimals
    money_columns = ["FinalCommission", "SoldCommission", "SoldPolicyCountIncentive"]
    agg_agt1[money_columns] = agg_agt1[money_columns].round(2)

    # drop agent id
    agg_agt1 = agg_agt1.drop(columns=["AgentID"])

    print(agg_agt1.head())

    # output as excel
    agg_agt1.to_excel("final_commission.xlsx", index=False)

    return agg_agt1
    
# Run the calculation on the agent file; point `csv` at a new file to refresh the agents.
result = commission_calculation(csv='agents.csv')

      AgentName  SoldPolicyCount  SoldPremium  AvgPremiumPerPol  Persistency  \
0   Wilson Ding             3002      1577254               525          100   
1   Jasmine Lee              373       210926               565           95   
2      Keith Ng             1156       590449               511           50   
3    Alvin Pang             2204      1171148               531           80   
4  Germaine Kim              772       392762               509           85   

  PassedPersistency  SoldPolicyCountIncentiveTier  SoldPolicyCountIncentive  \
0                 Y                             4                   12008.0   
1                 Y                             0                       0.0   
2                 N                             1                    1156.0   
3                 N                             3                    6612.0   
4                 Y                             0                       0.0   

   SoldPremiumRewardTier  AvgPremiumPerPol R