## Load Libraries

In [1]:
import pandas as pd
# import modin.pandas as pd
from dotenv import load_dotenv
import os
from sqlalchemy import create_engine
from urllib.parse import quote_plus

## Load Environment variables

In [2]:
# Load the .env file
load_dotenv()

# Fetch environment variables
db_address = os.getenv('DB_ADDRESS')
db_port = os.getenv('DB_PORT')
db_username = os.getenv('DB_USERNAME')
db_password = quote_plus(os.getenv('DB_PASSWORD'))
db_name = os.getenv('DB')

## Functions

In [3]:
def get_db_connection():
    """
    Establishes a connection to the PostgreSQL database using SQLAlchemy.
    - The `postgresql+psycopg2` dialect indicates that we're using PostgreSQL as the database backend.
    - The `{DB_USER}:{DB_PASS}` part specifies the username and password to use for connections.
    - The `@{DB_IP}:{DB_PORT}` part specifies the hostname and port number to use when connecting to the database.
    - The `/`${DB}` part specifies the database name or schema to connect to.

    :return: engine.connect(): An active connection object to the PostgreSQL database.
    """
    engine = create_engine(f'postgresql://{db_username}:{db_password}@{db_address}:{db_port}/{db_name}')

    # Establish a connection to the database:
    #   - This returns an active connection object, which can be used to execute queries and interact with the database.
    return engine.connect()

def fetch_data_from_sql(query):
    """
    Fetches data from a SQL table and returns it as a pandas DataFrame.

    :param query: (str) The SQL query to execute on the database.
    :return: pd.DataFrame: A pandas DataFrame containing the results of the SQL query.
    """

    # Establish a connection to the PostgreSQL database using get_db_connection():
    conn = get_db_connection()

    # Execute the SQL query on the database and store the result in a pandas DataFrame:
    df = pd.read_sql(query, conn)

    # Close the connection to the database to free up resources:
    conn.close()

    # Return the pandas DataFrame containing the results of the SQL query:
    return df

def change_column_names(column_list, std=True):
    if std:
        return [col.strip().title().replace('_', ' ') for col in column_list]

    else:
        return [col.strip().lower().replace(' ', '_') for col in column_list]

## Get Data

### Transaction

In [4]:
query_transaction = '''
    SELECT
        *
    FROM
        erp_data.funds_received.transactions
    '''

transaction = fetch_data_from_sql(query_transaction)

### Projects

In [5]:
 # Get existing master
query_project_dir_old = '''
        SELECT
            *
        FROM
            erp_data.funds_received.project_directory
'''

project_directory = fetch_data_from_sql(query_project_dir_old)

# Standardise column names
project_directory.columns = change_column_names(project_directory.columns)

### WBS Data

In [6]:
query_transaction = '''
        SELECT
            *
        FROM
            erp_data.funds_received.wbs
        '''

wbs = fetch_data_from_sql(query_transaction)

### Data Cleaning

In [7]:
# Remove extra spaces from column names
transaction_col = transaction.columns
transaction_col = [col.strip() for col in transaction_col]
transaction.columns = transaction_col

wbs_col = wbs.columns
wbs_col = [col.strip() for col in wbs_col]
wbs.columns = wbs_col

In [8]:
if transaction.shape[0] > 0 and wbs.shape[0] > 0 and project_directory.shape[0] > 0:
    # Perform WBS mapping
    transaction = transaction.merge(wbs[['WBS', 'WBS Details']].drop_duplicates(), how='left',
                                    left_on='Project definition', right_on='WBS')

    transaction = transaction.merge(wbs[['Sub WBS', 'SUB WBS Details']].drop_duplicates(), how='left',
                                    left_on='Object', right_on='Sub WBS')

    transaction['Project Id'] = transaction['SUB WBS Details'].apply(
        lambda x: str(x).strip().lower().replace(' ', '_'))

    # Assign Category
    transaction = transaction.merge(project_directory[['Project Id', 'Category']], how='left', on='Project Id').copy()

In [9]:
start_date = pd.to_datetime('01-04-2024', format='%d-%m-%Y')
end_date = pd.to_datetime('24-12-2024', format='%d-%m-%Y')

In [10]:
transaction['Ref. document number'] = transaction['Ref. document number'].fillna('').astype(str)

In [11]:
transaction['Document Number'] = transaction['Document Number'].fillna('').astype(str)

### Cleaned Data

In [12]:
# Final Data
final_data = transaction[(transaction['Document Date'].between(start_date, end_date))]

In [13]:
final_data_bak = final_data.copy()

## Analysis

### Logic 1

#### Steps:
- For Indian Donation
    - Take all the DR data
    - Filter by Cost Element ≠ 550510, 550511
    - For each "Project Definition",
        - Get the total for each Project Definition
        - Get the corresponding "Ref. document number". There could be multiple document numbers.
        - For each document numbers,
            - Get all the SB entries
            - Get the total of all SB entries
        - Get the total of the SB entries and compare it with the total for each Project Definition
        - If the difference is same as total for each project definition, then use the original project definition as the Sub-WBS code
        - If the difference is less, then categorise the difference as 'Un-assigned' in the Sub-WBS code
    - Create a Dataframe of project wise breakup for each "Project Definition"


In [14]:
# Step 1
dr_data = final_data[
    (final_data['Document type'] == 'DR') &

    # Step 2
    ~(final_data['Cost Element'].isin([550510, 550511]))
]

In [15]:
dr_data['Val/COArea Crcy'].sum()

np.float64(-691618872.62)

In [16]:
dr_data.sample(10)

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
119325,1002208891.0,2406011451.0,2024-10-22,2024-10-18,DR,DO/2023-KCOR,DO/2023-KCORREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-528000.0,-528000.0,-528000.0,,DO/2023-KCOR,K Corp Charitable Foundation,DO/2023-KCORREV,K Corp Charitable Foundation-REV,k_corp_charitable_foundation-rev,R&D
124748,1002161253.0,2406009207.0,2024-08-23,2024-08-22,DR,DO/2000-POOL,DO/2021-POOL003,2024.0,550508.0,Rev-Domestic (Indian fund),...,-2000.0,-2000.0,-2000.0,,DO/2000-POOL,Donation Pool Account,DO/2021-POOL003,DOMESTIC SCHOLARSHIP,domestic_scholarship,
124797,1002207059.0,2406011416.0,2024-10-21,2024-10-17,DR,DO/2000-POOL,DO/2021-POOL003,2024.0,550508.0,Rev-Domestic (Indian fund),...,-25000.0,-25000.0,-25000.0,,DO/2000-POOL,Donation Pool Account,DO/2021-POOL003,DOMESTIC SCHOLARSHIP,domestic_scholarship,
124820,1002226900.0,2406011707.0,2024-11-04,2024-10-19,DR,DO/2000-POOL,DO/2021-POOL003,2024.0,550508.0,Rev-Domestic (Indian fund),...,-285000.0,-285000.0,-285000.0,,DO/2000-POOL,Donation Pool Account,DO/2021-POOL003,DOMESTIC SCHOLARSHIP,domestic_scholarship,
122360,1002178843.0,2406010140.0,2024-09-13,2024-09-12,DR,DO/2024-RXOS,DO/2024-RXOSREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-135348.0,-135348.0,-135348.0,,DO/2024-RXOS,UG Scholarship,DO/2024-RXOSREV,UG Scholarship-REV,ug_scholarship-rev,
124632,1002062832.0,2406001158.0,2024-04-22,2024-04-21,DR,DO/2000-POOL,DO/2021-POOL003,2024.0,550508.0,Rev-Domestic (Indian fund),...,-10000.0,-10000.0,-10000.0,,DO/2000-POOL,Donation Pool Account,DO/2021-POOL003,DOMESTIC SCHOLARSHIP,domestic_scholarship,
124694,1002103642.0,2406002690.0,2024-06-24,2024-06-21,DR,DO/2000-POOL,DO/2021-POOL003,2024.0,550508.0,Rev-Domestic (Indian fund),...,-10000.0,-10000.0,-10000.0,,DO/2000-POOL,Donation Pool Account,DO/2021-POOL003,DOMESTIC SCHOLARSHIP,domestic_scholarship,
125872,1002182997.0,2406010391.0,2024-09-23,2024-08-22,DR,DO/2001-IDPF,DO/2001-IDPFREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-10387.0,-10387.0,-10387.0,,DO/2001-IDPF,INSTITUTE DEVELOPMENT FUND,DO/2001-IDPFREV,INSTITUTE DEVELOPMENT FUND,institute_development_fund,Strategic Needs
132093,1002088573.0,2406002037.0,2024-06-03,2024-05-18,DR,DO/2007-YFA,DO/2007-YFAREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-1000.0,-1000.0,-1000.0,,DO/2007-YFA,CLASS OF'82 LEGACY PROJECT JOINING BONUS,DO/2007-YFAREV,CLASS OF'82 LEGACY PROJECT -REV,class_of'82_legacy_project_-rev,
124708,1002115210.0,2406003303.0,2024-07-09,2024-07-08,DR,DO/2000-POOL,DO/2021-POOL003,2024.0,550508.0,Rev-Domestic (Indian fund),...,-1000.0,-1000.0,-1000.0,,DO/2000-POOL,Donation Pool Account,DO/2021-POOL003,DOMESTIC SCHOLARSHIP,domestic_scholarship,


In [17]:
project_wise_breakup = pd.DataFrame()

for proj in dr_data['Project definition'].unique():
    # Step 3
    proj_total_dr = abs(dr_data.loc[dr_data['Project definition'] == proj, 'Val/COArea Crcy'].sum())

    # Step 4
    ref_nos = final_data.loc[
        final_data['Project definition'] == proj,
        'Ref. document number'
    ]

    # Step 5
    dr_sb_entries = pd.DataFrame()
    for ref in ref_nos:
        df = final_data[
            (final_data['Ref. document number'] == ref) &
            (final_data['Document type'] =='SB')
        ]

        dr_sb_entries = pd.concat([dr_sb_entries, df], ignore_index=True)

    dr_sb_entries = dr_sb_entries.copy()

    # Step 6
    proj_total_sb = abs(dr_sb_entries['Val/COArea Crcy'].sum())

    # Step 7
    proj_total_diff = proj_total_dr - proj_total_sb

    # Step 8
    if proj_total_sb == 0:
        dr_sb_entries = dr_data[dr_data['Project definition'] == proj]
        # dr_sb_entries['Val/COArea Crcy'] = dr_sb_entries['Val/COArea Crcy'].apply(abs)

    elif proj_total_diff < proj_total_dr:
        unassigned = {
            'Val/COArea Crcy': proj_total_diff,
            'WBS': 'Unassigned',
            'WBS Details': 'Unassigned',
            'Sub WBS': 'Unassigned',
            'SUB WBS Details': 'Unassigned',
            'Project Id': 'Unassigned',
            'Category': 'Unassigned'
        }

        dr_sb_entries = pd.concat([dr_sb_entries, pd.DataFrame(data=unassigned, index=[0])], axis=0, ignore_index=True)

    elif proj_total_diff > proj_total_dr:
        print(proj, 'difference is high')

    # Step Last
    dr_sb_entries.insert(0, 'OG Project', proj)
    # og_project = pd.DataFrame(data={
    #     'OG Project': proj
    # }, index=range(0, dr_sb_entries.shape[0]))
    #
    # dr_sb_entries = pd.concat([og_project, dr_sb_entries], axis=1, ignore_index=True)

    project_wise_breakup = pd.concat([project_wise_breakup, dr_sb_entries], ignore_index=True)

In [18]:
project_wise_breakup['Val/COArea Crcy'].sum()

np.float64(20872748.519999817)

In [19]:
project_wise_breakup

Unnamed: 0,OG Project,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
0,DO/2009-HATS,1002070478.0,2406001521.0,2024-05-06,2024-05-01,DR,DO/2009-HATS,DO/2009-HATSREV,2024.0,550508.0,...,-501.0,-501.0,-501.0,,DO/2009-HATS,HOSTEL ALUMNI TEAM STEWARDSHIP,DO/2009-HATSREV,Hostels - Receipt,hostels_-_receipt,Infrastructure
1,DO/2014-PDMA,1002199191.0,2426007228.0,2024-10-11,2024-10-11,SB,DO/2014-PDMA,DO/2014-PDMA001,2024.0,510508.0,...,-10000.0,-10000.0,-10000.0,,DO/2014-PDMA,SHRIMATI PRAKASHVATI DEVI GUPTA MERIT AW,DO/2014-PDMA001,SHRIMATI PRAKASHVATI DEVI GUPTA MERIT AW,shrimati_prakashvati_devi_gupta_merit_aw,
2,DO/2014-PDMA,1002199191.0,2426007228.0,2024-10-11,2024-10-11,SB,DO/2014-PDMA,DO/2014-PDMA001,2024.0,550512.0,...,10000.0,10000.0,10000.0,,DO/2014-PDMA,SHRIMATI PRAKASHVATI DEVI GUPTA MERIT AW,DO/2014-PDMA001,SHRIMATI PRAKASHVATI DEVI GUPTA MERIT AW,shrimati_prakashvati_devi_gupta_merit_aw,
3,DO/2014-PDMA,1002199191.0,2426007228.0,2024-10-11,2024-10-11,SB,DO/2014-PDMA,DO/2014-PDMAREV,2024.0,550509.0,...,-10000.0,-10000.0,-10000.0,,DO/2014-PDMA,SHRIMATI PRAKASHVATI DEVI GUPTA MERIT AW,DO/2014-PDMAREV,SHRIMATI PRAKASHVATI DEVI GUPTA MERIT AW,shrimati_prakashvati_devi_gupta_merit_aw,
4,DO/2014-PDMA,1002199191.0,2426007228.0,2024-10-11,2024-10-11,SB,DO/2000-POOL,DO/2000-POOL002,2024.0,550512.0,...,10000.0,10000.0,10000.0,,DO/2000-POOL,Donation Pool Account,DO/2000-POOL002,IIT BHF SCHOLARSHIP,iit_bhf_scholarship,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6320,DO/2007-YFA,1002236256.0,2426008214.0,2024-11-07,2024-10-25,SB,DO/2007-YFA,DO/2007-YFAREV,2024.0,550508.0,...,2996367.0,2996367.0,2996367.0,,DO/2007-YFA,CLASS OF'82 LEGACY PROJECT JOINING BONUS,DO/2007-YFAREV,CLASS OF'82 LEGACY PROJECT -REV,class_of'82_legacy_project_-rev,
6321,DO/2007-YFA,1002236256.0,2426008214.0,2024-11-07,2024-10-25,SB,DO/2007-YFA,DO/2024-YFA002,2024.0,550508.0,...,-2996367.0,-2996367.0,-2996367.0,,DO/2007-YFA,CLASS OF'82 LEGACY PROJECT JOINING BONUS,DO/2024-YFA002,Young Faculty Fellowship,young_faculty_fellowship,
6322,DO/2007-YFA,,,,NaT,,,,,,...,,,,,Unassigned,Unassigned,Unassigned,Unassigned,Unassigned,Unassigned
6323,DO/2007-SSDF,1002068501.0,2406001467.0,2024-05-02,2024-04-26,DR,DO/2007-SSDF,DO/2007-SSDFREV,2024.0,550508.0,...,-25.0,-25.0,-25.0,,DO/2007-SSDF,SCHOLARSHIPS&STUDENT DEVT.FUND,DO/2007-SSDFREV,SCHOLARSHIPS&STUDENT DEVT.FUND,scholarships&student_devt.fund,


In [20]:
dr_data.loc[dr_data['Project definition'] == proj, 'Val/COArea Crcy'].sum()

np.float64(-50.0)

In [21]:
dr_data[dr_data['Project definition'] == proj]

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
132635,1002068501.0,2406001467.0,2024-05-02,2024-04-26,DR,DO/2007-SSDF,DO/2007-SSDFREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-25.0,-25.0,-25.0,,DO/2007-SSDF,SCHOLARSHIPS&STUDENT DEVT.FUND,DO/2007-SSDFREV,SCHOLARSHIPS&STUDENT DEVT.FUND,scholarships&student_devt.fund,
132636,1002089118.0,2406002061.0,2024-06-03,2024-05-12,DR,DO/2007-SSDF,DO/2007-SSDFREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-25.0,-25.0,-25.0,,DO/2007-SSDF,SCHOLARSHIPS&STUDENT DEVT.FUND,DO/2007-SSDFREV,SCHOLARSHIPS&STUDENT DEVT.FUND,scholarships&student_devt.fund,


In [22]:
dr_sb_entries

Unnamed: 0,OG Project,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
132635,DO/2007-SSDF,1002068501.0,2406001467.0,2024-05-02,2024-04-26,DR,DO/2007-SSDF,DO/2007-SSDFREV,2024.0,550508.0,...,-25.0,-25.0,-25.0,,DO/2007-SSDF,SCHOLARSHIPS&STUDENT DEVT.FUND,DO/2007-SSDFREV,SCHOLARSHIPS&STUDENT DEVT.FUND,scholarships&student_devt.fund,
132636,DO/2007-SSDF,1002089118.0,2406002061.0,2024-06-03,2024-05-12,DR,DO/2007-SSDF,DO/2007-SSDFREV,2024.0,550508.0,...,-25.0,-25.0,-25.0,,DO/2007-SSDF,SCHOLARSHIPS&STUDENT DEVT.FUND,DO/2007-SSDFREV,SCHOLARSHIPS&STUDENT DEVT.FUND,scholarships&student_devt.fund,


In [23]:
final_data.sample(10)

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
97273,1002155775.0,2417017460.0,2024-08-27,2024-08-13,KR,DO/2021-DCIL,DO/2021-DCIL001,2024.0,510150.0,EXP-CONTINGENCIES,...,12500.0,12500.0,12500.0,,DO/2021-DCIL,Dissemination of innovative rural techno,DO/2021-DCIL001,Dissemination of inno. rural tec EXP,dissemination_of_inno._rural_tec_exp,
74508,1002102577.0,2426002320.0,2024-06-20,2024-06-19,SB,DO/2019-CSRP,DO/2021-ATPL001,2024.0,510076.0,EXP-OTHER CHARGES,...,6660.0,6660.0,6660.0,,DO/2019-CSRP,CSR PROJECT,DO/2021-ATPL001,Ani Technologies Pvt. Ltd.,ani_technologies_pvt._ltd.,
117862,1002186187.0,153894.0,2024-09-30,2024-09-30,H6,DO/2023-BEEK,DO/2023-BEEK001,2024.0,611104.0,Pay & Allowances - Project,...,47000.0,47000.0,47000.0,,DO/2023-BEEK,Beekeeping Program Interventions to Boos,DO/2023-BEEK001,Beekeeping Program Interventions-EXP,beekeeping_program_interventions-exp,
70855,1002211537.0,2417027866.0,2024-11-12,2024-10-10,KR,DO/2018-SVCF,DO/2018-SVCF001,2024.0,510506.0,Tour and Travels,...,35229.0,35229.0,35229.0,,DO/2018-SVCF,SILICON VALLEY COMM.FOUNDATION/ UNIV OF,DO/2018-SVCF001,SILICON VALLEY COMM.FOUNDATION/ UNIV OF,silicon_valley_comm.foundation/_univ_of,
95895,1002068524.0,2417001861.0,2024-05-02,2024-04-15,KR,DO/2021-COGE,DO/2021-COGE002-010,2024.0,510148.0,EXP-CONSUMABLES,...,24980.6,24980.6,24980.6,,DO/2021-COGE,"Center of excellence in Oil,Gas & Energy",DO/2021-COGE002-010,Estimation of CO2 storage potential in,estimation_of_co2_storage_potential_in,
110544,1002126443.0,2426003375.0,2024-07-15,2024-07-15,SB,DO/2021-TCA21,DO/2021-TCAI002-004,2024.0,550001.0,Rev-EQUIPMENT&TOOLS,...,100000.0,100000.0,100000.0,,DO/2021-TCA21,TechnoCraft Center for Applied AI,DO/2021-TCAI002-004,Labeling 3D medical images,labeling_3d_medical_images,
96789,1002148114.0,2417013816.0,2024-08-05,2024-06-17,KR,DO/2021-COGE,DO/2024-COGE002-018,2024.0,510506.0,Tour and Travels,...,1050.0,1050.0,1050.0,,DO/2021-COGE,"Center of excellence in Oil,Gas & Energy",DO/2024-COGE002-018,Enhanced coalbed methane recovery with p,enhanced_coalbed_methane_recovery_with_p,
133653,1002136384.0,2426003754.0,2024-07-23,2024-07-22,SB,DO/2009-BHFL,DO/2009-BHFL001,2024.0,550509.0,Rev-General Expence,...,400000.0,400000.0,400000.0,,DO/2009-BHFL,BHARAT FORGE LIMITED,DO/2009-BHFL001,MEMS DEPTT BHFL,mems_deptt_bhfl,
115654,1002165721.0,2426005207.0,2024-08-28,2024-08-28,SB,DO/2022-SBST,DO/2022-SBST001,2024.0,550020.0,Rev-CONSUMABLES,...,2000000.0,2000000.0,2000000.0,,DO/2022-SBST,IITB Trust Lab- Dr.Shridhar Shukla funds,DO/2022-SBST001,IITB Trust Lab -Dr.Shridhar Shukla funds,iitb_trust_lab_-dr.shridhar_shukla_funds,
75175,1002099860.0,2417006770.0,2024-06-14,2024-06-03,KR,DO/2019-CSRP,DO/2021-MLSP,2024.0,510148.0,EXP-CONSUMABLES,...,13570.0,13570.0,13570.0,,DO/2019-CSRP,CSR PROJECT,DO/2021-MLSP,Development of Clinical tests for Covid-,development_of_clinical_tests_for_covid-,


In [24]:
final_data[final_data['SUB WBS Details'].fillna('').str.contains('91')]

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
46860,1002235578.0,2426008190.0,2024-11-07,2024-11-07,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,510225.0,EXP-ADMIN. EXP.,...,-15000.0,-15000.0,-15000.0,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46868,1002235578.0,2426008190.0,2024-11-07,2024-11-07,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550149.0,REV-ADMIN.EXP.,...,15000.0,15000.0,15000.0,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46869,1002148858.0,2426004303.0,2024-08-06,2024-08-06,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550508.0,Rev-Domestic (Indian fund),...,-637795.5,-637795.5,-637795.5,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46870,1002148858.0,2426004303.0,2024-08-06,2024-08-06,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550508.0,Rev-Domestic (Indian fund),...,-1247186.55,-1247186.55,-1247186.55,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46871,1002148858.0,2426004303.0,2024-08-06,2024-08-06,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550508.0,Rev-Domestic (Indian fund),...,-2756927.23,-2756927.23,-2756927.23,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46872,1002148858.0,2426004303.0,2024-08-06,2024-08-06,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550508.0,Rev-Domestic (Indian fund),...,-2429451.31,-2429451.31,-2429451.31,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46873,1002148858.0,2426004303.0,2024-08-06,2024-08-06,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550508.0,Rev-Domestic (Indian fund),...,-2606628.3,-2606628.3,-2606628.3,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46874,1002148858.0,2426004303.0,2024-08-06,2024-08-06,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550508.0,Rev-Domestic (Indian fund),...,-2874166.13,-2874166.13,-2874166.13,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46875,1002148858.0,2426004303.0,2024-08-06,2024-08-06,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550508.0,Rev-Domestic (Indian fund),...,-2874166.13,-2874166.13,-2874166.13,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,
46876,1002240240.0,2426008393.0,2024-11-11,2024-11-11,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,550509.0,Rev-General Expence,...,15426321.15,15426321.15,15426321.15,,DO/2015-LP91,CLASS OF 1991 LEGACY PROJECT,DO/2015-LP91001,CLASS OF 1991 LEGACY PROJECT,class_of_1991_legacy_project,


In [25]:
data_1 = fetch_data_from_sql('''
SELECT
    *
FROM
    erp_data.funds_received.transactions
WHERE
    "Project definition" LIKE '%%91%%' AND
    "Document type" = 'SB'
''')

In [26]:
data_1

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Exchange Rate Type,Value Type,Value in Obj. Crcy,Report currency,Val.in rep.cur.,Vbl.Val./COCrcy,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text
0,1000198000.0,1826006000.0,2018-10-05,2017-12-07,SB,DO/2015-LP91,DO/2015-LP91REV,2018.0,550509.0,Rev-General Expence,...,M,4.0,4000000.0,INR,4000000.0,4000000.0,4000000.0,4000000.0,4000000.0,
1,1000473000.0,1926002000.0,2019-06-19,2019-06-19,SB,DO/2015-LP91,DO/2015-LP91REV,2019.0,550509.0,Rev-General Expence,...,M,4.0,205792.0,INR,205792.0,205792.0,205792.0,205792.0,205792.0,
2,1000633000.0,1926009000.0,2019-12-24,2019-12-23,SB,DO/2015-LP91,DO/2015-LP91REV,2019.0,550509.0,Rev-General Expence,...,M,4.0,2600000.0,INR,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0,
3,1000743000.0,1926013000.0,2020-03-11,2020-03-11,SB,DO/2015-LP91,DO/2015-LP91REV,2019.0,550509.0,Rev-General Expence,...,M,4.0,-750000.0,INR,-750000.0,-750000.0,-750000.0,-750000.0,-750000.0,
4,1001492000.0,2226006000.0,2022-10-10,2022-10-10,SB,DO/2015-LP91,DO/2015-LP91REV,2022.0,550509.0,Rev-General Expence,...,M,4.0,15000.0,INR,15000.0,15000.0,15000.0,15000.0,15000.0,
5,1002236000.0,2426008000.0,2024-11-07,2024-11-07,SB,DO/2015-LP91,DO/2015-LP91REV,2024.0,550509.0,Rev-General Expence,...,M,4.0,-15000.0,INR,-15000.0,-15000.0,-15000.0,-15000.0,-15000.0,
6,1002240000.0,2426008000.0,2024-11-11,2024-11-11,SB,DO/2015-LP91,DO/2015-LP91REV,2024.0,550509.0,Rev-General Expence,...,M,4.0,45538930.43,INR,45538930.43,45538930.43,45538930.43,45538930.43,45538930.43,
7,1001030000.0,2026010000.0,2021-03-26,2021-03-26,SB,DO/2015-LP91,DO/2015-LP91REV,2020.0,550511.0,Rev-IITBHF Donation,...,M,4.0,-2293539.8,INR,-2293539.8,-2293539.8,-2293539.8,-2293539.8,-2293539.8,
8,1001493000.0,2226006000.0,2022-10-10,2022-10-10,SB,DO/2015-LP91,DO/2015-LP91001,2022.0,510225.0,EXP-ADMIN. EXP.,...,M,4.0,15000.0,INR,15000.0,15000.0,15000.0,15000.0,15000.0,
9,1002236000.0,2426008000.0,2024-11-07,2024-11-07,SB,DO/2015-LP91,DO/2015-LP91001,2024.0,510225.0,EXP-ADMIN. EXP.,...,M,4.0,-15000.0,INR,-15000.0,-15000.0,-15000.0,-15000.0,-15000.0,


### Logic 2
- For each Projects in Transaction without any filter
    - Pick the SB transaction for that project/wbs code
    - Perform sub-total and get rid of the records which nullify each other i.e. amount = 0
        - Of the remaining transactions,
            - Identify the reference document numbers
            - Prepare a dataset of these document numbers. All the entries will be SB
            - Exclude the original project/WBS code
    - Of the remaining transaction, get their corresponding ref. document numbers
        - For each document numbers,
            - Filter all corresponding SB transactions (filtered data with SB entries)
            - Exclude the original project/wbs code from the filtered dataset
            - If the value is less than 0, it's transferred to
            - If the value is greater than 0, it's transferred from

In [43]:
final_data = final_data_bak.copy()

In [44]:
import pandas as pd
from itertools import combinations

def find_nullifying_groups(df):
    """
    Identify groups of values in the 'Value in Obj. Crcy' column that nullify each other,
    and return their corresponding 'Ref. document number'.

    Args:
    df (pd.DataFrame): The input DataFrame with columns 'Ref. document number' and 'Value in Obj. Crcy'.

    Returns:
    pd.DataFrame: A DataFrame containing the 'Ref. document number' and 'Value in Obj. Crcy'
                  of nullifying groups.
    """
    # Initialize a set to store indices of rows that nullify each other
    nullifying_indices = set()

    # Extract 'Value in Obj. Crcy' and their indices
    values = df['Value in Obj. Crcy'].tolist()
    index_map = df.index.tolist()  # Map positional indices to actual DataFrame indices

    n = len(values)

    # Check all combinations of rows to find nullifying groups
    for r in range(2, n + 1):  # At least 2 elements in the combination
        for combo in combinations(range(n), r):
            subset = [values[i] for i in combo]
            if abs(sum(subset)) < 1e-9:  # Check for near-zero sum
                # Map positional indices to DataFrame indices
                nullifying_indices.update(index_map[i] for i in combo)

    # Convert the set of indices to a list and filter the DataFrame
    result = df.loc[list(nullifying_indices)]
    return result

In [45]:
project_breakup = pd.DataFrame()

In [54]:
# Step 1
for proj in final_data['Object'].unique():
    print(proj)
    data = final_data[final_data['Object'] == proj]

    # Step 2
    nullifying_groups = find_nullifying_groups(data)

    # Step 3
    data_non_null = data[~data['Ref. document number'].isin(nullifying_groups['Ref. document number'])]

    # Step 4.1 and 4.2
    data_sb = final_data[final_data['Ref. document number'].isin(data_non_null['Ref. document number'])]

    # Step 4.3
    data_proj = data_sb[data_sb['Object'] != proj]

    # Concatenate the final data
    project_breakup = pd.concat([project_breakup, data_proj], ignore_index=True)

    break

DO/2009-HATSREV


In [55]:
data

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
290,1002070478.0,2406001521.0,2024-05-06,2024-05-01,DR,DO/2009-HATS,DO/2009-HATSREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-501.0,-501.0,-501.0,,DO/2009-HATS,HOSTEL ALUMNI TEAM STEWARDSHIP,DO/2009-HATSREV,Hostels - Receipt,hostels_-_receipt,Infrastructure


In [51]:
nullifying_groups

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category


In [56]:
data_non_null

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
290,1002070478.0,2406001521.0,2024-05-06,2024-05-01,DR,DO/2009-HATS,DO/2009-HATSREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-501.0,-501.0,-501.0,,DO/2009-HATS,HOSTEL ALUMNI TEAM STEWARDSHIP,DO/2009-HATSREV,Hostels - Receipt,hostels_-_receipt,Infrastructure


In [40]:
data_proj

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category


In [57]:
data_sb

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
290,1002070478.0,2406001521.0,2024-05-06,2024-05-01,DR,DO/2009-HATS,DO/2009-HATSREV,2024.0,550508.0,Rev-Domestic (Indian fund),...,-501.0,-501.0,-501.0,,DO/2009-HATS,HOSTEL ALUMNI TEAM STEWARDSHIP,DO/2009-HATSREV,Hostels - Receipt,hostels_-_receipt,Infrastructure


In [39]:
project_breakup

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category


In [41]:
final_data.head()

Unnamed: 0,Document Number,Ref. document number,Posting Date,Document Date,Document type,Project definition,Object,Fiscal Year,Cost Element,Cost element descr.,...,Vbl. value/TranCurr.,Vbl. value/Obj. curr,Var.val.in rep.cur.,Purchase order text,WBS,WBS Details,Sub WBS,SUB WBS Details,Project Id,Category
1047,1002161674.0,2426004989.0,2024-08-23,2024-08-23,SB,DO/2000-POOL,DO/2000-POOL002,2024.0,510508.0,EXP-SCHOLARSHIP,...,38348.88,38348.88,38348.88,,DO/2000-POOL,Donation Pool Account,DO/2000-POOL002,IIT BHF SCHOLARSHIP,iit_bhf_scholarship,
1048,1002161674.0,2426004989.0,2024-08-23,2024-08-23,SB,DO/2000-POOL,DO/2000-POOL002,2024.0,510508.0,EXP-SCHOLARSHIP,...,36109.74,36109.74,36109.74,,DO/2000-POOL,Donation Pool Account,DO/2000-POOL002,IIT BHF SCHOLARSHIP,iit_bhf_scholarship,
1051,1002164961.0,2426005166.0,2024-08-28,2024-08-27,SB,DO/2000-POOL,DO/2000-POOL002,2024.0,510508.0,EXP-SCHOLARSHIP,...,38348.88,38348.88,38348.88,,DO/2000-POOL,Donation Pool Account,DO/2000-POOL002,IIT BHF SCHOLARSHIP,iit_bhf_scholarship,
1052,1002164961.0,2426005166.0,2024-08-28,2024-08-27,SB,DO/2000-POOL,DO/2000-POOL002,2024.0,510508.0,EXP-SCHOLARSHIP,...,36109.74,36109.74,36109.74,,DO/2000-POOL,Donation Pool Account,DO/2000-POOL002,IIT BHF SCHOLARSHIP,iit_bhf_scholarship,
1408,1002199703.0,2426007241.0,2024-10-14,2024-10-01,SB,DO/2000-POOL,DO/2000-POOL002,2024.0,550511.0,Rev-IITBHF Donation,...,-32196387.45,-32196387.45,-32196387.45,,DO/2000-POOL,Donation Pool Account,DO/2000-POOL002,IIT BHF SCHOLARSHIP,iit_bhf_scholarship,
