# Setup

In [1]:
import pandas as pd

# Name of the column used as the join key when the two files are merged below.
TRANSACTION_INDEX = "TPN_ID"

# Open data files

In [2]:
# Load the "original" CSV into a DataFrame (path is relative to the notebook).
original = pd.read_csv(filepath_or_buffer="./data/original_sorted.csv")

In [3]:
# Print the column names, non-null counts, and dtypes of `original`.
original.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5568 entries, 0 to 5567
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   RC#          5568 non-null   int64 
 1   RC Name      5568 non-null   object
 2   Category     5568 non-null   int64 
 3   Vendor#      5568 non-null   int64 
 4   Vendor Name  5568 non-null   object
 5   Address 1    5544 non-null   object
 6   Address 2    1290 non-null   object
 7   City         5469 non-null   object
 8   State        5371 non-null   object
 9   ZIP code     5557 non-null   object
 10  Amount       5568 non-null   int64 
 11  TPN_ID       5568 non-null   object
dtypes: int64(4), object(8)
memory usage: 522.1+ KB


In [4]:
# Load the "updated" CSV into a DataFrame (path is relative to the notebook).
updated = pd.read_csv(filepath_or_buffer="./data/updated_sorted.csv")

In [5]:
# Print the column names, non-null counts, and dtypes of `updated`.
updated.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5920 entries, 0 to 5919
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   RC#          5920 non-null   int64 
 1   RC Name      5920 non-null   object
 2   Category     5920 non-null   int64 
 3   Vendor#      5920 non-null   int64 
 4   Vendor Name  5920 non-null   object
 5   Address 1    5896 non-null   object
 6   Address 2    1359 non-null   object
 7   City         5821 non-null   object
 8   State        5716 non-null   object
 9   ZIP code     5909 non-null   object
 10  Amount       5920 non-null   int64 
 11  TPN_ID       5920 non-null   object
dtypes: int64(4), object(8)
memory usage: 555.1+ KB


# Top-line numbers

In [6]:
# Total of the Amount column in each file; the cell displays the absolute
# change in that total (updated minus original).
original_sum = original["Amount"].sum()
updated_sum = updated["Amount"].sum()
updated_sum - original_sum

30599050

In [7]:
# Change in the Amount total expressed as a fraction of the original total.
(updated_sum - original_sum)/original_sum

0.08429766752297206

In [8]:
# Net change in the number of distinct transaction IDs between the two files.
#
# The frames are read without an index column, so `.index` is only the default
# row position and `len(df.index.unique())` is just the row count. Counting
# distinct values of the TRANSACTION_INDEX column instead measures transactions
# and is not inflated by rows that repeat an ID.
#
# This is a net figure (distinct IDs in `updated` minus distinct IDs in
# `original`); it equals the number of added IDs only if no ID was removed.
# NOTE: the name `new_transactions` is rebound to a DataFrame in a later cell.
new_transactions = (
    updated[TRANSACTION_INDEX].nunique() - original[TRANSACTION_INDEX].nunique()
)
new_transactions

352

# Merge transactions

In [9]:
# Join the two files on the transaction key (pandas' default inner join, so
# only keys present in both files are kept). Overlapping column names get the
# suffix `_o` (original) or `_u` (updated). `AmountDiff` is the updated Amount
# minus the original Amount for each joined row.
merged_transactions = pd.merge(
    original,
    updated,
    on=TRANSACTION_INDEX,
    suffixes=("_o", "_u"),
).assign(AmountDiff=lambda paired: paired["Amount_u"] - paired["Amount_o"])

# Largest amount increases - change from original to updated

In [10]:
# Five joined rows with the largest AmountDiff (biggest increases first).
(
    merged_transactions
    .sort_values(by="AmountDiff", ascending=False)
    .iloc[:5]
)

Unnamed: 0,RC#_o,RC Name_o,Category_o,Vendor#_o,Vendor Name_o,Address 1_o,Address 2_o,City_o,State_o,ZIP code_o,...,Category_u,Vendor#_u,Vendor Name_u,Address 1_u,Address 2_u,City_u,State_u,ZIP code_u,Amount_u,AmountDiff
4768,85,SOMD Administration,7300,424204,BPA II LTD,1468 WEST 9TH STREET SUITE 835,,CLEVELAND,OH,44113,...,7300,424204,BPA II LTD,1468 WEST 9TH STREET SUITE 835,,CLEVELAND,OH,44113,10951439,2584599
5304,92,Business and Auxiliary Services,7300,505445,WYNDHAM PITTSBURGH UNIVERSITY CENTER,545 E JOHN CARPENTER FREEWAY STE 1300,,IRVING,TX,75062,...,7300,505445,WYNDHAM PITTSBURGH UNIVERSITY CENTER,545 E JOHN CARPENTER FREEWAY STE 1300,,IRVING,TX,75062,11657790,2023060
4414,83,General University,7700,204776,"MARSH USA, INC",P O BOX 281915,,ATLANTA,GA,30384,...,7700,204776,"MARSH USA, INC",P O BOX 281915,,ATLANTA,GA,30384,6071082,1735802
5310,92,Business and Auxiliary Services,7300,687995,MWK FORBES II LLC,3341 FORBES AVENUE,,PITTSBURGH,PA,15213,...,7300,687995,MWK FORBES II LLC,3341 FORBES AVENUE,,PITTSBURGH,PA,15213,6280388,1112046
3799,67,Facilities Management,7000,103658,DUQUESNE LIGHT COMPANY,PO BOX 830012,,BALTIMORE,MD,21283-0012,...,7000,103658,DUQUESNE LIGHT COMPANY,PO BOX 830012,,BALTIMORE,MD,21283-0012,24578216,747194


# All amount changes from original to updated (decreases, unchanged, increases)

In [11]:
# Joined rows whose Amount went down, largest decrease first.
(
    merged_transactions
    .loc[merged_transactions["AmountDiff"].lt(0)]
    .sort_values(by="AmountDiff")
)

Unnamed: 0,RC#_o,RC Name_o,Category_o,Vendor#_o,Vendor Name_o,Address 1_o,Address 2_o,City_o,State_o,ZIP code_o,...,Category_u,Vendor#_u,Vendor Name_u,Address 1_u,Address 2_u,City_u,State_u,ZIP code_u,Amount_u,AmountDiff
39,1,Chancellor,6400,320609,MEDIAQUEST,4891 CAMPBELLS RUN RD,,PITTSBURGH,PA,15205,...,6400,320609,MEDIAQUEST,4891 CAMPBELLS RUN RD,,PITTSBURGH,PA,15205,23212,-27455
167,3,SVC Research,6400,424663,CPA GLOBAL LIMITED,225 REINEKERS LANE,STE. 400,ALEXANDRIA,VA,22314,...,6400,424663,CPA GLOBAL LIMITED,225 REINEKERS LANE,STE. 400,ALEXANDRIA,VA,22314,927936,-9364
1938,35,School of Medicine,7100,100897,AQUA FILTER FRESH INC,PO BOX 14128,,PITTSBURGH,PA,15239-0128,...,7100,100897,AQUA FILTER FRESH INC,PO BOX 14128,,PITTSBURGH,PA,15239-0128,4213,-5836
3558,61,Pitt Information Technology,7400,482143,COMCAST BUSINESS,ONE COMCAST CENTER 32ND FL,,PHILADELPHIA,PA,19103-2838,...,7400,482143,COMCAST BUSINESS,ONE COMCAST CENTER 32ND FL,,PHILADELPHIA,PA,19103-2838,40233,-3773
5541,94,School of Computing and Information,6000,100877,APPLE COMPUTER INC,PO BOX 281877,,ATLANTA,GA,30384-1877,...,6000,100877,APPLE COMPUTER INC,PO BOX 281877,,ATLANTA,GA,30384-1877,13142,-2142
921,22,School of Education,6400,500751,INRESONANCE,32 INDUSTRIAL DRIVE EAST,SUITE 1,NORTHAMPTON,MA,01060,...,6400,500751,INRESONANCE,PO BOX 122145,DEPT 2145,DALLAS,TX,75312-2145,64907,-1840
418,6,Kenneth P. Dietrich School of Arts and Sciences,6000,503874,SAI LIFE SCIENCES LIMITED,GROUND 1ST 2ND & 3RD FL BLDG I PLOT NO 2,CHRYSALIS ENCLAVE INTL BIOTECH PARK,HINJEWADI,PUNE,411 057,...,6000,503874,SAI LIFE SCIENCES LIMITED,GROUND 1ST 2ND & 3RD FL BLDG I PLOT NO 2,CHRYSALIS ENCLAVE INTL BIOTECH PARK,HINJEWADI,PUNE,411 057,19355,-1155
428,6,Kenneth P. Dietrich School of Arts and Sciences,6000,555151,LABWEAR.COM,480 BARNUM AVE BUILDING 5 SUITE 30,,BRIDGEPORT,CT,06608,...,6000,555151,LABWEAR.COM,480 BARNUM AVE BUILDING 5 SUITE 30,,BRIDGEPORT,CT,06608,1685,-727
4178,80,Athletics,6000,675887,KINETIC PERFORMANCE TECHNOLOGY PTY LTD,UNIT 8 26-28 WINCHCOMBE CT,,MITCHELL ACT,,2911,...,6000,675887,KINETIC PERFORMANCE TECHNOLOGY PTY LTD,UNIT 8 26-28 WINCHCOMBE CT,,MITCHELL ACT,,2911,17163,-555
11,1,Chancellor,6000,521322,SUPRA OFFICE SOLUTIONS INC,PO BOX 201,,BALA CYNWYD,PA,19004,...,6000,521322,SUPRA OFFICE SOLUTIONS INC,PO BOX 201,,BALA CYNWYD,PA,19004,6302,-298


In [12]:
# Joined rows whose Amount is the same in both files.
merged_transactions.loc[merged_transactions["AmountDiff"].eq(0)]

Unnamed: 0,RC#_o,RC Name_o,Category_o,Vendor#_o,Vendor Name_o,Address 1_o,Address 2_o,City_o,State_o,ZIP code_o,...,Category_u,Vendor#_u,Vendor Name_u,Address 1_u,Address 2_u,City_u,State_u,ZIP code_u,Amount_u,AmountDiff
0,1,Chancellor,6000,100877,APPLE COMPUTER INC,PO BOX 281877,,ATLANTA,GA,30384-1877,...,6000,100877,APPLE COMPUTER INC,PO BOX 281877,,ATLANTA,GA,30384-1877,18071,0
1,1,Chancellor,6000,101243,B&H PHOTO VIDEO,119 WEST 17TH STREET,,NEW YORK,NY,10011,...,6000,101243,B&H PHOTO VIDEO,119 WEST 17TH STREET,,NEW YORK,NY,10011,4019,0
3,1,Chancellor,6000,104384,FRANKLIN INTERIORS,PO BOX 6107,,HERMITAGE,PA,16148-0923,...,6000,104384,FRANKLIN INTERIORS,PO BOX 6107,,HERMITAGE,PA,16148-0923,3284,0
4,1,Chancellor,6000,119301,WORKSCAPE INC,KNOLL C/O WORKSCAPE,PO BOX 277778,ATLANTA,GA,30384-7778,...,6000,119301,WORKSCAPE INC,KNOLL C/O WORKSCAPE,PO BOX 277778,ATLANTA,GA,30384-7778,2051,0
6,1,Chancellor,6000,223247,GEORGIA TECH RESEARCH CORP,PO BOX 100117,,ATLANTA,GA,30384,...,6000,223247,GEORGIA TECH RESEARCH CORP,PO BOX 100117,,ATLANTA,GA,30384,5120,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5557,94,School of Computing and Information,6400,509937,GO-U LLC,2310 RIDGE AVENUE,,EVANSTON,IL,60201,...,6400,509937,GO-U LLC,2310 RIDGE AVENUE,,EVANSTON,IL,60201,8000,0
5558,94,School of Computing and Information,6400,562181,ADAM REGER LLC,409 EMERSON AVE,,PITTSBURGH,PA,15215,...,6400,562181,ADAM REGER LLC,409 EMERSON AVE,,PITTSBURGH,PA,15215,2000,0
5559,94,School of Computing and Information,6400,588965,EMMA INC,75 REMITTANCE DRIVE SUITE 6222,,CHICAGO,IL,60675-6222,...,6400,588965,EMMA INC,75 REMITTANCE DRIVE SUITE 6222,,CHICAGO,IL,60675-6222,2868,0
5560,94,School of Computing and Information,6400,687570,"THOMAS, JAMES 5",2910 N VAN NESS BLVD,,FRESNO,CA,93704,...,6400,687570,"THOMAS, JAMES 5",2910 N VAN NESS BLVD,,FRESNO,CA,93704,1999,0


In [13]:
# Joined rows whose Amount went up, largest increase first.
(
    merged_transactions
    .loc[merged_transactions["AmountDiff"].gt(0)]
    .sort_values(by="AmountDiff", ascending=False)
)

Unnamed: 0,RC#_o,RC Name_o,Category_o,Vendor#_o,Vendor Name_o,Address 1_o,Address 2_o,City_o,State_o,ZIP code_o,...,Category_u,Vendor#_u,Vendor Name_u,Address 1_u,Address 2_u,City_u,State_u,ZIP code_u,Amount_u,AmountDiff
4768,85,SOMD Administration,7300,424204,BPA II LTD,1468 WEST 9TH STREET SUITE 835,,CLEVELAND,OH,44113,...,7300,424204,BPA II LTD,1468 WEST 9TH STREET SUITE 835,,CLEVELAND,OH,44113,10951439,2584599
5304,92,Business and Auxiliary Services,7300,505445,WYNDHAM PITTSBURGH UNIVERSITY CENTER,545 E JOHN CARPENTER FREEWAY STE 1300,,IRVING,TX,75062,...,7300,505445,WYNDHAM PITTSBURGH UNIVERSITY CENTER,545 E JOHN CARPENTER FREEWAY STE 1300,,IRVING,TX,75062,11657790,2023060
4414,83,General University,7700,204776,"MARSH USA, INC",P O BOX 281915,,ATLANTA,GA,30384,...,7700,204776,"MARSH USA, INC",P O BOX 281915,,ATLANTA,GA,30384,6071082,1735802
5310,92,Business and Auxiliary Services,7300,687995,MWK FORBES II LLC,3341 FORBES AVENUE,,PITTSBURGH,PA,15213,...,7300,687995,MWK FORBES II LLC,3341 FORBES AVENUE,,PITTSBURGH,PA,15213,6280388,1112046
3799,67,Facilities Management,7000,103658,DUQUESNE LIGHT COMPANY,PO BOX 830012,,BALTIMORE,MD,21283-0012,...,7000,103658,DUQUESNE LIGHT COMPANY,PO BOX 830012,,BALTIMORE,MD,21283-0012,24578216,747194
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2177,41,Johnstown Campus,6000,248635,GRAINGER,3750 BROOKSIDE PKWY,STE 260,ALPHARETTA,GA,30022-1433,...,6000,248635,GRAINGER,3750 BROOKSIDE PKWY,STE 260,ALPHARETTA,GA,30022-1433,125297,53
4621,85,SOMD Administration,6600,534443,"SPOK, INC",5911 KINGSTOWNE VILLAGE PARKWAY,6TH FLOOR,ALEXANDRIA,VA,22315,...,6600,534443,"SPOK, INC",5911 KINGSTOWNE VILLAGE PARKWAY,6TH FLOOR,ALEXANDRIA,VA,22315,8301,53
5,1,Chancellor,6000,185133,CDW GOVERNMENT INC,75 REMITTANCE DR,SUITE 1515,CHICAGO,IL,60675-1515,...,6000,185133,CDW GOVERNMENT INC,75 REMITTANCE DR,SUITE 1515,CHICAGO,IL,60675-1515,7547,47
214,5,Student Affairs,6000,100877,APPLE COMPUTER INC,PO BOX 281877,,ATLANTA,GA,30384-1877,...,6000,100877,APPLE COMPUTER INC,PO BOX 281877,,ATLANTA,GA,30384-1877,18835,38


# Largest amounts - only appearing in updated

In [14]:
# Right join: every row of `updated` (after dropping fully duplicated rows) is
# kept, and `_merge` records whether its key was also found in `original`.
# Overlapping columns get pandas' default suffixes: `_x` for `original`,
# `_y` for `updated`.
new_transactions = (
    pd.merge(
        original,
        updated.drop_duplicates(),
        on=TRANSACTION_INDEX,
        how="right",
        indicator=True,
    )
    # Rows without a match in `original` have no Amount_x; treat it as 0 so
    # that AmountDiff equals the full updated amount for those rows.
    .assign(Amount_x=lambda joined: joined["Amount_x"].fillna(0))
    .assign(AmountDiff=lambda joined: joined["Amount_y"] - joined["Amount_x"])
)

In [15]:
# Columns to show: the updated-side (`_y`) fields plus the join key, the merge
# indicator, and the computed difference.
display_columns = [
    'RC#_y',
    'RC Name_y',
    'Category_y',
    'Vendor#_y',
    'Vendor Name_y',
    'Address 1_y',
    'Address 2_y',
    'City_y',
    'State_y',
    'ZIP code_y',
    'Amount_y',
    'TPN_ID',
    '_merge',
    'AmountDiff',
]
# Five largest rows (by AmountDiff) whose key appears only in the updated file.
(
    new_transactions
    .loc[new_transactions["_merge"] == "right_only"]
    .sort_values(by="AmountDiff", ascending=False)
    .head(5)
    .loc[:, display_columns]
)

Unnamed: 0,RC#_y,RC Name_y,Category_y,Vendor#_y,Vendor Name_y,Address 1_y,Address 2_y,City_y,State_y,ZIP code_y,Amount_y,TPN_ID,_merge,AmountDiff
469,6,Kenneth P. Dietrich School of Arts and Sciences,6400,104257,FISHER SCIENTIFIC COMPANY LLC,PO BOX 406538,,ATLANTA,GA,30384-6538,2344352,6-6400-104257,right_only,2344352.0
5861,93,Public Safety and Emergency Management,6400,446194,"LANDMARK EVENT STAFFING SERVICES, INC",4131 HARBOR WALK DRIVE,,FORT COLLINS,CO,80525,1689115,93-6400-446194,right_only,1689115.0
1315,30,SVC Health Sciences,6400,114200,QUEST DIAGNOSTICS,P.O. BOX 740709,,ATLANTA,GA,30374-0709,1237840,30-6400-114200,right_only,1237840.0
787,10,SVC and Provost,6400,689608,PFL,100 PFL WAY,,LIVINGSTON,MT,59047,790510,10-6400-689608,right_only,790510.0
5063,87,SVC and Chief Financial Officer,6400,521322,SUPRA OFFICE SOLUTIONS INC,PO BOX 201,,BALA CYNWYD,PA,19004,600681,87-6400-521322,right_only,600681.0
