# Setup

In [1]:
import pandas as pd

TRANSACTION_INDEX = ['RC#', 'Category', 'Vendor#']

In [2]:
original = pd.read_csv("./data/original_sorted.csv")

In [3]:
original.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5568 entries, 0 to 5567
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   RC#          5568 non-null   int64 
 1   RC Name      5568 non-null   object
 2   Category     5568 non-null   int64 
 3   Vendor#      5568 non-null   int64 
 4   Vendor Name  5568 non-null   object
 5   Address 1    5544 non-null   object
 6   Address 2    1290 non-null   object
 7   City         5469 non-null   object
 8   State        5371 non-null   object
 9   ZIP code     5557 non-null   object
 10  Amount       5568 non-null   int64 
dtypes: int64(4), object(7)
memory usage: 478.6+ KB


In [4]:
updated = pd.read_csv("./data/updated_sorted.csv").set_index(TRANSACTION_INDEX)

In [5]:
updated.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 5920 entries, (1, 6000, 100877) to (94, 6900, 328199)
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   RC Name      5920 non-null   object
 1   Vendor Name  5920 non-null   object
 2   Address 1    5896 non-null   object
 3   Address 2    1359 non-null   object
 4   City         5821 non-null   object
 5   State        5716 non-null   object
 6   ZIP code     5909 non-null   object
 7   Amount       5920 non-null   int64 
dtypes: int64(1), object(7)
memory usage: 553.6+ KB


# Top-line numbers

In [6]:
original_sum = original.Amount.sum()
updated_sum = updated.Amount.sum()

updated_sum - original_sum

30599050

In [7]:
new_transactions = len(updated) - len(original)
new_transactions

352

In [8]:
diff = pd.concat([original, updated]).drop_duplicates(keep=False)

In [9]:
total_transactions_changed = len(diff)
total_transactions_changed

11484

In [10]:
original_transactions_changed = total_transactions_changed - new_transactions
original_transactions_changed

11132

# Merge transactions

In [11]:
merged_transactions = original.merge(updated, on=TRANSACTION_INDEX, suffixes=('_o', '_u'))
merged_transactions['AmountDiff'] = merged_transactions.Amount_u - merged_transactions.Amount_o

# Largest amount change

In [12]:
merged_transactions.sort_values(by=['AmountDiff'], ascending=False).head(5)

Unnamed: 0,RC#,RC Name_o,Category,Vendor#,Vendor Name_o,Address 1_o,Address 2_o,City_o,State_o,ZIP code_o,Amount_o,RC Name_u,Vendor Name_u,Address 1_u,Address 2_u,City_u,State_u,ZIP code_u,Amount_u,AmountDiff
4767,85,SOMD Administration,7300,424204,BPA II LTD,1468 WEST 9TH STREET SUITE 835,,CLEVELAND,OH,44113,8366840,SOMD Administration,BPA II LTD,1468 WEST 9TH STREET SUITE 835,,CLEVELAND,OH,44113,10951439,2584599
5292,92,Business and Auxiliary Services,7300,505445,WYNDHAM PITTSBURGH UNIVERSITY CENTER,545 E JOHN CARPENTER FREEWAY STE 1300,,IRVING,TX,75062,9634730,Business and Auxiliary Services,WYNDHAM PITTSBURGH UNIVERSITY CENTER,545 E JOHN CARPENTER FREEWAY STE 1300,,IRVING,TX,75062,11657790,2023060
4413,83,General University,7700,204776,"MARSH USA, INC",P O BOX 281915,,ATLANTA,GA,30384,4335280,General University,"MARSH USA, INC",P O BOX 281915,,ATLANTA,GA,30384,6071082,1735802
5298,92,Business and Auxiliary Services,7300,687995,MWK FORBES II LLC,3341 FORBES AVENUE,,PITTSBURGH,PA,15213,5168342,Business and Auxiliary Services,MWK FORBES II LLC,3341 FORBES AVENUE,,PITTSBURGH,PA,15213,6280388,1112046
3798,67,Facilities Management,7000,103658,DUQUESNE LIGHT COMPANY,PO BOX 830012,,BALTIMORE,MD,21283-0012,23831022,Facilities Management,DUQUESNE LIGHT COMPANY,PO BOX 830012,,BALTIMORE,MD,21283-0012,24578216,747194


In [14]:
mg2 = original.merge(updated.drop_duplicates(), on=TRANSACTION_INDEX, how='left', indicator=True)
mg2[mg2._merge == "left_only"]

Unnamed: 0,RC#,RC Name_x,Category,Vendor#,Vendor Name_x,Address 1_x,Address 2_x,City_x,State_x,ZIP code_x,Amount_x,RC Name_y,Vendor Name_y,Address 1_y,Address 2_y,City_y,State_y,ZIP code_y,Amount_y,_merge
24,1,Chancellor,6400,102512,CITY OF PITTSBURGH,P.O. BOX 140309,ALARM PROGRAM,IRVING,TX,75014,2411,,,,,,,,,left_only
25,1,Chancellor,6400,104738,"GOFF, C WILLIAM",307 LINCOLN AVE,,PITTSBURGH,PA,15237,2000,,,,,,,,,left_only
26,1,Chancellor,6400,109231,PITTSBURGH PIRATES,TICKET OFFICE,115 FEDERAL ST,PITTSBURGH,PA,15212,93673,,,,,,,,,left_only
28,1,Chancellor,6400,154253,SIGNS BY TOMORROW,422 E PITTSBURGH ST,,GREENSBURG,PA,15601,1995,,,,,,,,,left_only
30,1,Chancellor,6400,175975,ALCO PARKING CORP,CENTRE COMMONS GARAGE,501 MARTINDALE STREET,PITTSBURGH,PA,15212,1188,,,,,,,,,left_only
32,1,Chancellor,6400,203484,LAMAR COMPANIES,PO BOX 96030,,BATON ROUGE,LA,70896,1050,,,,,,,,,left_only
53,1,Chancellor,6400,418875,TALENT GROUP INC,2820 SMALLMAN STREET,,PITTSBURGH,PA,15222,11340,,,,,,,,,left_only
57,1,Chancellor,6400,481571,"WILL, NICHOLAS J",801 CLEARVIEW AVENUE,,PITTSBURGH,PA,15205,2250,,,,,,,,,left_only
58,1,Chancellor,6400,483309,"MOORE, EDWARD ALAN",265 MAIN STREET #1,,PITTSBURGH,PA,15201,3500,,,,,,,,,left_only
70,1,Chancellor,6400,571859,BRAND IQ CREATIVE GROUP,600 GRANT ST SUITE 660,,PITTSBURGH,PA,15219,21000,,,,,,,,,left_only
