# Make distances

In [1]:
import pandas as pd
import os
import numpy as np
import datetime
original_donors = pd.read_csv('raw_data/donors.csv',parse_dates=['DateCreated'],dtype={'address':str})
original_recipients = pd.read_csv('raw_data/recipients.csv',parse_dates=['createddate'],dtype={'address':str})
original_donors.sort_values('DateCreated',inplace=True)
original_recipients.sort_values('createddate',inplace=True)

## Get rid of test rows

In [2]:
dd = original_donors.groupby('DonorKey').size().sort_values().to_frame().reset_index()
dd.columns=['DonorKey','']

In [3]:
original_donors.head()

Unnamed: 0,DonorName,DonorKey,PostCode,EquipmentType,UnopenQty,OpenQty,DateCreated
1100,Bahadar Srichawla,77998232-6b31-4939-8c22-0901cac6f913,10940,Filtering facepiece respirators,0.0,20.0,2020-03-27 11:47:00+00:00
469,Pierre Baillargeon,e6dd5103-1e80-499f-8b41-4258497cfb34,33458,3D printed / laser cut face shields,10.0,10.0,2020-04-09 13:08:00+00:00
470,Barbara Pettijohn,1dd1ced6-2c35-4a64-8b7c-3dcaf4814f13,6084,3D printed / laser cut face shields,1.0,1.0,2020-04-09 13:36:00+00:00
327,John Wagner,b598e546-dd8e-4978-b31a-92fc53b70cdf,55110,3D printed / laser cut face shields,3000.0,3000.0,2020-04-09 13:53:00+00:00
328,Michael Galloway,5f7fe93a-8943-4cde-bbb8-41ff8ba9bc56,19128,3D printed / laser cut face shields,5.0,5.0,2020-04-09 14:24:00+00:00


In [4]:
original_donors[original_donors.DonorKey == '89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4']

Unnamed: 0,DonorName,DonorKey,PostCode,EquipmentType,UnopenQty,OpenQty,DateCreated
1644,Ivailo Pavlov,89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4,2705,Safety goggles,50000.0,0.0,2020-07-14 10:22:40.322000+00:00
1650,Ivailo Pavlov,89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4,2705,Surgical/procedure masks,20000.0,0.0,2020-07-14 10:22:40.322000+00:00
1645,Ivailo Pavlov,89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4,2705,Nitrile gloves,50000.0,0.0,2020-07-14 10:22:40.322000+00:00
42,Ivailo Pavlov,89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4,2705,Disinfecting wipes,15000.0,0.0,2020-07-14 10:22:40.322000+00:00
1649,Ivailo Pavlov,89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4,2705,Filtering facepiece respirators,10000.0,0.0,2020-07-14 10:22:40.322000+00:00
1646,Ivailo Pavlov,89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4,2705,Hand sanitizer,10000.0,0.0,2020-07-14 10:22:40.322000+00:00
1647,Ivailo Pavlov,89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4,2705,Face shields,10000.0,0.0,2020-07-14 10:22:40.322000+00:00
1648,Ivailo Pavlov,89a046dd-a8cf-4223-9bf0-2c8ab3dfedd4,2705,Safety glasses,5000.0,0.0,2020-07-14 10:22:40.322000+00:00


In [5]:
type_dict = {
    'Filtering facepiece respirators':'respirators',
    '3D printed / laser cut face shields':'faceShields',
    'Face shields':'faceShields',
    'Surgical cap or bouffant cap':'surgicalCaps',
    'Surgical/procedure masks':'surgicalMasks',
    'Handmade masks':'handmadeMasks',
    'Nitrile gloves':'nitrileGloves',
    'Gowns':'gowns',
    'Coveralls / bunny suits':'coveralls',
    'Safety glasses':'safetyGlasses',
    'Disposable booties':'disposableBooties',
    'Hand sanitizer':'handSanitizer',
    'Disinfecting wipes':'disinfectingWipes',
    'Baby monitors / two way radios':'babyMonitors',
    'Safety goggles':'safetyGoggles',
    'Thermometers':'thermometers',
    'Body Bag':'bodyBags',
    'PAPR Shield':'paprShield',
}

# Recipients

### Keep only the columns with date, facilityName, and the request for each PPE

In [6]:
original_recipients['facilityKey'].nunique()

13816

In [7]:
cols_to_keeps = ['facilityKey','postCode','createddate']
cols_to_keeps.extend(set(type_dict.values()))
mod_recipients = original_recipients.copy()[cols_to_keeps]

### Remove lines where date is missing

In [8]:
print(f'From {len(mod_recipients)} lines')

From 13819 lines


In [9]:
mod_recipients = mod_recipients[~mod_recipients["createddate"].isna()]

In [10]:
print(f'To {len(mod_recipients)} lines')

To 13213 lines


### Keep only zipcodes with only digits. And padleft with zeros

In [11]:
print(f'From {len(mod_recipients)} lines')

From 13213 lines


In [12]:
mod_recipients['postCode'] = mod_recipients['postCode'].apply(lambda s : str(s).zfill(5))

In [13]:
len(mod_recipients)

13213

In [14]:
mod_recipients = mod_recipients[mod_recipients['postCode'].str.isdigit()]

In [15]:
len(mod_recipients)

12751

In [16]:
mod_recipients = mod_recipients[~mod_recipients['postCode'].isna()]

In [17]:
len(mod_recipients)

12751

In [18]:
print(f'To {len(mod_recipients)} lines')

To 12751 lines


### Deal with nulls in requests and set negative requests to zero

In [19]:
for c in set(type_dict.values()):
    mod_recipients.loc[:,c] = mod_recipients[c].fillna(0)
    mod_recipients.loc[mod_recipients[c]<0,c] = 0

### Remove recipients with zero requests

In [20]:
aaa = mod_recipients[set(type_dict.values())].sum(axis=1)
mod_recipients = mod_recipients[aaa > 0]

### Rename columns

In [21]:
mod_recipients.rename(columns={'facilityKey':'rec_id','postCode':'zipcode','createddate':'date'},inplace=True)

In [22]:
mod_recipients.head()

Unnamed: 0,rec_id,zipcode,date,disinfectingWipes,surgicalCaps,disposableBooties,respirators,handmadeMasks,nitrileGloves,coveralls,handSanitizer,safetyGlasses,bodyBags,gowns,faceShields,safetyGoggles,thermometers,surgicalMasks,paprShield,babyMonitors
699,4320d44a-a8a7-4394-a842-8503153faa44,94609,2020-04-02 16:27:00+00:00,0,0,0,10000.0,0,10000,0,0,0,0,1000,5000,0,0,0,0,0
1957,0c33f512-d45f-4462-9703-f06d1add777a,91505,2020-04-02 16:35:00+00:00,4,0,0,9.0,9,5,0,4,0,0,9,9,0,0,0,0,0
690,6da9e3db-ffcd-49c5-a37e-c606eeb3e6d3,98208,2020-04-02 16:44:00+00:00,300,0,100,5.0,0,0,0,25,0,0,100,10,0,20,0,0,0
780,2206ab02-6714-4ea5-ac9a-134efc936974,45209,2020-04-02 16:47:00+00:00,50,0,0,200.0,100,100,0,100,0,0,400,200,0,50,0,0,0
227,5b9b0536-c071-4903-b513-a61c6d3fd3d4,73703,2020-04-02 16:54:00+00:00,0,0,0,10.0,0,0,0,4,0,0,30,0,0,2,0,0,0


# Donors

In [23]:
mod_donors = original_donors[['DonorKey','DateCreated','PostCode','EquipmentType','UnopenQty']]
mod_donors = mod_donors[mod_donors['UnopenQty']>0]
mod_donors

Unnamed: 0,DonorKey,DateCreated,PostCode,EquipmentType,UnopenQty
469,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,3D printed / laser cut face shields,10.0
470,1dd1ced6-2c35-4a64-8b7c-3dcaf4814f13,2020-04-09 13:36:00+00:00,6084,3D printed / laser cut face shields,1.0
327,b598e546-dd8e-4978-b31a-92fc53b70cdf,2020-04-09 13:53:00+00:00,55110,3D printed / laser cut face shields,3000.0
328,5f7fe93a-8943-4cde-bbb8-41ff8ba9bc56,2020-04-09 14:24:00+00:00,19128,3D printed / laser cut face shields,5.0
329,7bbc52ad-1462-4419-9403-666a89fdbec6,2020-04-09 17:52:00+00:00,95379,3D printed / laser cut face shields,10.0
...,...,...,...,...,...
1708,37573f01-1a35-4483-9d87-62c99cdeb148,2020-07-18 12:11:44.881000+00:00,57701,Handmade masks,50.0
1667,f154b325-3546-48b9-98d8-718d62cfc548,2020-07-19 11:40:30.788000+00:00,8521NK,Filtering facepiece respirators,200000.0
1703,65b6ee3f-ff9d-4a6f-84ed-fdd42fd21e2f,2020-07-19 20:15:07.763000+00:00,L6E1R7,Handmade masks,20.0
1690,8e68449c-da32-4d55-8153-fa64677914f0,2020-07-19 23:13:57.061000+00:00,H9x1500,Filtering facepiece respirators,25000.0


### Keep only zipcodes with only digits. And padleft with zeros

In [24]:
print(f'From {len(mod_donors)} lines')

From 1034 lines


In [25]:
mod_donors['PostCode'] = mod_donors['PostCode'].apply(lambda s : str(s).zfill(5))

In [26]:
mod_donors = mod_donors[mod_donors['PostCode'].str.isdigit()]

In [27]:
mod_donors = mod_donors[~mod_donors['PostCode'].isna()]

In [28]:
print(f'To {len(mod_donors)} lines')

To 1024 lines


### Set Qty as the max between UnopenQty and OpenQty (check with Rohit)

In [29]:
mod_donors['Qty']= mod_donors['UnopenQty']

In [30]:
mod_donors.drop(columns=['UnopenQty'],inplace=True)

### Use dictionary type_dict to change names of PPE so that they match the donor DataFrame

In [31]:
mod_donors.loc[:,'EquipmentType'] = mod_donors['EquipmentType'].replace(type_dict)

### Rename columns

In [32]:
mod_donors.rename(columns={'DonorKey':'don_id','DateCreated':'date','PostCode':'zipcode','EquipmentType':'ppe','Qty':'qty'},inplace=True)

In [33]:
mod_donors.head()

Unnamed: 0,don_id,date,zipcode,ppe,qty
469,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0
470,1dd1ced6-2c35-4a64-8b7c-3dcaf4814f13,2020-04-09 13:36:00+00:00,6084,faceShields,1.0
327,b598e546-dd8e-4978-b31a-92fc53b70cdf,2020-04-09 13:53:00+00:00,55110,faceShields,3000.0
328,5f7fe93a-8943-4cde-bbb8-41ff8ba9bc56,2020-04-09 14:24:00+00:00,19128,faceShields,5.0
329,7bbc52ad-1462-4419-9403-666a89fdbec6,2020-04-09 17:52:00+00:00,95379,faceShields,10.0


# Distance Matrix

In [34]:
import pgeocode
dist_calc = pgeocode.GeoDistance('US')

In [35]:
rrr = mod_recipients.set_index(['rec_id','zipcode','date']).stack().to_frame().reset_index()
rrr.columns=['rec_id','zipcode','date','ppe','qty']
rrr = rrr[rrr.qty>0]
rrr

Unnamed: 0,rec_id,zipcode,date,ppe,qty
3,4320d44a-a8a7-4394-a842-8503153faa44,94609,2020-04-02 16:27:00+00:00,respirators,10000.0
5,4320d44a-a8a7-4394-a842-8503153faa44,94609,2020-04-02 16:27:00+00:00,nitrileGloves,10000.0
10,4320d44a-a8a7-4394-a842-8503153faa44,94609,2020-04-02 16:27:00+00:00,gowns,1000.0
11,4320d44a-a8a7-4394-a842-8503153faa44,94609,2020-04-02 16:27:00+00:00,faceShields,5000.0
17,0c33f512-d45f-4462-9703-f06d1add777a,91505,2020-04-02 16:35:00+00:00,disinfectingWipes,4.0
...,...,...,...,...,...
109265,8e08b25f-a7dc-491c-81af-a848cc481ef0,91204,2020-07-17 08:14:58.249000+00:00,coveralls,10.0
109273,8e08b25f-a7dc-491c-81af-a848cc481ef0,91204,2020-07-17 08:14:58.249000+00:00,surgicalMasks,300.0
109274,8e08b25f-a7dc-491c-81af-a848cc481ef0,91204,2020-07-17 08:14:58.249000+00:00,paprShield,50.0
109278,a5128eb4-d067-44b5-90c7-8a1313ea2bda,77030,2020-07-17 14:25:54.605000+00:00,disposableBooties,1.0


In [36]:
m1 = mod_donors.merge(rrr,on='ppe',suffixes=('_don','_rec'))
m1

Unnamed: 0,don_id,date_don,zipcode_don,ppe,qty_don,rec_id,zipcode_rec,date_rec,qty_rec
0,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,4320d44a-a8a7-4394-a842-8503153faa44,94609,2020-04-02 16:27:00+00:00,5000.0
1,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,0c33f512-d45f-4462-9703-f06d1add777a,91505,2020-04-02 16:35:00+00:00,9.0
2,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,6da9e3db-ffcd-49c5-a37e-c606eeb3e6d3,98208,2020-04-02 16:44:00+00:00,10.0
3,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,2206ab02-6714-4ea5-ac9a-134efc936974,45209,2020-04-02 16:47:00+00:00,200.0
4,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,e62244d6-df43-44b3-8622-b7dc6ada4a08,92683,2020-04-02 16:55:00+00:00,100.0
...,...,...,...,...,...,...,...,...,...
3200992,ae3e3190-58c1-41f0-8925-3a4e9d8b30f6,2020-05-27 01:40:58+00:00,70525,bodyBags,10.0,1ef916f1-2fe0-4de9-a0b2-6981a0eb34c8,41501,2020-07-12 21:57:10.032000+00:00,7.0
3200993,ae3e3190-58c1-41f0-8925-3a4e9d8b30f6,2020-05-27 01:40:58+00:00,70525,bodyBags,10.0,d54a1f90-3e84-4d1a-ac55-d296dcbf04dc,77045,2020-07-13 04:39:04.804000+00:00,100.0
3200994,ae3e3190-58c1-41f0-8925-3a4e9d8b30f6,2020-05-27 01:40:58+00:00,70525,bodyBags,10.0,be82b38f-8727-4277-9641-f300856b3a64,00265,2020-07-14 08:44:26.061000+00:00,1000.0
3200995,ae3e3190-58c1-41f0-8925-3a4e9d8b30f6,2020-05-27 01:40:58+00:00,70525,bodyBags,10.0,fe211921-0125-4c08-89c6-f1c386f5e60a,77082,2020-07-15 15:44:03.267000+00:00,20.0


In [37]:
distances = dist_calc.query_postal_code(
    m1['zipcode_don'].values,
    m1['zipcode_rec'].values) * 0.621371

In [38]:
m1.loc[:,'distance'] = distances
m1

Unnamed: 0,don_id,date_don,zipcode_don,ppe,qty_don,rec_id,zipcode_rec,date_rec,qty_rec,distance
0,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,4320d44a-a8a7-4394-a842-8503153faa44,94609,2020-04-02 16:27:00+00:00,5000.0,2548.016134
1,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,0c33f512-d45f-4462-9703-f06d1add777a,91505,2020-04-02 16:35:00+00:00,9.0,2314.629512
2,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,6da9e3db-ffcd-49c5-a37e-c606eeb3e6d3,98208,2020-04-02 16:44:00+00:00,10.0,2678.304814
3,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,2206ab02-6714-4ea5-ac9a-134efc936974,45209,2020-04-02 16:47:00+00:00,200.0,879.950511
4,e6dd5103-1e80-499f-8b41-4258497cfb34,2020-04-09 13:08:00+00:00,33458,faceShields,10.0,e62244d6-df43-44b3-8622-b7dc6ada4a08,92683,2020-04-02 16:55:00+00:00,100.0,2293.700408
...,...,...,...,...,...,...,...,...,...,...
3200992,ae3e3190-58c1-41f0-8925-3a4e9d8b30f6,2020-05-27 01:40:58+00:00,70525,bodyBags,10.0,1ef916f1-2fe0-4de9-a0b2-6981a0eb34c8,41501,2020-07-12 21:57:10.032000+00:00,7.0,741.574503
3200993,ae3e3190-58c1-41f0-8925-3a4e9d8b30f6,2020-05-27 01:40:58+00:00,70525,bodyBags,10.0,d54a1f90-3e84-4d1a-ac55-d296dcbf04dc,77045,2020-07-13 04:39:04.804000+00:00,100.0,199.541385
3200994,ae3e3190-58c1-41f0-8925-3a4e9d8b30f6,2020-05-27 01:40:58+00:00,70525,bodyBags,10.0,be82b38f-8727-4277-9641-f300856b3a64,00265,2020-07-14 08:44:26.061000+00:00,1000.0,
3200995,ae3e3190-58c1-41f0-8925-3a4e9d8b30f6,2020-05-27 01:40:58+00:00,70525,bodyBags,10.0,fe211921-0125-4c08-89c6-f1c386f5e60a,77082,2020-07-15 15:44:03.267000+00:00,20.0,208.912756


In [39]:
zipcodes = m1.groupby(['don_id','rec_id','distance']).size().sort_values().to_frame().reset_index()
zipcodes

Unnamed: 0,don_id,rec_id,distance,0
0,0028bc23-d933-49ff-b213-b001b2bd1fff,0016bd18-3c2b-44ce-8b14-ef75dc3c480b,540.263969,1
1,a89f591f-d40a-4343-b366-d20486fa9acf,392656c0-3f09-482f-9e67-175bea1c9aae,770.589552,1
2,a89f591f-d40a-4343-b366-d20486fa9acf,393fa4a7-76be-4413-aa4c-24fdd97f0ffc,1017.043444,1
3,a89f591f-d40a-4343-b366-d20486fa9acf,3943473c-c8f6-4a35-9c2d-ce16b6c1916e,612.656722,1
4,a89f591f-d40a-4343-b366-d20486fa9acf,39452578-7a40-41e8-98f5-7f3bb08c56e6,1737.009898,1
...,...,...,...,...
2782715,713b89e7-893b-4847-98e9-d48d0753e816,9d6adb24-8df5-4a05-946e-053df6a8997b,883.147945,15
2782716,d7444d88-6300-499f-b39f-8dbfee2ce676,e077071e-3a2c-4707-bcc7-0caf085c5c85,2452.256239,16
2782717,d7444d88-6300-499f-b39f-8dbfee2ce676,7099acf1-0f8c-40aa-a344-567717531a24,873.115576,18
2782718,713b89e7-893b-4847-98e9-d48d0753e816,e077071e-3a2c-4707-bcc7-0caf085c5c85,1714.408610,18


In [40]:
zipcodes = zipcodes[['don_id','rec_id','distance']]

In [41]:
zipcodes

Unnamed: 0,don_id,rec_id,distance
0,0028bc23-d933-49ff-b213-b001b2bd1fff,0016bd18-3c2b-44ce-8b14-ef75dc3c480b,540.263969
1,a89f591f-d40a-4343-b366-d20486fa9acf,392656c0-3f09-482f-9e67-175bea1c9aae,770.589552
2,a89f591f-d40a-4343-b366-d20486fa9acf,393fa4a7-76be-4413-aa4c-24fdd97f0ffc,1017.043444
3,a89f591f-d40a-4343-b366-d20486fa9acf,3943473c-c8f6-4a35-9c2d-ce16b6c1916e,612.656722
4,a89f591f-d40a-4343-b366-d20486fa9acf,39452578-7a40-41e8-98f5-7f3bb08c56e6,1737.009898
...,...,...,...
2782715,713b89e7-893b-4847-98e9-d48d0753e816,9d6adb24-8df5-4a05-946e-053df6a8997b,883.147945
2782716,d7444d88-6300-499f-b39f-8dbfee2ce676,e077071e-3a2c-4707-bcc7-0caf085c5c85,2452.256239
2782717,d7444d88-6300-499f-b39f-8dbfee2ce676,7099acf1-0f8c-40aa-a344-567717531a24,873.115576
2782718,713b89e7-893b-4847-98e9-d48d0753e816,e077071e-3a2c-4707-bcc7-0caf085c5c85,1714.408610


# Remove donors and recipients that are not present in the distance matrix

In [42]:
donors_to_keep = set(zipcodes.don_id)
mod_donors = mod_donors[mod_donors.don_id.isin(donors_to_keep)]

In [43]:
recipients_to_keep = set(zipcodes.rec_id)
mod_recipients = mod_recipients[mod_recipients.rec_id.isin(recipients_to_keep)]

# Anonymize

### Ids for recipients and donors

In [44]:
dict_rec_ids = {}
i=0
for r in mod_recipients['rec_id'].values:
    dict_rec_ids[r]='rec'+str(i)
    i+=1

In [45]:
dict_don_ids = {}
i=0
for d in mod_donors['don_id'].values:
    dict_don_ids[d]='don'+str(i)
    i+=1

In [46]:
mod_recipients.loc[:,'rec_id'] = mod_recipients['rec_id'].replace(dict_rec_ids)

In [47]:
mod_donors.loc[:,'don_id'] = mod_donors['don_id'].replace(dict_don_ids)

### Dict for zipcodes

In [48]:
rec_dict = {}
i=0
for z in mod_recipients['zipcode'].unique():
    rec_dict[z]='rec_zip'+str(i)
    i+=1

In [49]:
don_zipcode_dict = {}
i=0
for z in mod_donors['zipcode'].unique():
    don_zipcode_dict[z]='don_zip'+str(i)
    i+=1

In [50]:
mod_recipients.loc[:,'zipcode'] = mod_recipients['zipcode'].replace(rec_dict)

In [51]:
mod_donors.loc[:,'zipcode'] = mod_donors['zipcode'].replace(don_zipcode_dict)

### Anonymize the distance matrix

In [52]:
zipcodes.columns

Index(['don_id', 'rec_id', 'distance'], dtype='object')

In [53]:
zipcodes.loc[:,'rec_id']  = zipcodes['rec_id'].map(dict_rec_ids.get)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [54]:
zipcodes.loc[:,'don_id']  = zipcodes['don_id'].map(dict_don_ids.get)

# Print the anonymized data

In [55]:
mod_recipients.drop(columns=['zipcode'],inplace=True)

In [56]:
mod_recipients['rec_req_id'] = np.arange(0,len(mod_recipients))

In [60]:
mod_recipients = mod_recipients.reset_index().drop(columns=['index'])
mod_recipients.to_csv('data/anon_recipients.csv')
mod_recipients.head()

Unnamed: 0,rec_id,date,disinfectingWipes,surgicalCaps,disposableBooties,respirators,handmadeMasks,nitrileGloves,coveralls,handSanitizer,safetyGlasses,bodyBags,gowns,faceShields,safetyGoggles,thermometers,surgicalMasks,paprShield,babyMonitors,rec_req_id
0,rec0,2020-04-02 16:27:00+00:00,0,0,0,10000.0,0,10000,0,0,0,0,1000,5000,0,0,0,0,0,0
1,rec1,2020-04-02 16:35:00+00:00,4,0,0,9.0,9,5,0,4,0,0,9,9,0,0,0,0,0,1
2,rec2,2020-04-02 16:44:00+00:00,300,0,100,5.0,0,0,0,25,0,0,100,10,0,20,0,0,0,2
3,rec3,2020-04-02 16:47:00+00:00,50,0,0,200.0,100,100,0,100,0,0,400,200,0,50,0,0,0,3
4,rec4,2020-04-02 16:54:00+00:00,0,0,0,10.0,0,0,0,4,0,0,30,0,0,2,0,0,0,4


In [61]:
mod_donors.drop(columns=['zipcode'],inplace=True)

In [62]:
mod_donors['don_req_id'] = np.arange(0,len(mod_donors))

In [64]:
mod_donors = mod_donors.reset_index().drop(columns=['index'])
mod_donors.to_csv('data/anon_donors.csv')
mod_donors.head()

Unnamed: 0,don_id,date,ppe,qty,don_req_id
0,don0,2020-04-09 13:08:00+00:00,faceShields,10.0,0
1,don1,2020-04-09 13:36:00+00:00,faceShields,1.0,1
2,don2,2020-04-09 13:53:00+00:00,faceShields,3000.0,2
3,don3,2020-04-09 14:24:00+00:00,faceShields,5.0,3
4,don71,2020-04-09 17:52:00+00:00,faceShields,10.0,4


In [None]:
import pickle
pickle.dump(zipcodes,open( 'data/anon_distance_matrix.p', "wb" ))
zipcodes