# Processing military surplus equipment data

#### Import python tools

In [1]:
import json
import pandas as pd
import altair as alt
import altair_latimes as lat
pd.options.display.max_columns = 50

In [2]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.float_format = '${:,.0f}'.format

#### Download the data before processing

In [3]:
# %run '00-downloading.ipynb'

#### Read in and clean up the raw data

In [4]:
src = pd.read_csv("data/military-surplus-equipment.csv").set_index('ship_date').reset_index()

In [5]:
src = src.rename(columns={'station_name_lea':'agency', 'nsn':'stock_number', 'item_name':'item', 
                        'acquisition_value':'value', 'ship_date':'date'}).drop(['Unnamed: 0', 'demil_ic'], axis=1)

In [6]:
src.item = src.item.str.strip()
src.state = src.state.str.strip()

#### Limit to U.S. states and remove aircraft

In [7]:
territories = ['MP','GU','PR','VI']

In [8]:
df = pd.DataFrame(src[(~src["item"].str.contains('AIRCRAFT')) & \
                     (~src["state"].isin(territories))])

#### Deal with dates

In [9]:
df['date'] = pd.to_datetime(df['date'])
df['yearmonth'] = pd.to_datetime(df['date']).map(lambda dt: dt.replace(day=1))
df['yearmonth'] = pd.to_datetime(df['yearmonth'])
df['year'] = pd.to_datetime(df['date']).dt.year

In [10]:
df[df['state'] == 'TX'].sort_values(by='value', ascending=False).head(10)

Unnamed: 0,date,state,agency,stock_number,item,quantity,ui,value,demil_code,station_type,yearmonth,year
125156,2016-10-28 09:35:00,TX,JEFFERSON COUNTY SHERIFF'S OFFICE,1520-00-087-7637,"HELICOPTER,UTILITY",1,Each,"$922,704",C,State,2016-10-01 09:35:00,2016
128965,2015-03-31 00:00:00,TX,VAN ZANDT COUNTY SHERIFF'S OFFICE,2355-01-590-2719,"TRUCK,WRECKER",1,Each,"$880,674",C,State,2015-03-01 00:00:00,2015
128646,2016-07-19 00:00:00,TX,TERRELL PD,2355-01-602-3357,MINE RESISTANT VEHICLE,1,Each,"$865,000",C,State,2016-07-01 00:00:00,2016
123745,2016-08-18 00:00:00,TX,HARRIS COUNTY CONSTABLE PCT 3,2355-01-602-3357,MINE RESISTANT VEHICLE,1,Each,"$865,000",C,State,2016-08-01 00:00:00,2016
128498,2016-10-12 00:00:00,TX,SOUTHLAKE POLICE DEPT,2355-01-602-3357,MINE RESISTANT VEHICLE,1,Each,"$865,000",C,State,2016-10-01 00:00:00,2016
127783,2016-02-11 00:00:00,TX,SAN ANGELO POLICE DEPT,2355-01-602-3357,MINE RESISTANT VEHICLE,1,Each,"$865,000",C,State,2016-02-01 00:00:00,2016
121916,2016-07-11 00:00:00,TX,CUERO POLICE DEPT,2355-01-602-3357,MINE RESISTANT VEHICLE,1,Each,"$865,000",C,State,2016-07-01 00:00:00,2016
121653,2016-07-21 00:00:00,TX,CLEBURNE POLICE DEPT,2355-01-602-3357,MINE RESISTANT VEHICLE,1,Each,"$865,000",C,State,2016-07-01 00:00:00,2016
126050,2016-10-13 00:00:00,TX,MARSHALL POLICE DEPT,2355-01-602-3357,MINE RESISTANT VEHICLE,1,Each,"$865,000",C,State,2016-10-01 00:00:00,2016
121132,2016-09-23 00:00:00,TX,BURKBURNETT POLICE DEPT,2355-01-602-3357,MINE RESISTANT VEHICLE,1,Each,"$865,000",C,State,2016-09-01 00:00:00,2016


---

#### How much spending has their been over the years, by state?

In [11]:
state_year = df[df['year'] > 1999].groupby(['year', 'state'])['value'].sum().reset_index()
state_yearmonth = df[df['year'] > 2009].groupby(['yearmonth', 'state'])['value'].sum().reset_index()

In [12]:
state_year.year = state_year.year.astype(str)

In [13]:
state_sum_year = alt.Chart(state_yearmonth)\
    .mark_bar(size=3)\
    .encode(
    x=alt.X('yearmonth:T', title=' ', axis=alt.Axis(grid=False, tickCount=10, format='%Y'),\
#            scale=alt.Scale(domain=('2000', '2020'))
           ),
    y=alt.Y('value:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format=''),\
#            scale=alt.Scale(domain=(0, 100000000))
           )
).properties(width=630, height=200,
    title='Surplus military purchasing by law enforcement, by state: 2000-2020'
 )

(state_sum_year).configure_view(strokeOpacity=0)

In [14]:
# state_sum_year = alt.Chart(state_year)\
#     .mark_bar(size=3)\
#     .encode(
#     x=alt.X('year:T', title=' ', axis=alt.Axis(grid=False, tickCount=1, format='%Y'),\
# #            scale=alt.Scale(domain=('2000', '2020'))
#            ),
#     y=alt.Y('value:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
#      gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format=''),\
# #            scale=alt.Scale(domain=(0, 100000000))
#            )
# ).properties(width=70, height=70,
#     title='Surplus military purchasing, by state: 2000-2020'
#  ).facet(
#     facet=alt.Facet('state:N'),
#     columns=10,
#     padding={"left": 0, "top": 0, "right": -0, "bottom": 0}
# )

# (state_sum_year).configure_view(strokeOpacity=0)

---

In [15]:
df_trump = df[df['date'] > '2017-01-20']

In [16]:
df_trump[df_trump['state'] == 'CA'].value.sum()

25937550.75

#### Split out the state into csv

In [17]:
for s in df_trump.state.unique():
    df_trump[df_trump['state'] == '{}'.format(s)].groupby(['stock_number', 'item']).sum().round()\
    .reset_index().sort_values(by='value', ascending=False).to_csv('data/output/states/{}.csv'.format(s))

In [18]:
df_trump[df_trump['agency'] == 'ORANGE COUNTY SHERIFFS DEPT'].groupby(['stock_number','item']).sum().round()\
    .reset_index().sort_values(by='value', ascending=False).head(10)

Unnamed: 0,stock_number,item,quantity,value,year
41,5895-01-591-9081,TACTICAL EXPLOITATION SYSTEM,1,"$449,938",2019
4,1385-01-593-6219,PACKBOT 510 WITH FASTAC REMOTELY CONTROLLED VE...,5,"$385,000",10085
29,4310-DS-COM-PVAC,COMPRESSORS AND VACUUM PUMPS,1,"$70,000",2019
64,6920-01-617-0865,"TARGET,MANNEQUIN",10,"$69,700",2019
30,4940-DS-MSC-REPE,MISC MAINTENANCE AND REPAIR SHOP,5,"$62,125",10095
42,6117-01-598-1836,GROUND RENEWABLE EX,2,"$58,000",2019
40,5855-01-485-3429,THERMAL IMAGING SYSTEM,2,"$37,000",4038
7,2330-01-458-6865,"SEMITRAILER,LOW BED",1,"$33,156",2019
55,6635-01-549-8204,"SCANNER,X-RAY,BAGG",1,"$26,500",2019
17,3990-DS-MHE-MISC,MISC MATERIALS HANDLING EQUIPMENT,3,"$24,900",6057


In [19]:
df_trump[df_trump['item'] == 'TACTICAL EXPLOITATION SYSTEM']

Unnamed: 0,date,state,agency,stock_number,item,quantity,ui,value,demil_code,station_type,yearmonth,year
17904,2019-12-12,CA,ORANGE COUNTY SHERIFFS DEPT,5895-01-591-9081,TACTICAL EXPLOITATION SYSTEM,1,Set,"$449,938",A,State,2019-12-01,2019
108408,2018-05-04,SC,LAW ENFORCEMENT DIVISION,5895-01-622-3136,TACTICAL EXPLOITATION SYSTEM,1,Set,"$12,000",C,State,2018-05-01,2018


In [20]:
df_trump.loc[17904]

date                     2019-12-12 00:00:00
state                                     CA
agency           ORANGE COUNTY SHERIFFS DEPT
stock_number                5895-01-591-9081
item            TACTICAL EXPLOITATION SYSTEM
quantity                                   1
ui                                       Set
value                               $449,938
demil_code                                 A
station_type                           State
yearmonth                2019-12-01 00:00:00
year                                    2019
Name: 17904, dtype: object

---

#### Big numbers

---

#### Time

---

#### California