In [1]:
import pandas as pd

In [2]:
folder = '../data/ca-doj'

# Load the CSV export first and print its column names for reference.
csv_name = 'electronic-search-warrant-notifications.csv'
warrants = pd.read_csv(f'{folder}/{csv_name}')
print(warrants.columns)
warrants['fn'] = csv_name  # 'fn' records which source file each row came from

# Gather the yearly Excel files (eswn2017.xlsx .. eswn2019.xlsx) and concatenate
# once, instead of re-copying the growing frame on every loop iteration.
# The index is deliberately NOT reset, so row labels match what the original
# incremental concat produced (a later cell writes the index to CSV).
frames = [warrants]
for year in (2017, 2018, 2019):
    fn = f"eswn{year}.xlsx"
    df = pd.read_excel(f"{folder}/{fn}")
    df['fn'] = fn
    frames.append(df)
warrants = pd.concat(frames).copy()

# Parse the date columns element by element, exactly as before.
# NOTE(review): the CSV and Excel sources presumably deliver different value
# types for these columns; confirm before switching to a single vectorised
# pd.to_datetime(column) call, which infers one format for the whole column.
for date_col in ('Start Date for Info', 'End Date for Info', 'Submitted'):
    warrants[date_col] = warrants[date_col].apply(pd.to_datetime)
warrants['Submitted Year'] = warrants['Submitted'].dt.year

Index(['Agency Name:', 'County of Court', 'Warrant Number', 'Submitted',
       'Nature of the investigation', 'Primary Nature',
       'Crime of Violence Options', 'Secondary Nature', 'Order Served on',
       'Business Name', 'Items to be searched for:', 'Start Date for Info',
       'End Date for Info', 'Grounds for Issuance',
       'Reasons for Delay (if any)', 'Emergency?',
       'Facts giving rise to the emergency'],
      dtype='object')


### Filter down based on item definition language

In [3]:
# Key of item phrases; keep only those marked as a probable geofence reference.
geofence = pd.read_csv('../data/geofence-references-key.csv')
geofence = geofence[geofence['Probable Geofence Reference'].eq('Y')]

# Built once so each membership test is a hash lookup, instead of rebuilding a
# list from the column for every item of every warrant.
geofence_items = set(geofence['Item to Search'])

def is_geofence(l):
    """Return True if any entry of ``l`` is a known geofence item phrase.

    Parameters
    ----------
    l : iterable of str, or a missing value
        The lower-cased, newline-split entries of one warrant's
        'Items to be searched for:' cell. A non-iterable value (e.g. the NaN
        that ``.str.split`` yields for a missing cell) is treated as
        "no match" instead of raising ``TypeError``.

    Returns
    -------
    bool
        True when at least one entry exactly equals a phrase in the filtered
        key's 'Item to Search' column, otherwise False.
    """
    try:
        entries = iter(l)
    except TypeError:
        return False
    return any(entry in geofence_items for entry in entries)

# Entries are lower-cased and split on newlines before the exact-match lookup.
items_split = warrants['Items to be searched for:'].str.lower().str.split('\n')
warrants['References Geofence/Reverse Location'] = items_split.apply(is_geofence)

In [92]:
# Rows flagged as referencing a geofence item, ordered by year submitted.
geofence_mask = warrants['References Geofence/Reverse Location']
gf = warrants.loc[geofence_mask].sort_values(by='Submitted Year')

In [93]:
# Write the flagged rows out as the raw candidate list; the index is omitted.
raw_items_path = '../output/probable-geofence-warrants-items-raw.csv'
gf.to_csv(raw_items_path, index=False)

We then manually reviewed the raw data.

In [76]:
# Presumably the outcome of the manual review of the item-based candidates.
reviewed_items_path = "../data/probable-items-geofence-warrants.csv"
geo = pd.read_csv(reviewed_items_path)

We also looked for warrants whose `Nature of the investigation` column contains keywords similar to those above.

In [87]:
# Keywords are matched against the lower-cased text with every space removed,
# which is why multi-word phrases appear here without spaces.
nature_keywords = [
    'geofence',
    'reverse',
    'deviceid',
    'anonymous',
    'anonymized',
    'certainarea',
    'geographic',
]

# Normalise the column once instead of once per keyword.
nature_text = warrants['Nature of the investigation'].str.lower().str.replace(' ', '')

# OR the per-keyword matches together; .eq(True) turns the NaN produced for
# missing text into False so the result is a clean boolean mask.
nature_hit = nature_text.str.contains(nature_keywords[0]).eq(True)
for keyword in nature_keywords[1:]:
    nature_hit = nature_hit | nature_text.str.contains(keyword).eq(True)

# NOTE(review): unlike the other exports in this notebook, this one writes the
# DataFrame index as a leading column. Kept as-is so the file is unchanged;
# confirm whether that is intended.
warrants[nature_hit].to_csv('../output/probable-geofence-warrants-nature-raw.csv')

We then manually reviewed that data and combined it with the `Items to be searched for:` data.

In [88]:
# Append the reviewed "nature" matches to the item-based set and drop rows
# that are exact duplicates across all columns.
nature_reviewed = pd.read_csv("../data/probable-nature-geofence-warrants.csv")
geo = pd.concat([geo, nature_reviewed]).drop_duplicates()

In [89]:
# Count of non-null 'Submitted' values for each submission year.
geo.groupby('Submitted Year')[['Submitted']].count()

Unnamed: 0_level_0,Submitted
Submitted Year,Unnamed: 1_level_1
2018,1
2019,31
2020,9
2021,16


In [90]:
# Save the combined set ordered by 'Submitted'; the index is omitted.
# NOTE(review): `geo` was loaded with plain pd.read_csv, so 'Submitted' is
# presumably sorted as text rather than as datetimes. Confirm this is intended.
geo_by_submitted = geo.sort_values(by="Submitted")
geo_by_submitted.to_csv("../data/probable-geofence-warrants.csv", index=False)

### Filter based on corporate mentions

In [91]:
def contains_google(row):
    """Report whether any field of ``row`` mentions "google".

    Every value is converted with ``str`` and lower-cased before the substring
    check, so the match is case-insensitive and also works on non-string
    cells.

    Parameters
    ----------
    row : mapping-like
        Any object offering ``keys()`` and item access by key, such as the
        row passed by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    bool
        True if at least one value contains "google", otherwise False.
    """
    return any('google' in str(row[field]).lower() for field in row.keys())

# Scan every column of every row for a Google mention, then order by year.
google_mask = warrants.apply(contains_google, axis=1)
cf = warrants.loc[google_mask].sort_values(by='Submitted Year')

In [13]:
# print() only emits an empty line ahead of the table shown below.
print()
# Count of non-null 'Submitted' values for each submission year.
cf.groupby('Submitted Year')[['Submitted']].count()




Unnamed: 0_level_0,Submitted
Submitted Year,Unnamed: 1_level_1
2017,67
2018,65
2019,168
2020,186
2021,164


In [14]:
# Save the Google-mentioning warrants; the index is omitted.
google_warrants_path = '../data/calecpa-google-warrants.csv'
cf.to_csv(google_warrants_path, index=False)