## The City of Scottsdale updates the Citation dataset with the latest week's worth of data on Sunday evening.  My goal is to set up a tool that automatically downloads the latest week's worth of data by accessing the API.

In [100]:
import pandas as pd
import altair as alt
import glob

In [187]:
# Read every archived weekly CSV under data/ and stack them into one frame,
# indexed by citation number ('Citation #').
# The file list is sorted so that the later "keep first" de-duplication does
# not depend on the arbitrary order in which glob returns paths.
files = sorted(glob.glob('data/*.csv'))
if not files:
    # pd.concat on an empty list fails with a much less helpful message.
    raise FileNotFoundError("no CSV files matched 'data/*.csv'")

# sort=True orders the combined columns when the files' columns are not
# already aligned.
citations = pd.concat(
    [pd.read_csv(f, index_col='Citation #') for f in files],
    sort=True,
)

In [188]:
citations.shape

(97992, 21)

In [189]:
citations.head()

Unnamed: 0_level_0,Arizona Statute Code,Beat,Charge Description,Citation Date,Citation Time,Citation Type Code,Citation Type Description,Cited Person,Cited Person Age,Cited Person Ethnicity,...,Cited Person Sex,City,District,Officer Badge #,State,Street,Street Number,Zip,Zone,tcmainid
Citation #,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2074492,97XX,12.0,Amended Charge,12/20/2018,2222,CV,Civil Traffic,"Way, Dennis",51,U,...,M,Scottsdale,D3,1476,AZ,E Sahuaro DR / N Scottsdale Rd,xxx,,3003,641301
8093305,28-701A,10.0,Speed Greater Than R&P or Posted,12/20/2018,844,CV,Civil Traffic,"Grady, Scott",61,U,...,M,Scottsdale,D3,780,AZ,N Scottsdale Rd,8xxx,,2601,641302
2076259,28-4135C,3.0,No Proof of Insurance,12/18/2018,2147,CV,Civil Traffic,"Dudgeon, Miranda",20,U,...,F,Scottsdale,D1,1509,AZ,E Osborn Rd / N Scottsdale Rd,xxx,,706,641303
8093703,28-2532A,5.0,Expired/No Arizona Registration,12/21/2018,2116,CV,Civil Traffic,"Fox, William",34,U,...,M,Scottsdale,D2,826,AZ,E Chaparral Rd / N Hayden Rd,xxx,,1106,641304
2053182,28-4135C,15.0,No Proof of Insurance,12/20/2018,2324,CV,Civil Traffic,"Rhoden, David",24,U,...,M,Scottsdale,D4,1440,AZ,E Frank Lloyd Wright Bl / N Hayden Rd,xxx,,3604,641305


In [190]:
# Drop repeated citation numbers: only the first row seen for each index
# value ('Citation #') survives.
# NOTE(review): confirm a single citation number never carries several
# distinct charge rows -- `tcmainid` looks like the per-row id.
citations = citations[~citations.index.duplicated(keep='first')]

In [191]:
# verify size of resulting dataset
citations.shape

(49054, 21)

## Download latest citation data

In [192]:
# Fetch the currently published citations CSV straight from its URL,
# indexed by citation number like the archived files.
citations_recent = pd.read_csv(
    'https://opendatafiles.blob.core.windows.net/odfiles/spd_PDCitations.csv',
    index_col='Citation #',
)

In [195]:
# Stack the archived citations on top of the freshly downloaded ones.
all_citations = pd.concat([citations, citations_recent])

In [197]:
# Drop repeated citation numbers, keeping the first occurrence. Because the
# archived frame is placed first in the concat, the archived copy of a
# repeated citation wins over the freshly downloaded one.
all_citations = all_citations[~all_citations.index.duplicated(keep='first')]

In [198]:
# Parse 'Citation Date' into real datetimes.
# `assign` builds a new frame with the column replaced. Writing through
# `.loc[:, col] = ...` sets values in place on recent pandas, which can leave
# the column as object dtype, and it also writes into a frame produced by
# filtering (SettingWithCopyWarning territory).
all_citations = all_citations.assign(
    **{'Citation Date': pd.to_datetime(all_citations['Citation Date'])}
)

In [200]:
all_citations['Citation Date'].min()

Timestamp('2017-09-23 00:00:00')

In [201]:
all_citations['Citation Date'].max()

Timestamp('2020-04-16 00:00:00')

## Next steps
 1. Set up a task scheduler to run this script every Sunday evening, when the Citation dataset is updated
 1. Automatically push the updated dataset to GitHub

# None of this worked

## Query citations dataset using CKAN format

In [4]:
# http://cosopendata.westus.cloudapp.azure.com/api/3/action/datastore_search_sql?sql=SELECT * from "0d9377aa-dec5-433e-9e4d-f5219ea85ce5" WHERE title LIKE 'jones'

In [57]:
# Ask the datastore SQL endpoint for every column of the citations resource,
# ordered by "Citation Date" descending (the query is URL-encoded below).
js_object = pd.read_json(
    'http://cosopendata.westus.cloudapp.azure.com/api/3/action/datastore_search_sql?sql=SELECT%20*from%20%220d9377aa-dec5-433e-9e4d-f5219ea85ce5%22order%20by%20%22Citation%20Date%22desc'
)

In [58]:
js_object

Unnamed: 0,help,success,result
fields,http://cosopendata.westus.cloudapp.azure.com/a...,True,"[{'type': 'int4', 'id': '_id'}, {'type': 'tsve..."
records,http://cosopendata.westus.cloudapp.azure.com/a...,True,"[{'City': 'Scottsdale', 'Cited Person': 'Cantu..."
sql,http://cosopendata.westus.cloudapp.azure.com/a...,True,"SELECT *from ""0d9377aa-dec5-433e-9e4d-f5219ea8..."


In [59]:
# access elements of json records
js_object.loc['records', 'result'][0]

{'City': 'Scottsdale',
 'Cited Person': 'Cantu, Adrian',
 'Zip': None,
 'Officer Badge #': '1140            ',
 'Cited Person Ethnicity': 'H',
 'State': 'AZ',
 'Zone': '2501',
 'Citation Date': '2020-04-05T00:00:00',
 'Citation #': '2098299',
 'Cited Person Sex': 'M',
 'Citation Type Code': 'CT',
 'Street Number': '7xxx',
 'Citation Time': '14',
 'Charge Description': 'Suspended/Revoked D.L.',
 'Cited Person Age': '30',
 'District': 'D3',
 'Arizona Statute Code': '28-3473A',
 '_full_text': "'-04':4 '-05':5 '-3473':11 '00':7,8 '10':27 '1140':18 '14':9 '2020':3 '2098299':2 '2501':28 '28':10 '30':31 '688412':1 '7xxx':19 'adrian':30 'az':25 'bend':22 'cantu':29 'crimin':16 'ct':15 'd.l':14 'd3':26 'e':20 'h':34 'indian':21 'm':33 'rd':23 'scottsdal':24 'suspended/revoked':13 't00':6 'traffic':17 'w':32",
 'Cited Person Race': 'W',
 'Beat': '10',
 'Street': 'E Indian Bend Rd',
 'Citation Type Description': 'Criminal Traffic',
 'tcmainid': '688412',
 '_id': 4888}

In [60]:
# The 'records' row of the 'result' column holds the list of row dicts;
# build a DataFrame from it.
df = pd.DataFrame(js_object.at['records', 'result'])

In [61]:
# clearly the data is being limited at 5K rows
# these don't seem to be the most recent 5K rows
# NOTE(review): the response printed above has no `records_truncated` row,
# and the month-filtered query later in this notebook returns 876 April rows,
# the same April count seen in this 5,000-row result. The resource may simply
# hold 5,000 rows rather than being capped -- confirm.
df.shape

(5000, 24)

In [62]:
df.head()

Unnamed: 0,City,Cited Person,Zip,Officer Badge #,Cited Person Ethnicity,State,Zone,Citation Date,Citation #,Cited Person Sex,...,Cited Person Age,District,Arizona Statute Code,_full_text,Cited Person Race,Beat,Street,Citation Type Description,tcmainid,_id
0,Scottsdale,"Cantu, Adrian",,1140,H,AZ,2501,2020-04-05T00:00:00,2098299,M,...,30,D3,28-3473A,"'-04':4 '-05':5 '-3473':11 '00':7,8 '10':27 '1...",W,10,E Indian Bend Rd,Criminal Traffic,688412,4888
1,Scottsdale,"Oconnell, Victoria",,1307,N,AZ,2802,2020-04-04T00:00:00,2095657,F,...,43,D3,13-1504A2,"'-04':4,5 '-1504':11 '00':7,8 '11':32 '13':10 ...",W,11,E Del Plomo DR,Criminal,688399,4885
2,Scottsdale,"Jurica, Christopher",,1262,N,AZ,3404,2020-04-04T00:00:00,2093214,M,...,19,D4,28-1595B,"'-04':4,5 '-1595':11 '00':7,8 '100th':24 '1262...",W,17,N 100th St / N Frank Lloyd Wright Bl,Criminal Traffic,688410,4887
3,Scottsdale,"Marquez, Alejandro",,1140,U,AZ,3003,2020-04-04T00:00:00,2098298,M,...,19,D3,13-1805A1,"'-04':4,5 '-1805':11 '00':7,8 '1140':20 '12':2...",W,12,E Shea Bl,Criminal,688406,4886
4,Scottsdale,"Budicak, Michael",,1492,U,AZ,3802,2020-04-04T00:00:00,2100088,M,...,54,D4,28-721A,"'-04':4,5 '-721':11 '00':7,8 '1492':20 '18':31...",W,18,E Legacy Bl / N Pima Rd,Civil Traffic,688434,4892


In [63]:
# Convert the ISO-formatted date strings into pandas datetimes.
df['Citation Date'] = df['Citation Date'].pipe(pd.to_datetime)

In [64]:
df['Citation Date'].min()

Timestamp('2019-04-08 00:00:00')

In [65]:
df['Citation Date'].max()

Timestamp('2020-04-05 00:00:00')

In [66]:
df['Citation Date'].dt.month.value_counts().sort_index()

1       3
2     519
3     463
4     876
5      24
6     589
7     193
8     776
9       6
10    775
12    776
Name: Citation Date, dtype: int64

### No matter how I structure the query, CKAN returns the same 5K rows in no particular order

In [70]:
# Same resource as before, this time a plain SELECT * with no ORDER BY.
js_object = pd.read_json(
    'http://cosopendata.westus.cloudapp.azure.com/api/3/action/datastore_search_sql?sql=SELECT%20*%20from%20%220d9377aa-dec5-433e-9e4d-f5219ea85ce5%22'
)

In [71]:
# Build a DataFrame from the list of row dicts stored under
# ('records', 'result') in the parsed response.
df2 = pd.DataFrame(js_object.at['records', 'result'])

In [72]:
# clearly the data is being limited at 5K rows
# these don't seem to be the most recent 5K rows
# NOTE(review): this unordered query yields the same date range and month
# counts as the ordered one, and the month-filtered query later returns 876
# April rows, matching April's count here. The resource may simply hold
# 5,000 rows rather than being capped -- confirm.
df2.shape

(5000, 24)

In [73]:
df2.head()

Unnamed: 0,City,Cited Person,Zip,Officer Badge #,Cited Person Ethnicity,State,Zone,Citation Date,Citation #,Cited Person Sex,...,Cited Person Age,District,Arizona Statute Code,_full_text,Cited Person Race,Beat,Street,Citation Type Description,tcmainid,_id
0,Scottsdale,"Andriulli, Ryan",,1273,U,AZ,1002,2019-06-22T00:00:00,2088007,M,...,45,D2,28-1381A1,"'-06':4 '-1381':11 '-22':5 '00':7,8 '1002':34 ...",W,7,N 68th St / E Camelback Rd,Criminal Traffic,660621,1
1,Scottsdale,"Lunell, Sean",,1402,N,AZ,4403,2019-06-22T00:00:00,8094625,M,...,53,D4,28-1381A1,"'-06':4 '-1381':11 '-22':5 '00':7,8 '1402':22 ...",W,20,N Granite Reef Rd / E Lone Mountain Rd,Criminal Traffic,660623,2
2,Scottsdale,"Delic, Ermin",,1140,N,AZ,2601,2019-06-22T00:00:00,2061773,M,...,30,D3,28-1464G,"'-06':4 '-1464':11 '-22':5 '00':7,8 '10':31 '1...",W,10,E Eastwood Ln,Criminal Traffic,660627,3
3,Scottsdale,"Rapp, Henry",,1491,N,AZ,3406,2019-06-22T00:00:00,2086005,M,...,18,D4,4-244.9,"'-06':4 '-22':5 '-244.9':11 '00':7,8 '1491':18...",W,17,N Thompson Peak Py,Criminal,660629,4
4,Scottsdale,"Stokes, Tracey",,1472,N,AZ,904,2019-06-22T00:00:00,2084690,F,...,26,D2,4-251A2,"'-06':4 '-22':5 '-251':11 '00':7,8 '1472':22 '...",B,6,N 75th St,Criminal,660631,5


In [74]:
# Convert the ISO-formatted date strings into pandas datetimes.
df2['Citation Date'] = df2['Citation Date'].pipe(pd.to_datetime)

In [75]:
df2['Citation Date'].min()

Timestamp('2019-04-08 00:00:00')

In [76]:
df2['Citation Date'].max()

Timestamp('2020-04-05 00:00:00')

In [77]:
df2['Citation Date'].dt.month.value_counts().sort_index()

1       3
2     519
3     463
4     876
5      24
6     589
7     193
8     776
9       6
10    775
12    776
Name: Citation Date, dtype: int64

## Query citations dataset using CKAN format - limit time of data

In [22]:
# Datastore SQL query against the citations resource, keeping only rows whose
# "Citation Date" falls in month 4. Decoded, the query reads:
#   SELECT *from "<resource id>"where extract(month from "Citation Date") = '4'
url = (
    'http://cosopendata.westus.cloudapp.azure.com/api/3/action/datastore_search_sql'
    '?sql=SELECT%20*from%20%220d9377aa-dec5-433e-9e4d-f5219ea85ce5%22'
    'where%20extract(month%20from%20%22Citation%20Date%22)%20=%20%274%27'
)

In [23]:
# Run the month-filtered query held in `url`.
# NOTE: despite the `arrests_` prefix, `url` targets the citations resource,
# so this response contains citation records.
arrests_js_object = pd.read_json(url)

In [24]:
# access elements of json records
arrests_js_object.loc['records', 'result'][0]

{'City': 'Scottsdale',
 'Cited Person': 'Avila, Benancio',
 'Zip': None,
 'Officer Badge #': '1281            ',
 'Cited Person Ethnicity': 'U',
 'State': 'AZ',
 'Zone': '3003',
 'Citation Date': '2019-04-13T00:00:00',
 'Citation #': '2081396',
 'Cited Person Sex': 'M',
 'Citation Type Code': 'CV',
 'Street Number': '7xxx',
 'Citation Time': '1007',
 'Charge Description': 'Suspended License For Failure to Appear/Pay',
 'Cited Person Age': '58',
 'District': 'D3',
 'Arizona Statute Code': '28-3482A',
 '_full_text': "'-04':4 '-13':5 '-3482':11 '00':7,8 '1007':9 '12':30 '1281':22 '2019':3 '2081396':2 '28':10 '3003':31 '58':34 '653905':1 '7xxx':23 'appear/pay':18 'avila':32 'az':28 'benancio':33 'bl':26 'civil':20 'cv':19 'd3':29 'e':24 'failur':16 'licens':14 'm':36 'scottsdal':27 'shea':25 'suspend':13 't00':6 'traffic':21 'u':37 'w':35",
 'Cited Person Race': 'W',
 'Beat': '12',
 'Street': 'E Shea Bl',
 'Citation Type Description': 'Civil Traffic',
 'tcmainid': '653905',
 '_id': 1301}

In [25]:
# Build a DataFrame from the list of row dicts stored under
# ('records', 'result') in the parsed response.
arrests = pd.DataFrame(arrests_js_object.at['records', 'result'])

In [26]:
arrests.shape

(876, 24)

In [30]:
arrests.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 876 entries, 0 to 875
Data columns (total 24 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   City                       876 non-null    object
 1   Cited Person               876 non-null    object
 2   Zip                        51 non-null     object
 3   Officer Badge #            876 non-null    object
 4   Cited Person Ethnicity     876 non-null    object
 5   State                      876 non-null    object
 6   Zone                       876 non-null    object
 7   Citation Date              876 non-null    object
 8   Citation #                 876 non-null    object
 9   Cited Person Sex           876 non-null    object
 10  Citation Type Code         876 non-null    object
 11  Street Number              876 non-null    object
 12  Citation Time              876 non-null    object
 13  Charge Description         876 non-null    object
 14  Cited Pers

In [34]:
# Convert the ISO-formatted date strings into pandas datetimes.
arrests['Citation Date'] = arrests['Citation Date'].pipe(pd.to_datetime)

In [35]:
arrests['Citation Date'].min()

Timestamp('2019-04-08 00:00:00')

In [43]:
arrests['Citation Date'].dt.year.value_counts()

2019    859
2020     17
Name: Citation Date, dtype: int64

## Query arrests dataset using SQL format

In [11]:
# Pull every row of the arrests resource through the datastore SQL endpoint.
arrests_js_object = pd.read_json(
    'http://cosopendata.westus.cloudapp.azure.com/sv/api/3/action/datastore_search_sql?sql=SELECT%20*%20from%20%22e122bdb7-4d12-495d-9a60-6a54fc229414%22'
)

# Example of the SQL syntax with a WHERE clause (different resource id):
# sql=SELECT * from "0d9377aa-dec5-433e-9e4d-f5219ea85ce5" WHERE title LIKE 'jones'

In [13]:
arrests_js_object

Unnamed: 0,help,success,result
fields,http://cosopendata.westus.cloudapp.azure.com/s...,True,"[{'type': 'int4', 'id': '_id'}, {'type': 'tsve..."
records,http://cosopendata.westus.cloudapp.azure.com/s...,True,"[{'Arrest Time': '00:00', 'Arrest #': '2019007..."
sql,http://cosopendata.westus.cloudapp.azure.com/s...,True,"SELECT * from ""e122bdb7-4d12-495d-9a60-6a54fc2..."


In [14]:
# access elements of json records
arrests_js_object.loc['records', 'result'][0]

{'Arrest Time': '00:00',
 'Arrest #': '2019007790',
 'Zone': '3301',
 'District': 'D4      ',
 'Arizona Statute Code': '28-1381A1',
 'Beat': '15      ',
 'DR #': '19-14369  ',
 'Age': '46',
 'Arrest Location': 'N Scottsdale Rd / E Sutton DR',
 '_full_text': "'-06':3 '-07':2 '-1381':14 '-14369':11 '00':5,6,7,8 '1273':40 '15':35 '19':10 '2019':1 '2019007790':9 '28':13 '3301':36 '46':27 '85308':39 'a1':15 'az':38 'b':25 'book':12 'd4':34 'davina':23 'degre':21 'dr':33 'dui':17 'dui-impair':16 'e':31 'f':26 'g':24 'glendal':37 'impair':18 'morgan':22 'n':28 'rd':30 'scottsdal':29 'slightest':20 'sutton':32 't00':4",
 'Arrestee': 'Morgan, Davina G',
 'Sex': 'F ',
 'Arrest Date': '2019-07-06T00:00:00',
 'Race': 'B ',
 'City of Arrestee': 'Glendale, AZ 85308',
 'Arrest Type': 'Booked',
 'Charge Description': 'DUI-Impaired to Slightest Degree',
 'Officer Serial#': '1273',
 '_id': 1}

In [15]:
# Build a DataFrame from the list of arrest row dicts stored under
# ('records', 'result') in the parsed response.
arrests = pd.DataFrame(arrests_js_object.at['records', 'result'])

In [16]:
arrests.shape

(2250, 19)

In [66]:
arrests.head()

Unnamed: 0,Arrest Time,Arrest #,Zone,District,Arizona Statute Code,Beat,DR #,Age,Arrest Location,_full_text,Arrestee,Sex,Arrest Date,Race,City of Arrestee,Arrest Type,Charge Description,Officer Serial#,_id
0,00:00,2019007790,3301,D4,28-1381A1,15,19-14369,46,N Scottsdale Rd / E Sutton DR,"'-06':3 '-07':2 '-1381':14 '-14369':11 '00':5,...","Morgan, Davina G",F,2019-07-06T00:00:00,B,"Glendale, AZ 85308",Booked,DUI-Impaired to Slightest Degree,1273,1
1,00:00,2019007791,904,D2,13-1203A3,6,19-14368,23,N Saddlebag Tr,"'-06':3 '-07':2 '-1203':14 '-14368':11 '00':5,...","Oliver, Terrell Lamar",M,2019-07-06T00:00:00,B,"Mesa, AZ 85203",Booked,Assault-Touched to Injure,1288,2
2,00:00,2019007820,401,D1,28-701.02A2,4,19-14319,25,E McDowell Rd,'-06':3 '-07':2 '-14319':11 '-701.02':18 '00':...,"Carrillo, Jose Elias",M,2019-07-06T00:00:00,W,"Phoenix, AZ 85042",Cite In Lieu of Detention,Exceed Limit By More Than 20mph,1248,3
3,00:00,2019007835,3006,D3,28-701.02A3,12,19-14350,34,L101 / E Shea Bl,'-06':3 '-07':2 '-14350':11 '-701.02':18 '00':...,"Barry, Rachel Elizabeth",F,2019-07-06T00:00:00,W,"Phoenix, AZ 85053",Cite In Lieu of Detention,Exceed 85 mph,1274,4
4,07:53,2019007760,99,,RULE 26.12,99,18-25626,36,S 4th Av,"'-06':3 '-07':2 '-25626':11 '00':5,6 '07':7 '1...","Smith, Sara Jean",F,2019-07-06T00:00:00,W,"Scottsdale, AZ 85256",Booked,Rule 26.12 Fail to Comply- Pre-Adjudication,81523,5


In [69]:
# Convert the ISO-formatted arrest date strings into pandas datetimes.
arrests['Arrest Date'] = arrests['Arrest Date'].pipe(pd.to_datetime)

In [73]:
arrests['Arrest Date'].min()

Timestamp('2019-07-06 00:00:00')

In [71]:
arrests['Arrest Date'].max()

Timestamp('2020-03-08 00:00:00')

In [81]:
# Strip the colon from 'HH:MM' strings (e.g. '07:53' -> '0753').
# regex=False makes this a literal replacement regardless of the pandas
# version's default for `regex`.
# The column stays a string column; re-running the cell is harmless because
# no colons remain after the first pass.
arrests['Arrest Time'] = arrests['Arrest Time'].str.replace(':', '', regex=False)

In [108]:
# Lift Altair's default row cap before charting.
alt.data_transformers.disable_max_rows()

# Scatter of age against arrest time, coloured by sex, restricted to the
# 'M ' / 'F ' codes (the raw values carry a trailing space).
alt.Chart(
    arrests[arrests.Sex.isin(['M ', 'F '])]
).mark_point().encode(
    x='Arrest Time',
    y='Age',
    color='Sex',
).properties(width=700, height=400)

The fact that Altair doesn't summarize any x values is a bit wonky.

In [107]:
# Bar chart of mean age by sex, restricted to the 'M ' / 'F ' codes
# (the raw values carry a trailing space).
# NOTE(review): 'Age' arrives as a string in the JSON records -- confirm the
# mean is computed on numeric values as intended.
alt.Chart(
    arrests[arrests.Sex.isin(['M ', 'F '])]
).mark_bar().encode(
    x='Sex',
    y='mean(Age)',
).properties(width=700, height=400)

## Phoenix crime dataset using SQL format - it seems to be blocking me at 32K rows.

In [161]:
# Pull the Phoenix crime resource through the same datastore SQL endpoint
# style (plain SELECT *).
js_object = pd.read_json(
    'https://www.phoenixopendata.com/api/3/action/datastore_search_sql?sql=SELECT%20*%20from%20%220ce3411a-2fc6-4302-a33f-167f68608a20%22'
)
# Leftover query fragments for other resources, kept for reference:
# datastore_search_sql?sql=SELECT%20*%20from%20%22e122bdb7-4d12-495d-9a60-6a54fc229414%22'
# sql=SELECT * from "0d9377aa-dec5-433e-9e4d-f5219ea85ce5" WHERE title LIKE 'jones'

In [162]:
js_object

Unnamed: 0,help,success,result
fields,https://www.phoenixopendata.com/api/3/action/h...,True,"[{'type': 'int4', 'id': '_id'}, {'type': 'tsve..."
records,https://www.phoenixopendata.com/api/3/action/h...,True,"[{'OCCURRED ON': '2015-11-01T00:00:00', '100 B..."
records_truncated,https://www.phoenixopendata.com/api/3/action/h...,True,True
sql,https://www.phoenixopendata.com/api/3/action/h...,True,"SELECT * from ""0ce3411a-2fc6-4302-a33f-167f686..."


In [163]:
# access elements of json records
js_object.loc['records', 'result'][0]

{'OCCURRED ON': '2015-11-01T00:00:00',
 '100 BLOCK ADDR': 'N 43RD AVE & W CACTUS RD',
 'ZIP': '85029',
 'UCR CRIME CATEGORY': 'MOTOR VEHICLE THEFT',
 '_full_text': "'-01':4,9 '-09':10 '-11':3 '00':5,6,7,11,12,13 '2015':2 '2016':8 '201600000052855':1 '43rd':18 '85029':23 'ave':19 'cactus':21 'family':25 'house':26 'motor':14 'n':17 'rd':22 'single':24 'theft':16 'vehicle':15 'w':20",
 'OCCURRED TO': '2016-01-09T00:00:00',
 'INC NUMBER': '201600000052855',
 'PREMISE TYPE': 'SINGLE FAMILY HOUSE',
 '_id': 1}

In [165]:
# Build a DataFrame from the list of incident row dicts stored under
# ('records', 'result') in the parsed response.
phx_crime = pd.DataFrame(js_object.at['records', 'result'])

In [166]:
phx_crime.shape

(32000, 9)

In [167]:
phx_crime.head()

Unnamed: 0,OCCURRED ON,100 BLOCK ADDR,ZIP,UCR CRIME CATEGORY,_full_text,OCCURRED TO,INC NUMBER,PREMISE TYPE,_id
0,2015-11-01T00:00:00,N 43RD AVE & W CACTUS RD,85029,MOTOR VEHICLE THEFT,"'-01':4,9 '-09':10 '-11':3 '00':5,6,7,11,12,13...",2016-01-09T00:00:00,201600000052855,SINGLE FAMILY HOUSE,1
1,2015-11-01T00:00:00,14XX E HIGHLAND AVE,85014,LARCENY-THEFT,"'-01':4 '-11':3,9,10 '00':5,6,7,13 '09':11 '14...",2015-11-11T09:30:00,201500002168686,PARKING LOT,2
2,2015-11-01T00:00:00,13XX E ALMERIA RD,85006,RAPE,"'-01':4 '-11':3 '00':5,6,7 '13xx':9 '2015':2 '...",,201600000594484,SINGLE FAMILY HOUSE,3
3,2015-11-01T00:00:00,51XX N 15TH ST,85014,LARCENY-THEFT,"'-01':4,10 '-11':3,9 '00':5,6,7,12,13 '09':11 ...",2015-11-01T09:00:00,201500002102327,APARTMENT,4
4,2015-11-01T00:00:00,102XX W MEDLOCK AVE,85307,MOTOR VEHICLE THEFT,"'-01':4,10 '-11':3,9 '00':5,6,7,12,13 '05':11 ...",2015-11-01T05:00:00,201500002101405,SINGLE FAMILY HOUSE,5


In [168]:
# Convert the ISO-formatted 'OCCURRED ON' strings into pandas datetimes.
phx_crime['OCCURRED ON'] = phx_crime['OCCURRED ON'].pipe(pd.to_datetime)

In [169]:
phx_crime['OCCURRED ON'].min()

Timestamp('2015-11-01 00:00:00')

In [170]:
phx_crime['OCCURRED ON'].max()

Timestamp('2016-08-25 06:00:00')

In [171]:
phx_crime['OCCURRED ON'].dt.month.value_counts()

1.0     5302
12.0    5277
4.0     5273
3.0     5109
11.0    4847
2.0     4743
5.0     1320
8.0        8
6.0        7
Name: OCCURRED ON, dtype: int64