In [1]:
import pandas as pd
import glob
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
import requests
import json

In [2]:
# Request the Major Crime Indicators features from the Toronto Police API.
# The timeout (in seconds) stops the notebook from hanging forever if the
# service never answers; requests waits indefinitely when none is given.
response = requests.get("https://services.arcgis.com/S9th0jAJ7bqgIRjw/arcgis/rest/services/Major_Crime_Indicators/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson", timeout=30)

In [3]:
# Make sure the request worked before decoding the payload
if response.status_code == 200:
    data = response.json()
    formatted_data = json.dumps(data, indent=4, sort_keys=True)
    # Show only the beginning of the pretty-printed JSON: printing every
    # feature floods the notebook output and bloats the saved file.
    preview_chars = 2000
    print(formatted_data[:preview_chars])
else:
    # Later cells read `data`, so stop here with a clear error instead of
    # letting them fail on an undefined name.
    raise RuntimeError(
        f"Failed to retrieve the data from the Toronto Police API (HTTP status {response.status_code})"
    )

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    -79.50447515,
                    43.76637136
                ],
                "type": "Point"
            },
            "id": 1,
            "properties": {
                "Division": "D31",
                "Hood_ID": "27",
                "Index_": 201,
                "Latitude": 43.76637136,
                "Longitude": -79.50447515,
                "Neighbourhood": "York University Heights",
                "ObjectId": 1,
                "event_unique_id": "GO-20141273318",
                "location_type": "Apartment (Rooming House, Condo)",
                "mci_category": "Assault",
                "occurrencedate": 1388725200000,
                "occurrenceday": 3,
                "occurrencedayofweek": "Friday    ",
                "occurrencedayofyear": 3,
                "occurrencehour": 11,
                "occurrencemonth": "January",
                "occurrence

In [5]:
# Count the features contained in this single response.
# NOTE(review): the printed value is 1000 while the per-category totals
# computed further down are far larger, so this is presumably one capped page
# of results rather than every recorded crime - confirm against the service.
crimes_count = len(data["features"])


In [6]:
# Print how many features the response contained
print(f"features:{crimes_count}")

features:1000


In [7]:
# Names of the feature services to query, one per crime type
types = [
    "Assault",
    "Auto_Theft",
    "Break_and_Enter",
    "Homicide_ASR_RC_TBL_002",
    "Robbery",
    "Theft_Over",
    "Bicycle_Thefts",
    "Shootings_and_Firearm_Discharges",
    "Theft_from_Motor_Vehicle",
]
# Total number of records per crime type, kept in the same order as `types`
count = []
# Ask each service for its object ids only and derive the total from them
for types1 in types:
    url = f"https://services.arcgis.com/S9th0jAJ7bqgIRjw/arcgis/rest/services/{types1}/FeatureServer/0/query?where=1%3D1&outFields=*&returnIdsOnly=true&outSR=4326&f=json"
    try:
        response_json = requests.get(url, timeout=30).json()
        # Count the ids instead of reading the last one: the last id equals
        # the total only when ids are sorted, contiguous and start at 1.
        # `or []` covers a null objectIds value (no matching records).
        object_ids = response_json["objectIds"] or []
        count.append(len(object_ids))
    except (requests.RequestException, ValueError, KeyError) as error:
        # Keep `count` aligned with `types` so the DataFrame built from both
        # lists does not fail on mismatched lengths; NaN marks the gap.
        print(f"Cant find {types1}: {error}")
        count.append(np.nan)

In [8]:
# Tabulate the total per crime type and order the rows from the most to the
# least frequent crime
count_df = (
    pd.DataFrame({"What Crime Happened ": types, "Count of crimes": count})
    .sort_values(by="Count of crimes", ascending=False)
)
count_df

Unnamed: 0,What Crime Happened,Count of crimes
0,Assault,161833
8,Theft_from_Motor_Vehicle,76914
2,Break_and_Enter,56466
1,Auto_Theft,40794
6,Bicycle_Thefts,30154
4,Robbery,29230
5,Theft_Over,9936
7,Shootings_and_Firearm_Discharges,5535
3,Homicide_ASR_RC_TBL_002,1286


In [9]:
# The CSV files of the top 3 crimes are expected to already sit in the
# working directory (nothing is downloaded here).
# Use glob to find them: "*.csv" matches only names with the .csv extension,
# and sorting makes the load order (and so the row order after concatenation)
# the same on every run, since glob returns files in arbitrary order.
all_files = sorted(glob.glob("*.csv"))

In [10]:
# Start with an empty container that will hold one DataFrame per CSV file
df_list = list()

In [11]:
# Read every CSV file into its own DataFrame and collect them in df_list.
# The list is emptied first so that re-running this cell does not append the
# same files a second time and duplicate every row in the combined data.
df_list.clear()
for file in all_files:
    df = pd.read_csv(file)
    df_list.append(df)

In [12]:
# Stack the per-file DataFrames on top of each other into one DataFrame,
# replacing the per-file row labels with a single fresh 0..n-1 index
result = pd.concat(objs=df_list, axis="index", ignore_index=True)

In [13]:
# Preview the first five rows of the combined data as plain text
print(result.head(n=5))

              X             Y  Index_ event_unique_id Division  \
0 -8.850398e+06  5.429359e+06     201  GO-20141273318      D31   
1 -8.818405e+06  5.436836e+06     202  GO-20141274349      D42   
2 -8.853791e+06  5.402071e+06     203  GO-20141274052      D22   
3 -8.832822e+06  5.419628e+06     204  GO-20141276966      D53   
4 -8.853452e+06  5.410706e+06     205  GO-20141274457      D22   

           occurrencedate            reporteddate  \
0  2014/01/03 05:00:00+00  2014/01/03 05:00:00+00   
1  2014/01/03 05:00:00+00  2014/01/03 05:00:00+00   
2  2014/01/03 05:00:00+00  2014/01/03 05:00:00+00   
3  2014/01/03 05:00:00+00  2014/01/03 05:00:00+00   
4  2014/01/03 05:00:00+00  2014/01/03 05:00:00+00   

                                       location_type premises_type  ucr_code  \
0                   Apartment (Rooming House, Condo)     Apartment      1430   
1  Single Home, House (Attach Garage, Cottage, Mo...         House      2120   
2                  Open Areas (Lakes, Parks,

In [14]:
# List the column labels of the combined DataFrame
result.columns

Index(['X', 'Y', 'Index_', 'event_unique_id', 'Division', 'occurrencedate',
       'reporteddate', 'location_type', 'premises_type', 'ucr_code', 'ucr_ext',
       'offence', 'reportedyear', 'reportedmonth', 'reportedday',
       'reporteddayofyear', 'reporteddayofweek', 'reportedhour',
       'occurrenceyear', 'occurrencemonth', 'occurrenceday',
       'occurrencedayofyear', 'occurrencedayofweek', 'occurrencehour',
       'mci_category', 'Hood_ID', 'Neighbourhood', 'Longitude', 'Latitude',
       'ObjectId'],
      dtype='object')

In [15]:
# First step of cleaning premises_type: list the distinct values the column contains
result["premises_type"].unique()

array(['Apartment', 'House', 'Outside', 'Commercial', 'Other',
       'Educational', 'Transit'], dtype=object)

In [16]:
# Helper that lowercases a single value
def mylower(string,spli_character=" "):
    """Return ``string`` converted to lowercase.

    Parameters
    ----------
    string
        The value to lowercase, e.g. one cell of a text column.
    spli_character
        Not used by the function; kept so existing calls that pass it keep
        working.

    Returns
    -------
    The result of ``string.lower()``. A value that has no ``lower`` method
    (for example ``None`` or the float NaN pandas uses for a missing cell) is
    returned unchanged, so mapping this function over a column with gaps does
    not raise.
    """
    if not hasattr(string, "lower"):
        return string
    return string.lower()

In [17]:
# Preview premises_type with every value passed through mylower (lowercased).
# The mapped Series is only displayed here; it is not assigned back to result.
result["premises_type"].map(mylower)

0          apartment
1              house
2            outside
3         commercial
4         commercial
             ...    
301228         house
301229       outside
301230    commercial
301231       outside
301232       outside
Name: premises_type, Length: 301233, dtype: object

In [18]:
# Inspect the dtype of each column of the combined DataFrame
result.dtypes

X                      float64
Y                      float64
Index_                   int64
event_unique_id         object
Division                object
occurrencedate          object
reporteddate            object
location_type           object
premises_type           object
ucr_code                 int64
ucr_ext                  int64
offence                 object
reportedyear             int64
reportedmonth           object
reportedday              int64
reporteddayofyear        int64
reporteddayofweek       object
reportedhour             int64
occurrenceyear         float64
occurrencemonth         object
occurrenceday          float64
occurrencedayofyear    float64
occurrencedayofweek     object
occurrencehour           int64
mci_category            object
Hood_ID                 object
Neighbourhood           object
Longitude              float64
Latitude               float64
ObjectId                 int64
dtype: object

In [19]:
# Count how many rows fall under each premises type
crimes = result["premises_type"].value_counts()

In [20]:
# Show the first rows of the per-premises counts
crimes.head()

Outside       80079
Apartment     72061
Commercial    60860
House         54076
Other         17788
Name: premises_type, dtype: int64

In [22]:
# Draw the per-premises counts as a bar chart on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 5))
crimes.plot(kind="bar", ax=ax)

# Set a title and axis labels so the chart can be read on its own
ax.set_title("Discover where the crimes are happening in Toronto")
ax.set_xlabel("Premises type")
ax.set_ylabel("Number of crimes")

# Adjust the layout before showing the figure so the adjustment is part of
# what gets displayed (the long tick labels would otherwise risk being cut off)
fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [23]:
# Preparation for the per-crime bar graphs

# List the distinct crime categories present in the mci_category column
result["mci_category"].unique()


array(['Assault', 'Break and Enter', 'Theft Over', 'Robbery',
       'Auto Theft'], dtype=object)

In [None]:
# File paths of the CSVs used for the per-crime bar graphs:
# crime1 is assault, crime2 is Theft_from_Motor_Vehicle, crime3 is Break_and_Enter
crime1, crime2, crime3 = (
    "assault.csv",
    "Theft_from_Motor_Vehicle.csv",
    "Break_and_Enter.csv",
)


In [None]:
# Read each crime type's CSV into its own DataFrame
crime1_df = pd.read_csv(crime1)
crime2_df = pd.read_csv(crime2)
# crime3 has its own file; reading crime2 again here would make the third
# DataFrame a copy of the second
crime3_df = pd.read_csv(crime3)


In [None]:

# Inspect the column labels of the assault data, which is loaded as crime1_df
# (no variable named assault_df is defined)
crime1_df.columns