**How to Query the London Crime Data (BigQuery Dataset)**

In [1]:
import bq_helper
from bq_helper import BigQueryHelper
# https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
london = bq_helper.BigQueryHelper(active_project="bigquery-public-data",
                                   dataset_name="london_crime")

In [2]:
bq_assistant = BigQueryHelper("bigquery-public-data", "london_crime")
bq_assistant.list_tables()

['crime_by_lsoa']

In [3]:
bq_assistant.head("crime_by_lsoa", num_rows=20)

Unnamed: 0,lsoa_code,borough,major_category,minor_category,value,year,month
0,E01000461,Bexley,Criminal Damage,Criminal Damage To Motor Vehicle,0,2015,8
1,E01000221,Barnet,Criminal Damage,Other Criminal Damage,0,2016,4
2,E01033607,Westminster,Violence Against the Person,Offensive Weapon,0,2016,6
3,E01004066,Southwark,Violence Against the Person,Assault with Injury,0,2010,2
4,E01001053,Croydon,Other Notifiable Offences,Other Notifiable,0,2012,6
5,E01000335,Bexley,Violence Against the Person,Harassment,0,2008,11
6,E01002844,Kensington and Chelsea,Burglary,Burglary in a Dwelling,0,2015,1
7,E01002494,Hillingdon,Theft and Handling,Other Theft,0,2011,10
8,E01003483,Newham,Theft and Handling,Theft From Motor Vehicle,1,2015,10
9,E01002805,Islington,Robbery,Business Property,0,2015,10


In [4]:
bq_assistant.table_schema("crime_by_lsoa")

[SchemaField('lsoa_code', 'STRING', 'NULLABLE', 'Lower Layer Super Output Area code according to the Office for National Statistics', ()),
 SchemaField('borough', 'STRING', 'NULLABLE', '', ()),
 SchemaField('major_category', 'STRING', 'NULLABLE', '', ()),
 SchemaField('minor_category', 'STRING', 'NULLABLE', '', ()),
 SchemaField('value', 'INTEGER', 'NULLABLE', 'Summary of the number of crimes for the month', ()),
 SchemaField('year', 'INTEGER', 'NULLABLE', '', ()),
 SchemaField('month', 'INTEGER', 'NULLABLE', '', ())]

What is the change in the number of crime incidents from 2011 to 2016?




In [5]:
query1 = """
SELECT
  borough,
  no_crimes_2011,
  no_crimes_2016,
  no_crimes_2016 - no_crimes_2011 AS change,
  ROUND(((no_crimes_2016 - no_crimes_2011) / no_crimes_2016) * 100, 2) AS perc_change
FROM (
  SELECT
    borough,
    SUM(IF(year=2011, value, NULL)) no_crimes_2011,
    SUM(IF(year=2016, value, NULL)) no_crimes_2016
  FROM
    `bigquery-public-data.london_crime.crime_by_lsoa`
  GROUP BY
    borough )
ORDER BY
  perc_change ASC
;
        """
response1 = london.query_to_pandas_safe(query1)
response1.head(30)

Unnamed: 0,borough,no_crimes_2011,no_crimes_2016,change,perc_change
0,Camden,33956,29183,-4773,-16.36
1,Waltham Forest,23714,20716,-2998,-14.47
2,Redbridge,21994,19502,-2492,-12.78
3,Westminster,54243,48330,-5913,-12.23
4,Hammersmith and Fulham,22151,20174,-1977,-9.8
5,Ealing,29537,27127,-2410,-8.88
6,Merton,13548,12755,-793,-6.22
7,Croydon,29830,28712,-1118,-3.89
8,Sutton,11068,10658,-410,-3.85
9,Brent,27669,26693,-976,-3.66


What were the top 3 crimes per borough in 2016?



In [6]:
query2 = """
SELECT
  borough,
  major_category,
  rank_per_borough,
  no_of_incidents
FROM (
  SELECT
    borough,
    major_category,
    RANK() OVER(PARTITION BY borough ORDER BY SUM(value) DESC) AS rank_per_borough,
    SUM(value) AS no_of_incidents
  FROM
    `bigquery-public-data.london_crime.crime_by_lsoa`
  GROUP BY
    borough,
    major_category )
WHERE
  rank_per_borough <= 3
ORDER BY
  borough,
  rank_per_borough;
        """
response2 = london.query_to_pandas_safe(query2)
response2.head(30)

Unnamed: 0,borough,major_category,rank_per_borough,no_of_incidents
0,Barking and Dagenham,Theft and Handling,1,50999
1,Barking and Dagenham,Violence Against the Person,2,43091
2,Barking and Dagenham,Criminal Damage,3,18888
3,Barnet,Theft and Handling,1,87285
4,Barnet,Violence Against the Person,2,46565
5,Barnet,Burglary,3,36981
6,Bexley,Theft and Handling,1,40071
7,Bexley,Violence Against the Person,2,30037
8,Bexley,Criminal Damage,3,17244
9,Brent,Theft and Handling,1,72523
