In [1]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json
import os.path
from collections import Counter
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline 
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
from tkinter import *

In [2]:
# cache directories for stop and search
# Everything lives under the current working directory:
#   .cache/CSVs  - cached CSV files
#   .cache/JSON  - cached JSON responses
#   data         - data directory
cwd = os.getcwd()
direction_save_csv = os.path.join(cwd, '.cache', 'CSVs')
direction_save_json = os.path.join(cwd, '.cache', 'JSON')
data_path = os.path.join(cwd, 'data')


# In case the directories above do not exist yet, create them (parents
# included). exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create gap of testing os.path.exists() first.
for directory in (direction_save_csv, direction_save_json, data_path):
    os.makedirs(directory, exist_ok=True)



In [3]:
# Load the list of police forces.
# A cached copy in the JSON cache directory is preferred; the API is only
# contacted when no cached file exists, and a successful response is then
# written to the cache for the next run.
forces_cache_file = os.path.join(direction_save_json, 'forces.json')

if os.path.exists(forces_cache_file):
    with open(forces_cache_file) as file:
        forces = json.load(file)
else:
    forces_response = requests.get('https://data.police.uk/api/forces')
    if forces_response.status_code != requests.codes.ok:
        # Stop here with a clear message: otherwise `forces` would remain a
        # Response object and the id extraction below would fail with an
        # unrelated TypeError.
        raise RuntimeError(
            "could not fetch the list of forces (HTTP status "
            + str(forces_response.status_code) + ")"
        )
    forces = forces_response.json()
    with open(forces_cache_file, 'w') as f:
        json.dump(forces, f, indent=4)               # Save the file with indent=4

# collect the 'id' of every force entry; explore_force_date() validates
# its `force` argument against this list
force_id = [force_entry['id'] for force_entry in forces]

print(force_id)            # to show the force id's

['avon-and-somerset', 'bedfordshire', 'cambridgeshire', 'cheshire', 'city-of-london', 'cleveland', 'cumbria', 'derbyshire', 'devon-and-cornwall', 'dorset', 'durham', 'dyfed-powys', 'essex', 'gloucestershire', 'greater-manchester', 'gwent', 'hampshire', 'hertfordshire', 'humberside', 'kent', 'lancashire', 'leicestershire', 'lincolnshire', 'merseyside', 'metropolitan', 'norfolk', 'north-wales', 'north-yorkshire', 'northamptonshire', 'northumbria', 'nottinghamshire', 'northern-ireland', 'south-wales', 'south-yorkshire', 'staffordshire', 'suffolk', 'surrey', 'sussex', 'thames-valley', 'warwickshire', 'west-mercia', 'west-midlands', 'west-yorkshire', 'wiltshire']


In [4]:
# making the months more presentable
# Maps the month number, written as an unpadded string ('1' .. '12'),
# to the English month name used in chart titles.
months = dict(zip(
    map(str, range(1, 13)),
    ('January', 'February', 'March', 'April', 'May', 'June',
     'July', 'August', 'September', 'October', 'November', 'December'),
))


In [5]:
# defining functions for visualization

# first is explore force date

def explore_force_date(force, year, month):
    """Return the stop-and-search data of one force for one month.

    The data is read from the JSON cache directory when a file named
    ``<force>_<year>-<month>.json`` exists there; otherwise it is requested
    from the ``stops-force`` endpoint of data.police.uk and, if usable,
    written to the cache.

    Parameters
    ----------
    force : str
        A force id; must be one of the entries of ``force_id``.
    year : str
        The year, concatenated as-is into the date (e.g. ``'2020'``).
    month : str
        The month; must be a key of ``months`` (``'1'`` .. ``'12'``).

    Returns
    -------
    The decoded JSON payload (presumably a list of record dicts -- the
    callers index each element by keys such as ``'outcome'``).

    Raises
    ------
    Exception
        If ``force``/``month`` are invalid, if the HTTP request does not
        return status 200, or if the payload holds fewer than 2 entries.
    """
    valid_force = isinstance(force, str) and force in force_id
    valid_month = isinstance(month, str) and month in months
    if not (valid_force and valid_month):
        raise Exception("Error!!! Invalid syntax error")

    # date in the format used for both the request and the cache file name
    # NOTE(review): the month is not zero-padded ('2020-1'); confirm the API
    # accepts this before changing it, since cache file names depend on it.
    date = year + '-' + month
    cache_file = os.path.join(direction_save_json, str(force) + '_' + str(date) + ".json")

    loaded_from_cache = os.path.exists(cache_file)
    if loaded_from_cache:
        with open(cache_file) as file:
            data = json.load(file)
    else:
        url = 'https://data.police.uk/api/stops-force?force=' + force + '&date=' + date
        fetch_data = requests.get(url)
        if fetch_data.status_code != requests.codes.ok:
            raise Exception("data cannot Load", force, date, "unaccessible")
        data = fetch_data.json()

    if len(data) < 2:
        raise Exception("data unaccessible now ('" + force + "', '" + year + "', '" + month + "')")

    # Only cache a payload that passed the check above. Caching an unusable
    # response would make every later call fail from the cache without ever
    # asking the API again.
    if not loaded_from_cache:
        with open(cache_file, 'w') as f:
            json.dump(data, f, indent=4)          # Save the file with indent=4

    return data


    
# the function below takes 3 arguments and plot the pie chart
# showing the ratio of non arrests and arrests made

def result(force, year, month):
    """Show a pie chart of arrests vs. non-arrests in a new Tk window.

    Parameters
    ----------
    force : str
        Force id, passed on to ``explore_force_date``.
    year : str
        Year, passed on to ``explore_force_date`` and used in the title.
    month : str
        Key of ``months``; passed on to ``explore_force_date``.

    Raises
    ------
    KeyError
        If ``month`` is not a key of ``months``.
    Exception
        Whatever ``explore_force_date`` raises for invalid or missing data.
    """
    month_name = months[month]                            # getting the months name

    # Fetch first: if the lookup fails, no empty window is left behind.
    data = explore_force_date(force, year, month)

    # counts of all different results, keyed by the record's 'outcome' value
    outcome_counts = Counter(record['outcome'] for record in data)

    # A Counter yields 0 for a missing key, so a month without any arrest
    # produces a 0 % slice instead of a KeyError.
    arrests = outcome_counts['Arrest']
    detail = [arrests, sum(outcome_counts.values()) - arrests]

    cat = Tk()
    cat.geometry('800x600')

    fig = Figure(figsize=(10,10))
    ax1 = fig.add_subplot(111)

    # pie chart details
    label = ['Arrests', 'Non-Arrests']

    explode = (0.1, 0)                                    # pull the 'Arrests' slice out
    ax1.pie(detail, labels=label, autopct='%.2f %%', pctdistance=0.8, explode=explode)
    ax1.set_title(month_name + " " + year + "\nAnalzing Results of 'Stop & Search' Arrests in " + force.capitalize(),
              fontdict={'fontweight': 'bold', 'fontsize': 20})

    # embed the figure in the Tk window
    canvas = FigureCanvasTkAgg(fig, master=cat)
    canvas.get_tk_widget().pack()
    canvas.draw()


    plt.show()
    

    
# the function below takes 3 arguments and plot the histogram chart
# showing the ages of stop and search victims

def hist_age_chart(force, year, month):
    """Show a bar chart of the age-range distribution in a new Tk window.

    Records whose ``'age_range'`` is ``None`` are ignored.

    Parameters
    ----------
    force : str
        Force id, passed on to ``explore_force_date``.
    year : str
        Year, passed on to ``explore_force_date`` and used in the title.
    month : str
        Key of ``months``; passed on to ``explore_force_date``.

    Raises
    ------
    KeyError
        If ``month`` is not a key of ``months``.
    ValueError
        If no record carries an age range, so there is nothing to plot.
    Exception
        Whatever ``explore_force_date`` raises for invalid or missing data.
    """
    month_name = months[month]                                 # getting the months name

    # Fetch first: if the lookup fails, no empty window is left behind.
    data = explore_force_date(force, year, month)

    # count age distribution, skipping records without an age range
    age_counts = dict(Counter(
        record['age_range'] for record in data if record['age_range'] is not None
    ))
    if not age_counts:
        # ax.bar() needs at least the x and height arguments; fail with a
        # readable message instead of an argument-count TypeError.
        raise ValueError("no age data to plot ('" + force + "', '" + year + "', '" + month + "')")

    pet = Tk()
    pet.geometry('800x600')

    fig, ax = plt.subplots(figsize=(10,10))

    print(age_counts)
    ax.bar(list(age_counts.keys()), list(age_counts.values()))

    # chart details; font size 20 matches the other charts in this notebook
    ax.set_title(month_name + " " + year + " in " + force.capitalize() +
              "\nAge Distribution of victims of 'Stop & Search'",
              fontdict={'fontweight': 'bold', 'fontsize': 20})

    ax.set_xlabel('Ages')
    ax.set_ylabel('Frequency')

    # embed the figure in the Tk window
    canvas = FigureCanvasTkAgg(fig, master=pet)
    canvas.get_tk_widget().pack()
    canvas.draw()

    plt.show()
    
    
# the function below compares the age distribution of several forces in one chart

def compare_many(data):
    """Show a grouped bar chart comparing age-range counts of several forces.

    Parameters
    ----------
    data : sequence of [force, year, month]
        One entry per bar series; each entry is passed on to
        ``explore_force_date``. Records whose ``'age_range'`` is ``None``
        or ``'under 10'`` are left out of the counts.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    Exception
        Whatever ``explore_force_date`` raises for invalid or missing data.
    """
    if not data:
        raise ValueError("compare_many needs at least one [force, year, month] entry")

    counts = []
    forces = []
    for entry in data:
        print(entry)
        force, year, month = entry[0], entry[1], entry[2]
        df_ss = explore_force_date(force, year, month)         # fetching the earlier data and saving it in df_ss
        age_counts = Counter(
            record['age_range'] for record in df_ss
            if record['age_range'] is not None and record['age_range'] != 'under 10'
        )
        counts.append(dict(age_counts))                        # one dict of counts per entry
        forces.append(force)

    # rows = age ranges, columns = one per entry; an age range that is absent
    # for an entry becomes 0 rather than NaN
    df_forces = pd.DataFrame(counts).transpose().fillna(0)
    print(forces)

    df_forces.columns = forces
    print(df_forces)

    labels = list(df_forces.index)
    positions = np.arange(len(labels))

    # Split a fixed group width between the series so bars sit side by side
    # without overlapping, centred on the tick of their age range.
    group_width = 0.8
    bar_width = group_width / len(forces)

    # All data is fetched; only now open the window.
    tom = Tk()
    tom.geometry('800x600')

    fig, ax = plt.subplots(figsize=(10,10))

    for series_index, f in enumerate(forces):
        offset = (series_index - (len(forces) - 1) / 2) * bar_width
        # select by position: the same force may appear more than once
        # (e.g. two different months), giving duplicate column labels
        ax.bar(positions + offset, df_forces.iloc[:, series_index], bar_width, label=str(f))

    # designing the chart
    ax.set_ylabel('frequency')
    ax.set_title('Comparison of different places over age range using histogram')
    ax.set_xticks(positions)
    ax.set_xticklabels(labels)
    ax.legend()
    fig.tight_layout()

    # embed the figure in the Tk window
    canvas = FigureCanvasTkAgg(fig, master=tom)
    canvas.get_tk_widget().pack()
    canvas.draw()

    plt.show()
        

        
def hist_gender_chart(force, year, month):
    """Show a bar chart of the gender distribution in a new Tk window.

    Records whose ``'gender'`` is ``None`` are ignored.

    Parameters
    ----------
    force : str
        Force id, passed on to ``explore_force_date``.
    year : str
        Year, passed on to ``explore_force_date`` and used in the title.
    month : str
        Key of ``months``; passed on to ``explore_force_date``.

    Raises
    ------
    KeyError
        If ``month`` is not a key of ``months``.
    ValueError
        If no record carries a gender, so there is nothing to plot.
    Exception
        Whatever ``explore_force_date`` raises for invalid or missing data.
    """
    month_name = months[month]                                 # getting the months name

    # Fetch first: if the lookup fails, no empty window is left behind.
    data = explore_force_date(force, year, month)

    # count gender distribution, skipping records without a gender
    gender_counts = dict(Counter(
        record['gender'] for record in data if record['gender'] is not None
    ))
    if not gender_counts:
        # ax.bar() needs at least the x and height arguments; fail with a
        # readable message instead of an argument-count TypeError.
        raise ValueError("no gender data to plot ('" + force + "', '" + year + "', '" + month + "')")

    gen = Tk()
    gen.geometry('800x600')

    fig, ax = plt.subplots(figsize=(10,10))

    print(gender_counts)
    ax.bar(list(gender_counts.keys()), list(gender_counts.values()))

    # chart details
    ax.set_title(month_name + " " + year + " in " + force.capitalize() +
              "\nGender Distribution of victims of 'Stop & Search'",
              fontdict={'fontweight': 'bold', 'fontsize': 20})

    ax.set_xlabel('Genders')
    ax.set_ylabel('Frequency')

    # embed the figure in the Tk window
    canvas = FigureCanvasTkAgg(fig, master=gen)
    canvas.get_tk_widget().pack()
    canvas.draw()

    plt.show()

    
def hist_ethnic_chart(force, year, month):
    """Show a horizontal bar chart of self-defined ethnicity in a new Tk window.

    Records whose ``'self_defined_ethnicity'`` is ``None`` are ignored.

    Parameters
    ----------
    force : str
        Force id, passed on to ``explore_force_date``.
    year : str
        Year, passed on to ``explore_force_date`` and used in the title.
    month : str
        Key of ``months``; passed on to ``explore_force_date``.

    Raises
    ------
    KeyError
        If ``month`` is not a key of ``months``.
    ValueError
        If no record carries an ethnicity, so there is nothing to plot.
    Exception
        Whatever ``explore_force_date`` raises for invalid or missing data.
    """
    month_name = months[month]                                 # getting the months name

    # Fetch first: if the lookup fails, no empty window is left behind.
    data = explore_force_date(force, year, month)

    # count ethnicity distribution, skipping records without a value
    ethnicity_counts = dict(Counter(
        record['self_defined_ethnicity'] for record in data
        if record['self_defined_ethnicity'] is not None
    ))
    if not ethnicity_counts:
        # ax.barh() needs at least the y and width arguments; fail with a
        # readable message instead of an argument-count TypeError.
        raise ValueError("no ethnicity data to plot ('" + force + "', '" + year + "', '" + month + "')")

    eth = Tk()
    eth.geometry('1300x900')

    fig, ax = plt.subplots(figsize=(17,13))

    # print the counts that are plotted (the name `r` used here before is
    # not defined in this function)
    print(ethnicity_counts)
    ax.barh(list(ethnicity_counts.keys()), list(ethnicity_counts.values()))

    # chart details
    ax.set_title(month_name + " " + year + " in " + force.capitalize() +
              "\nEthnic Distribution of victims of 'Stop & Search'",
              fontdict={'fontweight': 'bold', 'fontsize': 20})

    # barh puts the categories on the y-axis and the counts on the x-axis
    ax.set_xlabel('Frequency')
    ax.set_ylabel('Ethnics')

    # embed the figure in the Tk window
    canvas = FigureCanvasTkAgg(fig, master=eth)
    canvas.get_tk_widget().pack()
    canvas.draw()

    plt.show()
    

In [6]:
#compare_many([['city-of-london', '2020', '1'], ['cleveland', '2020', '1']])

In [7]:
#hist_age_chart('city-of-london', '2020', '1')