### Load Libraries

In [1]:
import pandas as pd

import plotly
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme globally so later matplotlib figures pick it up.
sns.set()

import random

# Seed Python's built-in `random` module so later calls to it are repeatable.
# NOTE(review): this does not seed NumPy's generator — confirm whether any
# NumPy/pandas sampling later in the notebook needs its own seed.
random.seed(1234)

### Define Azure Text Analytics Helper Functions

In [2]:
import os
import requests
from pprint import pprint
import pandas as pd

# Fill in your Azure Text Analytics credentials before calling the request
# helpers below; the `...` placeholders are not usable values.
# `endpoint` is concatenated directly with paths that start with "/", so it
# should not end with a trailing slash.
# Avoid committing real keys: the optional snippet below shows how to load
# them from environment variables instead.
subscription_key = ...
endpoint = ...

"""
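The code in this section is optional: it is only needed if you want to keep your subscription credentials out of the notebook, for example when sharing it publicly.
See python-dotenv for more information.
authenticate_client() uses the Text Analytics SDK to create a client; AzureKeyCredential and TextAnalyticsClient would also need to be imported before using it.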

# from dotenv import load_dotenv
# load_dotenv()
# subscription_key = os.getenv('SUBSCRIPTION_KEY')
# endpoint = os.getenv('ENDPOINT')

# def authenticate_client():
#     ta_credential = AzureKeyCredential(subscription_key)
#     text_analytics_client = TextAnalyticsClient(
#             endpoint= endpoint, credential=ta_credential)
#     return text_analytics_client
"""

def sentiment_analysis_example(documents):
    """POST ``documents`` to the Text Analytics v3.0 sentiment route.

    The URL is built from the module-level ``endpoint`` and the request is
    authenticated with the module-level ``subscription_key``. The decoded
    JSON body is pretty-printed and then returned to the caller.
    """
    target = endpoint + "/text/analytics/v3.0/sentiment"
    auth = {"Ocp-Apim-Subscription-Key": subscription_key}
    result = requests.post(target, headers=auth, json=documents).json()

    print("Printing sentiments ... \n")
    pprint(result)
    return result


def extract_key_phrases(documents):
    """POST ``documents`` to the Text Analytics v3.0 key-phrase route.

    Uses the module-level ``endpoint`` and ``subscription_key``. The decoded
    JSON body is pretty-printed under a short banner and then returned.
    """
    route = endpoint + "/text/analytics/v3.0/keyphrases"
    reply = requests.post(
        route,
        headers={"Ocp-Apim-Subscription-Key": subscription_key},
        json=documents,
    )
    parsed = reply.json()

    print("Printing key phrases ... \n")
    pprint(parsed)
    return parsed


def identify_entities(documents):
    """Run general named-entity recognition on ``documents``.

    Posts the payload to the Text Analytics v3.0
    ``entities/recognition/general`` route using the module-level
    ``endpoint`` and ``subscription_key``, pretty-prints the decoded JSON
    response and returns it.

    Args:
        documents: JSON-serialisable request body, e.g.
            ``{"documents": [{"id": "1", "language": "en", "text": "..."}]}``.

    Returns:
        The decoded JSON response body. Earlier versions returned nothing,
        so ``entities = identify_entities(documents)`` always bound ``None``.
    """
    entities_url = endpoint + "/text/analytics/v3.0/entities/recognition/general"
    headers = {"Ocp-Apim-Subscription-Key": subscription_key}
    response = requests.post(entities_url, headers=headers, json=documents)
    entities = response.json()
    pprint(entities)
    # Hand the parsed body back, as the sentiment and key-phrase helpers do.
    return entities


def convert_text_to_JSON(data, language="en"):
    """Build a Text Analytics request body from raw text.

    Produces the ``{"documents": [...]}`` structure that the request helpers
    in this notebook send as their JSON payload. Each document gets a
    1-based string ``id``, the given ``language`` code and its ``text``.

    Args:
        data: A single string, or an iterable of texts (for example a list
            or a pandas Series of summaries). ``None`` yields an empty
            document list. Each entry is passed through ``str()``.
        language: Language code attached to every document. Defaults to
            ``"en"``.

    Returns:
        dict: ``{"documents": [{"id": "1", "language": ..., "text": ...}, ...]}``.
    """
    if data is None:
        return {"documents": []}
    # A bare string would otherwise be iterated character by character.
    if isinstance(data, str):
        data = [data]
    return {
        "documents": [
            {"id": str(position), "language": language, "text": str(text)}
            for position, text in enumerate(data, start=1)
        ]
    }


def parse_output(output_JSON):
    """
    Placeholder for post-processing an API response body.

    Intended to pick out whichever values are of interest from
    ``output_JSON``; it currently performs no work and yields ``None``.
    """
    return None

#     documents = {"documents": [
#         {"id": "1", "language": "en",
#             "text": "I do not like this hammer made by Black & Decker. It does not work correctly. I want to request a return."},
#         {"id": "2", "language": "en",
#             "text": "I've been trying to talk to someone about my sink problem. It won't hold all of my fish."}
#     ]}

#     # Uncomment the line below if you choose to use the SDK in the future
#     # client = authenticate_client()
#     sentiments = sentiment_analysis_example(documents)
#     key_phrases = extract_key_phrases(documents)
#     entities = identify_entities(documents)

### Import Data

In [3]:
# Load the officer-involved-shooting records into `data`.
# The path is relative, so the CSV must sit in the notebook's working directory.
data = pd.read_csv("SPD_officer_involved_shooting_data.csv")

In [4]:
# Column names, non-null counts and dtypes — a quick check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 25 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   FRB #                 75 non-null     object 
 1   Incident Number       156 non-null    int64  
 2   Date / Time           156 non-null    object 
 3   Blurred Address       156 non-null    object 
 4   Longitude             156 non-null    float64
 5   Latitude              156 non-null    float64
 6   City                  156 non-null    object 
 7   State                 156 non-null    object 
 8   Rank                  156 non-null    object 
 9   Officer Gender        156 non-null    object 
 10  Officer Race          156 non-null    object 
 11  Years of SPD Service  154 non-null    object 
 12  Officer Injured       145 non-null    object 
 13  Number of Rounds      156 non-null    object 
 14  Subject Gender        156 non-null    object 
 15  Subject Race          1

In [5]:
# Summary statistics; by default only the numeric columns are included.
data.describe()

Unnamed: 0,Incident Number,Longitude,Latitude
count,156.0,156.0,156.0
mean,20127050000000.0,-122.195231,47.603107
std,40564070000.0,1.505809,0.26332
min,20050000000000.0,-122.411392,44.414612
25%,20090000000000.0,-122.339003,47.5956
50%,20140000000000.0,-122.323604,47.618404
75%,20160000000000.0,-122.290325,47.6762
max,20190000000000.0,-103.514147,47.729558


In [6]:
# Full list of column labels (the .info() output above can be truncated).
data.columns

Index(['FRB #', 'Incident Number', 'Date / Time', 'Blurred Address',
       'Longitude', 'Latitude', 'City', 'State', 'Rank', 'Officer Gender',
       'Officer Race', 'Years of SPD Service', 'Officer Injured',
       'Number of Rounds', 'Subject Gender', 'Subject Race', 'Subject DOB',
       'Subject Age', 'Subject Weapon', 'Type of Weapon', 'Fatal', 'On-duty',
       'Disposition', 'Officer Disciplined?', 'Summary'],
      dtype='object')

In [7]:
# Preview the first ten rows of the raw table.
data.head(10)

Unnamed: 0,FRB #,Incident Number,Date / Time,Blurred Address,Longitude,Latitude,City,State,Rank,Officer Gender,...,Subject Race,Subject DOB,Subject Age,Subject Weapon,Type of Weapon,Fatal,On-duty,Disposition,Officer Disciplined?,Summary
0,FRB 05-01,20050000118193,03/21/2005 06:28:00 PM,65XX BLOCK OF RAINIER AV S,-122.273741,47.543815,Seattle,WA,Officer,Male,...,Asian,1975-11-03T00:00:00.000,29,Yes,Knife,Yes,Yes,Missing,Missing,"On March 21st, 2005, at approximately 5:20 P.M..."
1,FRB 05-03,20050000174022,04/29/2005 03:30:00 AM,65XX BLOCK OF 1 AV S,-122.334513,47.544177,Seattle,WA,Officer,Male,...,White,1969-08-02T00:00:00.000,36,No,,Yes,Yes,Justified,No,"On April 29th, 2005, at approximately 3:35 A.M..."
2,FRB 05-04,20050000256303,06/20/2005 11:30:00 AM,7XX BLOCK OF STEWART ST,-122.335725,47.61462,Seattle,WA,Officer,Male,...,White,1952-11-15T00:00:00.000,53,Yes,"Grenade, backpack reported to have explosives",Yes,Yes,Justified,No,"On June 20th, 2005, at approximately 11:30 A.M..."
3,FRB 05-04,20050000256303,06/20/2005 12:30:00 PM,7XX BLOCK OF STEWART ST,-122.335725,47.61462,Seattle,WA,Officer,Male,...,White,1952-11-15T00:00:00.000,53,Yes,"Grenade, backpack reported to have explosives",Yes,Yes,Justified,No,"On June 20th, 2005, at approximately 11:30 A.M..."
4,FRB 05-05,20050000286240,07/08/2005 01:48:00 PM,16 AV / E UNION ST,-122.311474,47.612908,Seattle,WA,Officer,Male,...,Black or African American,1957-01-28T00:00:00.000,48,Yes,Screwdriver,No,Yes,Justified,No,"On July 8th, 2005, at approximately 1:45 P.M.,..."
5,FRB 05-06,20050000388203,09/10/2005 03:45:00 AM,3XX BLOCK OF 9 AV,-122.323444,47.604128,Seattle,WA,Officer,Male,...,White,1962-02-18T00:00:00.000,44,No,,No,Yes,Not Justified,Yes,"On September 10th, 2005, at approximately 3:45..."
6,FRB 05-07,20050000407965,09/23/2005 02:40:00 AM,XX BLOCK OF BLANCHARD ST,-122.346295,47.61177,Seattle,WA,Officer,Male,...,White,1983-11-23T00:00:00.000,22,Yes,6 shot .357 revolver,No,Yes,Justified,No,"On September 23rd, 2005, at approximately 2:00..."
7,FRB 05-07,20050000407965,09/23/2005 03:40:00 AM,XX BLOCK OF BLANCHARD ST,-122.346295,47.61177,Seattle,WA,Officer,Male,...,White,1983-11-23T00:00:00.000,22,Yes,6 shot .357 revolver,No,Yes,Justified,No,"On September 23rd, 2005, at approximately 2:00..."
8,FRB 05-08,20050000499750,11/22/2005 12:46:00 PM,64XX BLOCK OF CALIFORNIA AV SW,-122.387206,47.54568,Seattle,WA,Officer,Male,...,White,1952-01-13T00:00:00.000,54,No,,No,Yes,Justified,No,"On November 22nd, 2005, at approximately 12:45..."
9,FRB 06-01,20050000530964,12/15/2005 01:44:00 AM,8XX BLOCK OF 9 AV,-122.326382,47.607339,Seattle,WA,Officer,Male,...,Black or African American,1982-07-05T00:00:00.000,23,Yes,"Mac-10, 9 mm machine pistol",No,Yes,Justified,No,"On December 15, 2005, at approximately 1:44 A...."


In [8]:
# Pairwise correlation of the numeric columns only. Selecting them explicitly
# keeps this cell working on pandas >= 2.0, where DataFrame.corr() no longer
# drops non-numeric (object) columns by default and raises on them instead.
data.select_dtypes(include="number").corr()

Unnamed: 0,Incident Number,Longitude,Latitude
Incident Number,1.0,-0.087949,0.123753
Longitude,-0.087949,1.0,-0.977187
Latitude,0.123753,-0.977187,1.0


In [10]:
# Peek at the free-text incident summaries (display truncates long strings).
data['Summary'].head(10)

0    On March 21st, 2005, at approximately 5:20 P.M...
1    On April 29th, 2005, at approximately 3:35 A.M...
2    On June 20th, 2005, at approximately 11:30 A.M...
3    On June 20th, 2005, at approximately 11:30 A.M...
4    On July 8th, 2005, at approximately 1:45 P.M.,...
5    On September 10th, 2005, at approximately 3:45...
6    On September 23rd, 2005, at approximately 2:00...
7    On September 23rd, 2005, at approximately 2:00...
8    On November 22nd, 2005, at approximately 12:45...
9    On December 15, 2005, at approximately 1:44 A....
Name: Summary, dtype: object