<a href="https://colab.research.google.com/github/neerajthandayan/CourseProject/blob/main/Ethnic_Diversity_Calculation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Calculating Ethnic Diversity in Different Police Force Areas

In [1]:
# Importing Libraries

import numpy as np
import pandas as pd

In [2]:
# Loading Data
#
# Reads the arrests CSV into `data` and previews the first two rows.
# NOTE(review): the path is an absolute Colab Drive location, so this
# presumably needs Google Drive mounted first - confirm before running.

data = pd.read_csv(
    '/content/drive/MyDrive/Project Data/Arrest_data.csv',
    header=0,
)
data.head(n=2)

Unnamed: 0,Measure,Time,Time_type,Ethnicity,Ethnicity_type,Gender,Gender_type,Age_Group,Age_Group_type,Geography,Geography_type,Number of arrests,"Population by ethnicity, gender, and PFA (based on 2011 Census)","Rate per 1,000 population by ethnicity, gender, and PFA","Proportion of arrests of this ethnicity, in this year, of this gender and in this police force area (excludes unreported)",Notes
0,Number of arrests,2018/19,Financial year,All,All,All,Male/Female,10 - 17 years,Arrest age group categories,All,Police Force Area,60208,N/A for individual age groups,N/A for individual age groups,N/A for individual age groups,Excludes Lancashire (both arrest and populatio...
1,Number of arrests,2018/19,Financial year,All,All,Female,Male/Female,10 - 17 years,Arrest age group categories,All,Police Force Area,9290,N/A for individual age groups,N/A for individual age groups,N/A for individual age groups,Excludes Lancashire (both arrest and populatio...


In [3]:
# Listing the column labels of the loaded arrests table
data.columns 

Index(['Measure', 'Time', 'Time_type', 'Ethnicity', 'Ethnicity_type', 'Gender',
       'Gender_type', 'Age_Group', 'Age_Group_type', 'Geography',
       'Geography_type', 'Number of arrests',
       'Population by ethnicity, gender, and PFA (based on 2011 Census)',
       'Rate per 1,000 population by ethnicity, gender, and PFA',
       'Proportion of arrests of this ethnicity, in this year, of this gender and in this police force area (excludes unreported)',
       'Notes'],
      dtype='object')

In [4]:
# Distinct classification schemes present in the Ethnicity_type column
data.loc[:, 'Ethnicity_type'].unique()

array(['All', 'ONS 2001 16+1', 'ONS 2001 5+1', 'Unreported'], dtype=object)

In [5]:
# Dropping unnecessary columns
#
# `data` is rebound to a reduced frame instead of being mutated in place.
# errors='ignore' makes the cell safe to re-run: once the columns are gone,
# a second execution is a no-op rather than a KeyError.

unused_columns = [
    'Measure',
    'Time_type',
    'Rate per 1,000 population by ethnicity, gender, and PFA',
    'Geography_type',
    'Number of arrests',
    'Gender_type',
    'Age_Group_type',
    'Proportion of arrests of this ethnicity, in this year, of this gender and in this police force area (excludes unreported)',
    'Notes',
]
data = data.drop(columns=unused_columns, errors='ignore')

In [6]:
# Slicing Data and renaming columns
#
# Keeps only the rows where Age_Group and Gender are "All", the ethnicity
# classification is "ONS 2001 5+1", the period is 2018/19 and the Geography
# is an individual area (not the "All" aggregate).
#
# All conditions are evaluated on `data` and combined into a single mask, so
# the mask and the frame it filters always share the same index (no reliance
# on implicit index alignment against an already-filtered frame).

row_filter = (
    (data['Age_Group'] == "All")
    & (data['Gender'] == "All")
    & (data['Ethnicity_type'] == "ONS 2001 5+1")
    & (data['Time'] == "2018/19")
    & (data['Geography'] != "All")
)
ndata = data.loc[row_filter]

# Drop the now-constant filter columns, shorten the population column name,
# discard rows with any missing value, and renumber the rows from 0.
newdata = (
    ndata
    .drop(columns=['Time', 'Ethnicity_type', 'Gender', 'Age_Group'])
    .rename(columns={'Population by ethnicity, gender, and PFA (based on 2011 Census)': 'Population'})
    .dropna()
    .reset_index(drop=True)
)

In [7]:
# Rows (if any) recorded for the 'London, City of' geography
is_city_of_london = newdata['Geography'].eq('London, City of')
newdata[is_city_of_london]

Unnamed: 0,Ethnicity,Geography,Population


In [8]:
# Cleaning Population Data
#
# Population values may arrive as text with thousands separators
# (e.g. "1,491,970"). Strip the commas from string entries only, then convert
# the whole column at once so it ends up with a real numeric dtype rather
# than an object column of individually converted scalars.
# pd.to_numeric raises if any entry still cannot be parsed as a number.

population_no_commas = newdata['Population'].map(
    lambda value: value.replace(',', '') if isinstance(value, str) else value
)
newdata['Population'] = pd.to_numeric(population_no_commas)

In [9]:
# Creating Geography Object
#
# Array of the distinct Geography values, in order of first appearance.

PFA = newdata.loc[:, 'Geography'].unique()
PFA

array(['Avon and Somerset', 'Bedfordshire', 'Cambridgeshire', 'Cheshire',
       'Cleveland', 'Cumbria', 'Derbyshire', 'Devon and Cornwall',
       'Dorset', 'Durham', 'Dyfed-Powys', 'Essex', 'Gloucestershire',
       'Greater Manchester', 'Gwent', 'Hampshire', 'Hertfordshire',
       'Humberside', 'Kent', 'Lancashire', 'Leicestershire',
       'Lincolnshire', 'Merseyside', 'Metropolitan Police', 'Norfolk',
       'North Wales', 'North Yorkshire', 'Northamptonshire',
       'Northumbria', 'Nottinghamshire', 'South Wales', 'South Yorkshire',
       'Staffordshire', 'Suffolk', 'Surrey', 'Sussex', 'Thames Valley',
       'Warwickshire', 'West Mercia', 'West Midlands', 'West Yorkshire',
       'Wiltshire'], dtype=object)

## Calculating Ethnic Diversity

In [10]:
# Creating Columns for Relative Population Data and Squared Relative Population
#
# Placeholder columns filled with NaN. `np.nan` is used because the `np.NaN`
# alias was removed in NumPy 2.0.

newdata['rel_pop'] = np.nan
newdata['rl_sd'] = np.nan

In [11]:
# Populating Relative Population Data and Squared Relative Population
#
# rel_pop: each row's Population as a percentage of the total Population of
#          its Geography.
# rl_sd:   rel_pop squared.
#
# The per-Geography total is broadcast back onto the rows with
# groupby/transform, so every share is matched to its row by index. This stays
# correct even when the rows of one Geography are not stored contiguously,
# which a positionally assigned list built group by group would get wrong.

# Coerce defensively: the shares must be numeric even if Population is still
# an object column at this point.
population = pd.to_numeric(newdata['Population'])
geography_totals = population.groupby(newdata['Geography']).transform('sum')

newdata['rel_pop'] = (population / geography_totals) * 100
newdata['rl_sd'] = np.square(newdata['rel_pop'])

In [12]:
# Making Herfindahl-Hirschman concentration index object
#
# One score per entry of PFA, in the same order: 10000 minus the sum of the
# squared relative populations (rl_sd) of that Geography.

Ethnic_Diversity = [
    10000 - newdata.loc[newdata['Geography'] == area, 'rl_sd'].sum()
    for area in PFA
]

In [13]:
# Creating DataFrame for Ethnic Diversity
#
# `fdata` keeps the PFA order; the sorted view below is only displayed and is
# not assigned back.

fdata = pd.DataFrame({'Geography': PFA, 'Diversity_Score': Ethnic_Diversity})
fdata.sort_values('Diversity_Score', ascending=False)

Unnamed: 0,Geography,Diversity_Score
23,Metropolitan Police,5911.741759
39,West Midlands,4702.489206
1,Bedfordshire,3784.505773
20,Leicestershire,3608.433115
40,West Yorkshire,3146.78857
13,Greater Manchester,2875.640579
36,Thames Valley,2757.822798
16,Hertfordshire,2280.058092
29,Nottinghamshire,2074.393456
2,Cambridgeshire,1819.170482


In [14]:
# Saving Ethnic Diversity CSV
#
# Written relative to the current working directory; the row index is
# included as the first column (pandas default).

fdata.to_csv(path_or_buf='ethnicdiversity.csv')

## Creating Population Dataset

In [15]:
# Independent copy of newdata without the derived share columns
popdata = newdata.drop(['rel_pop', 'rl_sd'], axis='columns').copy()

In [16]:
# Reshaping to one row per Geography and one column per Ethnicity, holding
# the Population value, then saving the result.
#
# The pivot reads from `newdata` with an explicit `values` column rather than
# from `popdata` itself. The cell therefore no longer consumes its own output
# and can be re-run safely, and the result has single-level columns without a
# droplevel step.

popdata = (
    newdata
    .pivot(index='Geography', columns='Ethnicity', values='Population')
    .reset_index()
)
popdata.to_csv('popdata.csv')

In [17]:
# Displaying the pivoted population table (one row per Geography, one column per Ethnicity)
popdata

Ethnicity,Geography,Asian,Black,Mixed,Other,White
0,Avon and Somerset,33005,30923,28277,15380,1491970
1,Bedfordshire,81480,29725,18456,8753,476647
2,Cambridgeshire,39535,10174,16029,12572,726531
3,Cheshire,12794,3264,10423,5230,995998
4,Cleveland,17419,3156,5762,4434,526456
5,Cumbria,2913,579,2504,1605,492257
6,Derbyshire,36871,10090,14351,7281,949845
7,Devon and Cornwall,11694,4106,15645,9467,1627306
8,Dorset,10694,3208,9507,5995,714637
9,Durham,5114,1058,4240,3030,605364
