In [4]:
# Import Dependancies
import os
import numpy
import pandas as pd
from functools import reduce
import re


# import matplotlib.pyplot as plt

# import numpy as np
# import requests
# import time

# import scipy.stats as st
# import datetime

# from scipy.stats import linregress
# from pprint import pprint

# from config import weather_api_key
# from config import geoapify_key

# from citipy import citipy

In [38]:
# Create a generalized working directory reference
working_dir = os.path.join(os.getcwd(),"UN_crime_reports","RAW_data_csv")

# Define the file names and chosen df names for crime type
crime_files = {
    'corruption_and_economic_crime.csv': 'UN_economic_crime_df',
    'Drug_related_crimes_total.csv' : 'UN_drug_crime_df',
    'firearms_trafficking.csv': 'UN_firearms_crime_df',
    'intentional_homicide_edit_country_post2008.csv': 'UN_homicide_crime_df',
    'offences_trafficing_persons.csv': 'UN_human_trafficking_df',
    'violent_and_sexual_crime.csv': 'UN_sex_crime_df',
}

population_files = {'WPP2022_TotalPopulationBySex_2016_2022.csv': 'World_pop_df'}

UN_crime_df_list = []
# Create loop so that I do not have to change and read text when I am too tired
# Also create a list of crime df generated to make multiple merges easier!!!
for filename, df_name in crime_files.items():
    file_path = os.path.join(working_dir, filename)
    vars()[df_name] = pd.read_csv(file_path)
    UN_crime_df_list.append(df_name)

UN_pop_df_list = []
for filename, df_name in population_files.items():
    file_path = os.path.join(working_dir, filename)
    vars()[df_name] = pd.read_csv(file_path)
    UN_pop_df_list.append(df_name)

# Remove columns that are not needed
# Format economic crime
UN_economic_crime_df=UN_economic_crime_df.drop(columns=['Subregion',
                                                        'Region',
                                                        'Iso3_code',
                                                        'Indicator',
                                                        'Dimension',
                                                        'Sex',
                                                        'Age'
                                                        ])
UN_economic_crime_df = UN_economic_crime_df[UN_economic_crime_df['Unit of measurement'].str.contains('Counts')]
UN_economic_crime_df['Crime Type']='Economic'

# Format drug crime
UN_drug_crime_df = UN_drug_crime_df.drop(columns=['SubRegion',
                                                  'Specify year',
                                                  'Region',
                                                  'Offences related to drug use and/or drug possession',
                                                  'Offences related to drug trafficking',
                                                  'Other drug-related offences'
                                                  ])
UN_drug_crime_df = UN_drug_crime_df.rename(columns={'msName':'Country','Calculated total':'VALUE'})
UN_drug_crime_df['Category']= 'Drug Trafficing, Use, and Posession'
UN_drug_crime_df['Unit of measurement']= 'Counts'
UN_drug_crime_df['Crime Type']='Drug Related'

#Move the columns to match other dfs:   https://sparkbyexamples.com/pandas/pandas-change-position-of-a-column/
drug_cols=list(UN_drug_crime_df.columns)
values_col = len(drug_cols)-2
drug_cols.append(drug_cols.pop(values_col))
UN_drug_crime_df = UN_drug_crime_df[drug_cols]


# Format firearms crime
UN_firearms_crime_df=UN_firearms_crime_df.drop(columns=['Region',
                                                        'Subregion',
                                                        'Iso3_code',
                                                        'Indicator',
                                                        'Dimension',
                                                        'Sex',
                                                        'Age'
                                                        ])
UN_firearms_crime_df['Crime Type']='Firearms'

# Format homicide crime
UN_homicide_crime_df=UN_homicide_crime_df.drop(columns=['Region',
                                                        'Subregion',
                                                        'Iso3_code',
                                                        'Indicator',
                                                        'Dimension',
                                                        'Sex',
                                                        'Age'
                                                        ])
UN_homicide_crime_df = UN_homicide_crime_df[UN_homicide_crime_df['Unit of measurement'].str.contains('Counts')]
UN_homicide_crime_df['Crime Type']='Homicide'

# Format human trafficing crime
UN_human_trafficking_df=UN_human_trafficking_df.drop(columns=['Region',
                                                        'Subregion',
                                                        'Iso3_code',
                                                        'Indicator',
                                                        'Dimension',
                                                        'Sex',
                                                        'Age'
                                                        ])
UN_human_trafficking_df = UN_human_trafficking_df.rename(columns={'txtVALUE':'VALUE'})
UN_human_trafficking_df['Crime Type'] = 'Human Trafficking'

# Format sex crime
UN_sex_crime_df=UN_sex_crime_df.drop(columns=['Region',
                                              'Subregion',
                                              'Iso3_code',
                                              'Indicator',
                                              'Dimension',
                                              'Sex',
                                              'Age'
                                              ])
UN_sex_crime_df = UN_sex_crime_df[UN_sex_crime_df['Unit of measurement'].str.contains('Counts')]
UN_sex_crime_df['Crime Type']= "Violent & Sex Assault"

#Format world pop
UN_world_pop_df = World_pop_df.rename(columns={'Location':'Country','Time':'Year'})
UN_world_pop_df=UN_world_pop_df.drop(columns=[
    'SortOrder',
    'LocID',
    'LocTypeID',
    'Notes',
    'SDMX_code',
    'LocTypeName',
    'ParentID',
    'VarID',
    'Variant',
    'MidPeriod',
    'PopMale',
    'PopFemale',
    'PopDensity'
])




In [39]:
# Check economic crime
UN_economic_crime_year = UN_economic_crime_df.groupby(['Country','Year']).sum([])['VALUE']
UN_economic_crime_type = UN_economic_crime_df['Category'].value_counts()
UN_economic_crime_type

Theft                                                                        418
Burglary                                                                     380
Theft: of a motorized vehicle                                                376
Corruption: Bribery                                                          360
Corruption                                                                   354
Smuggling of migrants                                                        350
Fraud                                                                        343
Corruption: Other acts of corruption                                         318
Money laundering                                                             296
Fraud: Cyber-related (Cy)                                                    187
Acts that cause environmental pollution or degradation                       171
Unlawful access to a computer system                                         165
Acts that result in the depl

In [40]:
UN_economic_crime_df.head()

Unnamed: 0,Country,Category,Year,Unit of measurement,VALUE,Source,Crime Type
0,Albania,Smuggling of migrants,2016,Counts,63.0,CTS,Economic
1,Albania,Theft: of a motorized vehicle,2016,Counts,298.0,CTS,Economic
2,Albania,Fraud: Cyber-related (Cy),2016,Counts,51.0,CTS,Economic
3,Albania,Corruption,2016,Counts,1032.0,CTS,Economic
4,Albania,Corruption: Bribery,2016,Counts,256.0,CTS,Economic


In [41]:
# Check drug crime
UN_drug_crime_df.head()

UN_drug_crime_year = UN_drug_crime_df.groupby(['Country','Year']).sum([])['VALUE']
UN_drug_crime_type = UN_drug_crime_df['Category'].value_counts()
UN_drug_crime_type



Drug Trafficing, Use, and Posession    294
Name: Category, dtype: int64

In [42]:
UN_drug_crime_df.head()

Unnamed: 0,Country,Year,VALUE,Category,Crime Type,Unit of measurement
0,Australia,2020,168306.0,"Drug Trafficing, Use, and Posession",Drug Related,Counts
1,Andorra,2020,216.0,"Drug Trafficing, Use, and Posession",Drug Related,Counts
2,Austria,2020,40293.0,"Drug Trafficing, Use, and Posession",Drug Related,Counts
3,Algeria,2020,51753.0,"Drug Trafficing, Use, and Posession",Drug Related,Counts
4,Armenia,2020,1567.0,"Drug Trafficing, Use, and Posession",Drug Related,Counts


In [43]:
# check firearms crime
UN_firearms_crime_df.head()

UN_firearms_crime_year = UN_firearms_crime_df.groupby(['Country','Year']).sum([])['VALUE']
UN_firearms_crime_type = UN_firearms_crime_df['Category'].value_counts()
UN_firearms_crime_type



pistol                                                                         393
rifle (Inlcuding Carbine)                                                      377
shotgun (Including Short Carbine)                                              377
revolver                                                                       374
other weapons                                                                  353
machine gun                                                                    348
submachine gun                                                                 335
Unknown status on type                                                         330
unknown                                                                        249
otherwise illicitly manufactured                                               245
other                                                                          239
uniquely marked                                                                220
no m

In [44]:
UN_firearms_crime_df.head()

Unnamed: 0,Country,Category,Year,Unit of measurement,VALUE,Source,Crime Type
0,Angola,other,2018,Counts,0,IAFQ,Firearms
1,Angola,unknown,2018,Counts,0,IAFQ,Firearms
2,Angola,Altered markings,2018,Counts,0,IAFQ,Firearms
3,Angola,illicit manufacture,2018,Counts,0,IAFQ,Firearms
4,Angola,illicit trafficking,2018,Counts,0,IAFQ,Firearms


In [45]:
# check homicide crime
UN_homicide_crime_df.head()

UN_homicide_crime_year = UN_homicide_crime_df.groupby(['Country','Year']).sum([])['VALUE']
UN_homicide_crime_type = UN_homicide_crime_df['Category'].value_counts()
UN_homicide_crime_type

Intimate partner or family member                      943
Intimate partner or family member: Intimate partner    834
Intimate partner or family member: Family member       798
National citizens                                      784
Other Perpetrator known to the victim                  715
Relationship to perpetrator is not known               711
Foreign citizens                                       704
Perpetrator unknown                                    690
Other criminal activities                              514
Interpersonal homicide                                 474
Organized criminal groups or gangs                     441
Unknown types of homicide                              431
Socio-political homicide                               362
Socio-political homicide - terrorist offences          340
Firearms or explosives - firearms                      311
Another weapon - sharp object                          306
Without a weapon/ other Mechanism                      2

In [46]:
UN_homicide_crime_df.head()

Unnamed: 0,Country,Category,Year,Unit of measurement,VALUE,Source,Crime Type
0,Albania,Firearms or explosives - firearms,2016,Counts,43.0,CTS,Homicide
1,Albania,Another weapon - sharp object,2016,Counts,18.0,CTS,Homicide
2,Albania,Without a weapon/ other Mechanism,2016,Counts,10.0,CTS,Homicide
3,Albania,2nd largest city,2016,Counts,20.0,CTS,Homicide
4,Albania,Intimate partner or family member,2016,Counts,20.0,CTS,Homicide


In [47]:
# check human trafficing crime
UN_human_trafficking_df.head()

UN_human_trafficing_year = UN_human_trafficking_df.groupby(['Country','Year']).sum([])['VALUE']
UN_human_trafficing_type = UN_human_trafficking_df['Category'].value_counts()
UN_human_trafficing_type

Sexual exploitation            759
Forced labour                  533
Other forms of exploitation    481
Nationals                      123
Nigeria                         70
                              ... 
The Ukraine                      1
Bosnia Erzegovina                1
Bangali                          1
Japan                            1
97 albanian                      1
Name: Category, Length: 197, dtype: int64

In [48]:
UN_human_trafficking_df.head()

Unnamed: 0,Country,Category,Year,Unit of measurement,VALUE,Source,Crime Type
0,Angola,Nationals,2017,Counts,23,GLOTIP,Human Trafficking
1,Angola,Forced labour,2017,Counts,15,GLOTIP,Human Trafficking
2,Angola,Forced labour,2017,Counts,9,GLOTIP,Human Trafficking
3,Angola,Forced labour,2017,Counts,24,GLOTIP,Human Trafficking
4,Angola,Nationals,2017,Counts,6,GLOTIP,Human Trafficking


In [49]:
# check sex crime
UN_sex_crime_df.head()

UN_sex_crime_year = UN_sex_crime_df.groupby(['Country','Year']).sum([])['VALUE']
UN_sex_crime_type = UN_sex_crime_df['Category'].value_counts()
UN_sex_crime_type


Intimate partner or family member                                    508
Serious assault                                                      488
Sexual violence: Rape                                                479
Relationship to perpetrator is not known                             477
Sexual violence                                                      449
Kidnapping                                                           436
Sexual violence: Sexual assault                                      417
Perpetrator unknown to the victim                                    410
Other Perpetrator known to the victim                                379
Robbery                                                              367
Sexual violence: Other acts of sexual violence                       311
Sexual Exploitation                                                  286
Child pornography                                                    186
Acts intended to induce fear or emotional distress 

In [50]:
UN_sex_crime_df.head()

Unnamed: 0,Country,Category,Year,Unit of measurement,VALUE,Source,Crime Type
0,Albania,Serious assault,2016,Counts,118.0,CTS,Violent & Sex Assault
1,Albania,Kidnapping,2016,Counts,4.0,CTS,Violent & Sex Assault
2,Albania,Sexual violence,2016,Counts,120.0,CTS,Violent & Sex Assault
3,Albania,Sexual violence: Rape,2016,Counts,44.0,CTS,Violent & Sex Assault
4,Albania,Sexual violence: Sexual assault,2016,Counts,76.0,CTS,Violent & Sex Assault


In [51]:
UN_world_pop_df.head()

Unnamed: 0,ISO3_code,ISO2_code,Country,Year,PopTotal
0,BDI,BI,Burundi,2016,10903.327
1,COM,KM,Comoros,2016,746.232
2,DJI,DJ,Djibouti,2016,1023.261
3,ERI,ER,Eritrea,2016,3365.287
4,ETH,ET,Ethiopia,2016,105293.228


In [65]:
# Concatonate all the crime dataframes together in one swoop in a loop!!!
UN_all_crime_df = pd.DataFrame()

for df_name in UN_crime_df_list:
    df = eval(df_name)
    UN_all_crime_df = pd.concat([UN_all_crime_df,df])

UN_all_grouped = UN_all_crime_df.groupby(['Country','Crime Type']).value_counts()
UN_all_grouped.head()




Country  Crime Type  Category                                                       Year  Unit of measurement  VALUE  Source
Albania  Economic    Acts involving the movement of dumping of waste                2016  Counts               0.0    CTS       1
                     Unlawful interference with a computer system or computer data  2019  Counts               70.0   CTS       1
                                                                                    2018  Counts               55.0   CTS       1
                                                                                    2017  Counts               68.0   CTS       1
                                                                                    2016  Counts               72.0   CTS       1
dtype: int64

In [66]:
# Merge in population dataframe

UN_Combined_crime_pop_df = pd.merge(UN_all_crime_df,UN_world_pop_df,how='left',on=['Country','Year'])
UN_Combined_crime_pop_df.head()

Unnamed: 0,Country,Category,Year,Unit of measurement,VALUE,Source,Crime Type,ISO3_code,ISO2_code,PopTotal
0,Albania,Smuggling of migrants,2016,Counts,63.0,CTS,Economic,ALB,AL,2881.063
1,Albania,Theft: of a motorized vehicle,2016,Counts,298.0,CTS,Economic,ALB,AL,2881.063
2,Albania,Fraud: Cyber-related (Cy),2016,Counts,51.0,CTS,Economic,ALB,AL,2881.063
3,Albania,Corruption,2016,Counts,1032.0,CTS,Economic,ALB,AL,2881.063
4,Albania,Corruption: Bribery,2016,Counts,256.0,CTS,Economic,ALB,AL,2881.063
