In [1]:
import pandas as pd
import numpy as np
from functools import reduce

# Read crime data
dataCrime        = pd.read_csv('data/crimes/crimes.csv')
dataVioCrm       = pd.read_csv('data/crimes/violent_crime_incidence_2016.csv')
dataRapeVictm    = pd.read_csv('data/crimes/rape_victims_2016.csv')
dataCyberCrm     = pd.read_csv('data/crimes/cyber_crime.csv')
dataKidnapping   = pd.read_csv('data/crimes/kidnapping_2016.csv')
dataTrafficing   = pd.read_csv('data/crimes/trafficing_2016.csv')
dataMissChildren = pd.read_csv('data/crimes/missing_traced_children_2016.csv')
dataChldCrm      = pd.read_csv('data/crimes/crime_against_children.csv')
dataJuvenileCrm  = pd.read_csv('data/crimes/juvenile_crime.csv')
dataJuvArrest    = pd.read_csv('data/crimes/juvenile_arrested_bckgrnd_2016.csv')
dataMurderVictm  = pd.read_csv('data/crimes/murder_victims_2016.csv')
dataMrdrMotive   = pd.read_csv('data/crimes/murder_motives_2016.csv')
dataCorruption   = pd.read_csv('data/crimes/coruption_case.csv')

# Crime data functions

In [2]:
# Geo code and geo level
geography = pd.read_csv('data/geography.csv',encoding='UTF-8')
geo = geography[['geo_level','geo_code','name']]

# Calculating country data
def setCountryData(df,cols):
    for x in cols:
         df.loc[df['geo_level'] == 'country', x]= df.loc[df['geo_level'] == 'state', x].sum()
    return df;

def getCrimeData(df,varname):
    
    df   = df.rename(index=str, columns={'STATE NAME': 'name'})
    cols = list(df.columns.values)[1:]
    dfgeo =  geo.merge(df,on=['name'],how='left')
    dfgeogeodf = setCountryData(dfgeo,cols)
    dfgeoFinal = pd.melt( dfgeogeodf, id_vars=['geo_level', 'geo_code','name'], var_name=varname, value_name="total")
    dfgeoFinal[varname] = dfgeoFinal[varname].str.split('(').str[1].str.split(')').str[0].str.capitalize()
    dfgeoFinal = dfgeoFinal.fillna(0)
    dfgeoFinal = dfgeoFinal.sort_values(by=['geo_level','name']).reset_index().drop(['index'],axis=1).drop(['name'],axis=1)
    dfgeoFinal['total'] = dfgeoFinal['total'].astype(int)
    
    return dfgeoFinal;

def getRapeData(df,varname):
        df   = df.rename(index=str, columns={'STATE NAME': 'name'})
        cols = list(df.columns.values)[1:]
        dfgeo =  geo.merge(df,on=['name'],how='left')
        dfgeogeodf = setCountryData(dfgeo,cols)
        dfgeoFinal = pd.melt( dfgeogeodf, id_vars=['geo_level', 'geo_code','name'], var_name=varname, value_name="total")
        #dfgeoFinal[varname] = dfgeoFinal[varname].str.split('(').str[1].str.split(')').str[0].str.capitalize()
        dfgeoFinal = dfgeoFinal.fillna(0)
        dfgeoFinal = dfgeoFinal.sort_values(by=['geo_level','name']).reset_index().drop(['index'],axis=1).drop(['name'],axis=1)
        dfgeoFinal['total'] = dfgeoFinal['total'].astype(int)
      
        return dfgeoFinal

# Combined crime data

In [3]:
crimedf    = [dataCrime,dataVioCrm,dataCyberCrm,dataChldCrm,dataTrafficing,dataMissChildren,dataRapeVictm,dataKidnapping,dataJuvenileCrm,dataMurderVictm,dataCorruption,dataJuvArrest,dataMrdrMotive]
crimedf2016 = reduce(lambda left,right: pd.merge(left,right, on=['STATE NAME']),crimedf)
crimedf2016.head()

Unnamed: 0,Unnamed: 0_x,STATE NAME,CRIME (2014),CRIME (2015),CRIME (2016),CRIME PER SHARE (STATE),CRIME RATE,Unnamed: 0_y,VIOLENT (MURDER),VIOLENT (CULPABLE HOMICIDE),...,RAPE,CLASS CONFLICT,WITCH CRAFT,POLITICAL REASONS,LUNACY,HONOUR KILLING,COMMUNALISM,CHILD/ HUMAN SACRIFICE,OTHER CAUSES*,TOTAL MOTIVES
0,0,ANDHRA PRADESH,114604,110693,106774,3.6,206.4,0,1123,83,...,5,0,8,0,0,2,0,0,659,1123
1,1,ARUNACHAL PRADESH,2843,2968,2534,0.1,192.3,1,46,6,...,0,0,0,0,1,0,0,0,34,46
2,2,ASSAM,94337,103616,102250,3.4,313.9,2,1149,34,...,6,27,1,0,1,0,0,1,211,1149
3,3,BIHAR,177595,176973,164163,5.5,157.4,3,2581,150,...,5,9,0,26,1,3,0,0,44,2581
4,4,CHHATTISGARH,58200,56692,55029,1.8,211.7,4,957,37,...,6,2,17,4,4,1,0,2,546,957


# Violent crimes

In [4]:
viocrimescols =  ['STATE NAME','VIOLENT (MURDER)','VIOLENT (CULPABLE HOMICIDE)','VIOLENT (DOWRY DEATHS)','VIOLENT (INFANTICIDE)','VIOLENT (FOETICIDE)','VIOLENT (ATTEMPT TO MURDER)','VIOLENT (ATTEMPT TO HOMICIDE)','VIOLENT (GRIEVOUS HURT)','VIOLENT (KIDNAPPING & ABDUCTION)','VIOLENT (RAPE)','VIOLENT (ATTEMPT TO RAPE)','VIOLENT (RIOT)','VIOLENT (ROBERY)','VIOLENT (DACOITY)','VIOLENT (ARSON)']
viocrimes2016 = crimedf2016[viocrimescols]

viocrimeGeo2016 = getCrimeData(viocrimes2016,'violentcrimes')
viocrimeGeo2016.insert(2,'geo_version','2011')
viocrimeGeo2016.insert(4,'year','2016')
viocrimeGeo2016.head()

# getting default year data
viocrimesdefault =  viocrimeGeo2016.copy(deep=True)
viocrimesdefault['total'] = 0
viocrimesdefault['year'] = '2016'
viocrimesdefault

#viocrimeGeo2016.to_csv('data/violentcrimes_2016.csv', index=False)
#viocrimesdefault.to_csv('data/violentcrimes_default.csv', index=False)
viocrimeGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,violentcrimes,year,total
0,country,IN,2011,Murder,2016,30450
1,country,IN,2011,Culpable homicide,2016,3203
2,country,IN,2011,Dowry deaths,2016,7621
3,country,IN,2011,Infanticide,2016,93
4,country,IN,2011,Foeticide,2016,144


# Age wise rape victims in India

In [5]:
rapeviccols = ['STATE NAME','RAPE VICTIMS BELOW 6 YEARS','RAPE VICTIMS 6-11 YEARS','RAPE VICTIMS 12-15 YEARS','RAPE VICTIMS 16-17 YEARS','RAPE VICTIMS 18-29 YEARS','RAPE VICTIMS 30-44 YEARS','RAPE VICTIMS 45-59 YEARS','RAPE VICTIMS 60 YEARS & ABOVE']
rapevic2016 = crimedf2016[rapeviccols]

rapevic2016 = rapevic2016.rename(index=str, columns={'STATE NAME': 'name', 'RAPE VICTIMS BELOW 6 YEARS': '0-6','RAPE VICTIMS 6-11 YEARS':'6-11','RAPE VICTIMS 12-15 YEARS':'12-15','RAPE VICTIMS 16-17 YEARS':'16-17','RAPE VICTIMS 18-29 YEARS':'18-29','RAPE VICTIMS 30-44 YEARS':'30-44','RAPE VICTIMS 45-59 YEARS':'45-59','RAPE VICTIMS 60 YEARS & ABOVE':'60 & Above'})
rapevicGeo2016 = getRapeData(rapevic2016,'rape')
rapevicGeo2016.insert(2,'geo_version','2011')
rapevicGeo2016.insert(4,'year','2016')
rapevicGeo2016

# getting default year data
rapevicdefault =  rapevicGeo2016.copy(deep=True)
rapevicdefault['total'] = 0
rapevicdefault['year'] = '2016'
rapevicdefault

#rapevicGeo2016.to_csv('data/rapes_2016.csv', index=False)
#rapevicdefault.to_csv('data/rapes_default.csv', index=False)
rapevicGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,rape,year,total
0,country,IN,2011,0-6,2016,520
1,country,IN,2011,6-11,2016,1596
2,country,IN,2011,12-15,2016,6091
3,country,IN,2011,16-17,2016,8656
4,country,IN,2011,18-29,2016,16462


# kidnapping by gender

In [6]:
kidnapcols = ['STATE NAME','KIDNAPPED & ABDUCTED (MALE)','KIDNAPPED & ABDUCTED (FEMALE)']

kidnapped2016 = crimedf2016[kidnapcols]
kidnappedGeo2016 = getCrimeData(kidnapped2016,'kidnapgender')
kidnappedGeo2016.insert(2,'geo_version','2011')
kidnappedGeo2016.insert(4,'year','2016')


# getting default year data
kidnappeddefault =  kidnappedGeo2016.copy(deep=True)
kidnappeddefault['total'] = 0
kidnappeddefault['year'] = '2016'
kidnappeddefault

#kidnappedGeo2016.to_csv('data/kidnapping_2016.csv', index=False)
#kidnappeddefault.to_csv('data/kidnapping_default.csv', index=False)
kidnappedGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,kidnapgender,year,total
0,country,IN,2011,Male,2016,42924
1,country,IN,2011,Female,2016,115823
2,district,532,2011,Male,2016,0
3,district,532,2011,Female,2016,0
4,district,146,2011,Male,2016,0


# Kidnapped recovery by gender

In [6]:
kidnapreccols = ['STATE NAME','KIDNAPPED PERSON RECOVERED ALIVE (MALE)','KIDNAPPED PERSON RECOVERED ALIVE (FEMALE)','KIDNAPPED PERSON RECOVERED DEAD (MALE)','KIDNAPPED PERSON RECOVERED DEAD (FEMALE)']

kidnaprecovery2016    = crimedf2016[kidnapreccols]
kidnaprecoveryGeo2016 = getRapeData(kidnaprecovery2016,'kidnaprectemp')
kidnaprecoveryGeo2016.insert(3,'kidnaprecovery',kidnaprecoveryGeo2016['kidnaprectemp'].map(lambda x: 'Alive' if 'ALIVE' in x else 'Dead'))
kidnaprecoveryGeo2016.insert(4,'gender',kidnaprecoveryGeo2016['kidnaprectemp'].map(lambda x: 'Female' if 'FEMALE' in x else 'Male'))
kidnaprecoveryGeo2016.insert(2,'geo_version','2011')
kidnaprecoveryGeo2016.insert(6,'year','2016')
kidnaprecoveryGeo2016 =  kidnaprecoveryGeo2016.drop(['kidnaprectemp'],axis=1)
kidnaprecoveryGeo2016

# getting default year data
kidnaprecoverydefault =  kidnaprecoveryGeo2016.copy(deep=True)
kidnaprecoverydefault['total'] = 0
kidnaprecoverydefault['year'] = '2016'
kidnaprecoverydefault

#kidnaprecoveryGeo2016.to_csv('data/kidnaprecovery_2016.csv', index=False)
#kidnaprecoverydefault.to_csv('data/kidnaprecovery_default.csv', index=False)
kidnaprecoveryGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,kidnaprecovery,gender,year,total
0,country,IN,2011,Alive,Male,2016,18792
1,country,IN,2011,Alive,Female,2016,50482
2,country,IN,2011,Dead,Male,2016,182
3,country,IN,2011,Dead,Female,2016,143
4,district,532,2011,Alive,Male,2016,0


# Trafficking

In [7]:
trafficcols = ['STATE NAME','TRAFFICED BELOW 18 YRS (MALE)','TRAFFICED BELOW 18 YRS (FEMALE)','TRAFFICED ABOVE 18 YRS (MALE)','TRAFFICED ABOVE 18 YRS (FEMALE)']
trafficking2016    = crimedf2016[trafficcols]

traffickingGeo2016 = getRapeData(trafficking2016,'traffictemp')
traffickingGeo2016.insert(3,'trafficedage',traffickingGeo2016['traffictemp'].map(lambda x: 'Below 18 years' if 'BELOW 18 YRS' in x else 'Above 18 years'))
traffickingGeo2016.insert(4,'gender',traffickingGeo2016['traffictemp'].map(lambda x: 'Female' if 'FEMALE' in x else 'Male'))
traffickingGeo2016.insert(2,'geo_version','2011')
traffickingGeo2016.insert(6,'year','2016')
traffickingGeo2016 =  traffickingGeo2016.drop(['traffictemp'],axis=1)
traffickingGeo2016

# getting default year data
traffickingdefault =  traffickingGeo2016.copy(deep=True)
traffickingdefault['total'] = 0
traffickingdefault['year'] = '2016'
traffickingdefault

#traffickingGeo2016.to_csv('data/trafficing_2016.csv', index=False)
#traffickingdefault.to_csv('data/trafficing_default.csv', index=False)
traffickingGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,trafficedage,gender,year,total
0,country,IN,2011,Below 18 years,Male,2016,4123
1,country,IN,2011,Below 18 years,Female,2016,4911
2,country,IN,2011,Above 18 years,Male,2016,1106
3,country,IN,2011,Above 18 years,Female,2016,5239
4,district,532,2011,Below 18 years,Male,2016,0


# Trafficking motives

In [8]:
trafficmotcols = ['STATE NAME','TRAFFICED FOR (FORCED LABOUR)','TRAFFICED FOR (SEXUAL EXPLOITATION FOR PROSTITUTION)','TRAFFICED FOR (DOMESTIC SERVITUDE)','TRAFFICED FOR (FORCED MARRIAGE)','TRAFFICED FOR (PETTY CRIMES)','TRAFFICED FOR (CHILD PORNOGRAPHY)','TRAFFICED FOR (BEGGING)','TRAFFICED FOR (OTHER REASONS)']
traffickingmot2016    = crimedf2016[trafficmotcols]

traffickingmotGeo2016 = getCrimeData(traffickingmot2016 ,'trafficingmotives')
traffickingmotGeo2016.insert(2,'geo_version','2011')
traffickingmotGeo2016.insert(4,'year','2016')
traffickingmotGeo2016

# getting default year data
traffickingmotdefault =  traffickingmotGeo2016.copy(deep=True)
traffickingmotdefault['total'] = 0
traffickingmotdefault['year'] = '2016'
traffickingmotdefault

#traffickingmotGeo2016.to_csv('data/trafficingmotives_2016.csv', index=False)
#traffickingmotdefault.to_csv('data/trafficingmotives_default.csv', index=False)
traffickingmotGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,trafficingmotives,year,total
0,country,IN,2011,Forced labour,2016,10509
1,country,IN,2011,Sexual exploitation for prostitution,2016,4980
2,country,IN,2011,Domestic servitude,2016,2590
3,country,IN,2011,Forced marriage,2016,349
4,country,IN,2011,Petty crimes,2016,212


# Juvenile educational background

In [10]:
juvenileducols   = ['STATE NAME','JUVENILE EDU (ILLITERATE)','JUVENILE EDU (PRIMARY)','JUVENILE EDU (ABOVE PRIMARY BELOW MATRIC)','JUVENILE EDU (MATRIC/HSC & ABOVE)']

juveniledu2016    = crimedf2016[juvenileducols]
juvenileduGeo2016 = getCrimeData(juveniledu2016 ,'juveniledu')
juvenileduGeo2016.insert(2,'geo_version','2011')
juvenileduGeo2016.insert(4,'year','2016')
juvenileduGeo2016

# getting default year data
juveniledudefault =  juvenileduGeo2016.copy(deep=True)
juveniledudefault['total'] = 0
juveniledudefault['year'] = '2016'
juveniledudefault

#juvenileduGeo2016.to_csv('data/juveniledubackground_2016.csv', index=False)
#juveniledudefault.to_csv('data/juveniledubackground_default.csv', index=False)
juvenileduGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,juveniledu,year,total
0,country,IN,2011,Illiterate,2016,5412
1,country,IN,2011,Primary,2016,14501
2,country,IN,2011,Above primary below matric,2016,20014
3,country,IN,2011,Matric/hsc & above,2016,4244
4,district,532,2011,Illiterate,2016,0


# Juvenile family background

In [9]:
juvenilefmlcols  = ['STATE NAME','JUVENILE FAMILY BG (LIVING WITH PARENTS)','JUVENILE FAMILY BG (LIVING WITH GUARDIANS)','JUVENILE FAMILY BG (HOMELESS)']

juvenilefmlbg2016    = crimedf2016[juvenilefmlcols]
juvenilefmlbgGeo2016 = getCrimeData(juvenilefmlbg2016,'juvenilefamily')
juvenilefmlbgGeo2016.insert(2,'geo_version','2011')
juvenilefmlbgGeo2016.insert(4,'year','2016')
juvenilefmlbgGeo2016

# getting default year data
juvenilefmlbgdefault =  juvenilefmlbgGeo2016.copy(deep=True)
juvenilefmlbgdefault['total'] = 0
juvenilefmlbgdefault['year'] = '2016'
juvenilefmlbgdefault

#juvenilefmlbgGeo2016.to_csv('data/juvenilefamilybg_2016.csv', index=False)
#juvenilefmlbgdefault.to_csv('data/juvenilefamilybg_default.csv', index=False)
juvenilefmlbgGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,juvenilefamily,year,total
0,country,IN,2011,Living with parents,2016,38061
1,country,IN,2011,Living with guardians,2016,4550
2,country,IN,2011,Homeless,2016,1560
3,district,532,2011,Living with parents,2016,0
4,district,532,2011,Living with guardians,2016,0


# Murder victims in India

In [10]:
murderviccols = ['STATE NAME','MURDERED CHILD (MALE)','MURDERED CHILD (FEMALE)','MURDERED ADULT (MALE)','MURDERED ADULT (FEMALE)']

murdervic2016    = crimedf2016[murderviccols]
murdervicGeo2016 = getRapeData(murdervic2016,'murdervictemp')
murdervicGeo2016.insert(3,'murder',murdervicGeo2016['murdervictemp'].map(lambda x: 'Child' if 'CHILD' in x else 'Adult'))
murdervicGeo2016.insert(4,'gender',murdervicGeo2016['murdervictemp'].map(lambda x: 'Female' if 'FEMALE' in x else 'Male'))
murdervicGeo2016.insert(2,'geo_version','2011')
murdervicGeo2016.insert(6,'year','2016')
murdervicGeo2016 =  murdervicGeo2016.drop(['murdervictemp'],axis=1)
murdervicGeo2016

# getting default year data
murdervicdefault =  murdervicGeo2016.copy(deep=True)
murdervicdefault['total'] = 0
murdervicdefault['year'] = '2016'
murdervicdefault

#murdervicGeo2016.to_csv('data/murdervictims_2016.csv', index=False)
#murdervicdefault.to_csv('data/murdervictims_default.csv', index=False)
murdervicGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,murder,gender,year,total
0,country,IN,2011,Child,Male,2016,958
1,country,IN,2011,Child,Female,2016,817
2,country,IN,2011,Adult,Male,2016,21650
3,country,IN,2011,Adult,Female,2016,8071
4,district,532,2011,Child,Male,2016,0


# Murder motives

In [12]:
murdermotcols = ['STATE NAME','PERSONAL VENDETTA OR ENMITY','PROPERTY DISPUTE','GAIN','ILLICIT RELATIONSHIP','LOVE AFFAIRS','DOWRY','WATER OR MONEY DISPUTE','ROAD RAGE','EXTREMISM / NAXALISM','CASTEISM','ROBBERY/ EXTORTION','RAPE','CLASS CONFLICT','POLITICAL REASONS','COMMUNALISM']

murdermot2016    = crimedf2016[murdermotcols]
murdermotGeo2016 = getRapeData(murdermot2016,'murdermotive')
murdermotGeo2016['murdermotive'] = murdermotGeo2016['murdermotive'].str.capitalize()
murdermotGeo2016.insert(2,'geo_version','2011')
murdermotGeo2016.insert(4,'year','2016')
murdermotGeo2016

# getting default year data
murdermotdefault =  murdermotGeo2016.copy(deep=True)
murdermotdefault['total'] = 0
murdermotdefault['year'] = '2016'

#murdermotGeo2016.to_csv('data/murdermotive_2016.csv', index=False)
#murdermotdefault.to_csv('data/murdermotive_default.csv', index=False)
murdermotGeo2016.head()

Unnamed: 0,geo_level,geo_code,geo_version,murdermotive,year,total
0,country,IN,2011,Personal vendetta or enmity,2016,5179
1,country,IN,2011,Property dispute,2016,3424
2,country,IN,2011,Gain,2016,2270
3,country,IN,2011,Illicit relationship,2016,1671
4,country,IN,2011,Love affairs,2016,1493
