# Experiment description
## Hypothesis: 
Predictability values are similar considering different date intervals
## Method: 
- Remove outliers
- Measure predictability for 7 days timewindows and levelCrime=3 on intervals (months, bimesters, semesters)
- Implement a metric to compare the difference among predictability values

## Parameters: 
- Time windows: 7
- Crime levels: 3
- Aggregation: localidades

## Built-in methods

In [20]:
import numpy as np
import pandas as pd
import seaborn as sns
import pickle
import matplotlib.pyplot as plt
import scipy
import math
from math import pi
import geopandas as gpd
import plotly.express as px
%matplotlib inline

In [2]:
def set_initial_dataset_day(df_by_date,name_day):
    df_by_date = df_by_date.reset_index()
    df_by_date['day_of_week'] = df_by_date['date'].dt.day_name()
    monday_idx = df_by_date.index[df_by_date['day_of_week'] == name_day].tolist()[0]
    print('monday_idx',monday_idx)
    df_by_date = df_by_date[monday_idx:].set_index('date').drop(['day_of_week'],axis=1)
    return df_by_date

In [3]:
# Methods for time windows
def im2patches(im,n):
    patches = [];
    for i in range(len(im)-n):
        patch = im[i:(i+n-1)]        
        patch = patch - np.nanmean(patch);
        if(np.linalg.norm(patch)>0):
            patch = patch/np.linalg.norm(patch);
        if i==0:
            patches = patch;
        else:
            patches = np.vstack((patches,patch))
    return patches;

def writeEmbeding(timeSeries,lenWindow,samplePath, scenarioName):
    slicingWindows = im2patches(timeSeries,lenWindow);
    workingPath = '/Users/anamaria/Desktop/dev/security_project/periodicity_experiments/predictability/slicing/'
    prevStation = str(samplePath);
    with open(workingPath+'slicingWindows'+"_"+str(prevStation)+"_"+str(scenarioName)+"_"+str(lenWindow)+'_.pickle', 'wb') as f:
        lv = slicingWindows.tolist();                        
        pickle.dump(lv, f, protocol=2)

    workingPath = '/Users/anamaria/Desktop/dev/security_project/periodicity_experiments/predictability/timeSeries/'    
    with open(workingPath+'timeSeries'+"_"+str(prevStation)+"_"+str(scenarioName)+"_"+str(lenWindow)+'_.pickle', 'wb') as f:
        lv = timeSeries.tolist();                        
        pickle.dump(lv, f, protocol=2)



In [4]:
#Methods for predictability
def getBarcode(samplePath,lenWindow,scenarioName):
    workingPath = '/Users/anamaria/Desktop/dev/security_project/periodicity_experiments/predictability/'
    barcode = [];

    with open(workingPath+'timeSeries/'+'timeSeries_'+samplePath+"_"+str(scenarioName)+'_'+str(lenWindow)+'_'+'.pickle', 'rb') as f:
            timeSeries = pickle.load(f);            
    return (barcode,timeSeries);

def computeBarcodeEntropy(barsLenB0):
    barlen = np.array(barsLenB0);
    barlen = barlen/barlen.sum();
    hbc = 0;
    for i in range(barlen.shape[0]):
        if barlen[i]!=0:
            hbc = hbc-(barlen[i])*np.log(barlen[i]);
    return hbc;


def computeGeneralPredictability(timeSeries,binsData,lenWindow):
    # Colwell, R. K. (1974). Predictability, constancy, and contingency of periodic phenomena. Ecology, 55(5), 1148-1153.
    # Normalize the caudal values
    nLevels = binsData.shape[0]-1;
    matStations = np.array(timeSeries).reshape((np.array(timeSeries).shape[0]//lenWindow,lenWindow))    

    grandMean = np.mean(np.mean(matStations));
    #matStations = matStations / grandMean;
    N = np.zeros((nLevels,lenWindow));
    for i in range(1,matStations.shape[1]): 
        # Computes histograms per columns
        hist, bin_edges = np.histogram(matStations[:,i],bins = binsData);
        N[:,i] = hist;
    X = np.sum(N, axis=0);
    Y = np.sum(N, axis=1);
    Z = np.sum(Y);
    hx = 0;
    hy = 0;
    hxy = 0;
    for j in range(X.shape[0]):
        if X[j]!=0:
            hx = hx-(X[j]/Z)*np.log(X[j]/Z);
            
    for i in range(Y.shape[0]):
        if Y[i]!=0:
            hy = hy-(Y[i]/Z)*np.log(Y[i]/Z);
            
    for i in range(Y.shape[0]):
        for j in range(X.shape[0]):
            if N[i,j]!=0:
                hxy = hxy-((N[i,j]/Z)*np.log(N[i,j]/Z));    
    
    # predictability
    p = 1 - (hxy - hx)/np.log(N.shape[0]);
    # constancy
    c = 1 - hy/np.log(N.shape[0]);
    # Returns constancy and contingency
    return (c,p-c,p);



In [5]:
def preprocess_df(df,min_date_period,max_date_period):
    df=df.drop(columns=['PERIODO_TS','LOCALIDAD'])
    #Remove outliers
    q_hi = df["total_eventos"].quantile(0.99)
    df = df[(df["total_eventos"] < q_hi)]
    print('remove outliers')
    print(df.head(3))

    #Make sure dataset include consecutive dates in period
    idx = pd.date_range(min_date_period, max_date_period)
    df = df.reindex(idx, fill_value=int(df["total_eventos"].mean()))
    df = df.reset_index().rename(columns={'index': 'date'}).set_index('date')
    print('set consecutive dates')
    print(df.head(3))
    
    #Make sure dataset starts on Monday for the experiment
    df = set_initial_dataset_day(df,'Monday')
    print('starts on monday')
    print(df.head(3))
    
    return df

In [6]:
def saveTimeSeries(df,min_date_period,max_date_period,localidad, lenWindow, expName):       
    df_values = pd.Series(df['total_eventos']).values
    lT=get_LT(df, lenWindow)
    df_values = df_values[0:lT]
    print(lT, len(df_values))
    writeEmbeding(df_values,lenWindow,expName,localidad)

In [7]:
def get_LT(df_by_period,lenWindow):
    min_date = df_by_period.reset_index().date.min()
    max_date = df_by_period.reset_index().date.max()
    print('min date on localidad',min_date)
    print('max date on localidad',max_date)
    samples_num = (max_date.date()-min_date.date()).days
    print('samples_num',samples_num)
    lT = samples_num//lenWindow * lenWindow
    print('lT complete',samples_num/lenWindow)
    print('lT aprox',samples_num//lenWindow)
    return lT

In [8]:
def predictability_experiment_localidades(df_by_date,min_date_period,max_date_period,lenWindow,localidadesList,Levels,expName,periodName):
    workingPath = '/Users/anamaria/Desktop/dev/security_project/periodicity_experiments/predictability/';

    flagF = True;
    for localidad in localidadesList:
        print(localidad)
        #write embeding
        df_by_localidad = df_by_date[df_by_date['LOCALIDAD'] == localidad]        
        df_by_localidad = preprocess_df(df_by_localidad,min_date_period,max_date_period)
        saveTimeSeries(df_by_localidad,min_date_period,max_date_period,localidad, lenWindow, expName)
        
        for nLevels in Levels:
            (barcode,timeSeries) = getBarcode(expName,lenWindow,localidad);
            binsLevels = np.linspace(np.min(timeSeries),np.max(timeSeries),nLevels);
            c,m,p = computeGeneralPredictability(timeSeries,binsLevels,lenWindow)

            if flagF==True:
                flagF = False
                predValues = np.array([expName,periodName,localidad,lenWindow,nLevels,p,m,c]);
            else:
                predValues = np.vstack((predValues, [expName,periodName,localidad,lenWindow,nLevels,p,m,c]))

    return predValues
        
    

In [9]:
def table_predictability_by_period_report(df_agressiveBehavior,lenWindow,localidadesList,yAxisCategories,name_experiment,nLevel):
    join=df_agressiveBehavior.pivot('localidad','period','predictability')
    var1_order = []
    var2_order = yAxisCategories
    if len(var2_order) > 0:
        join = join.reindex(var2_order, axis=1)
    if len(var1_order) > 0:
        join = join.reindex(var1_order)
    
    fig, ax = plt.subplots(1,1,sharex=True, sharey=True)
    fig.set_size_inches(7, 6)
    g=sns.heatmap(join.astype('float'),annot=True,fmt=".3",linewidths=0,cmap="Blues",cbar=False)
    g.set_yticklabels(g.get_yticklabels(), rotation = 0)
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    file_path = '/Users/anamaria/Desktop/dev/security_project/periodicity_experiments/predictability/figures/'
    plt.savefig(file_path+'table_'+str(name_experiment)+'_predictability_time_'+str(lenWindow)+'_levels_'+str(nLevel),dpi=300,bbox_inches = "tight")
    plt.show()

In [10]:
def map_localidad(ax,df,col_localidad,col_vals,vmin=None,vmax=None):
  loc_geo="/Users/anamaria/Desktop/dev/security_project/assets/localidades_polygon.json"
  loc_=gpd.read_file(loc_geo)
  loc_=loc_.merge(df,left_on='LocNombre',right_on=col_localidad)
  loc_.plot(cmap='viridis',column=col_vals,legend=True,ax=ax,vmin=vmin,vmax=vmax)  

In [11]:
def map_predictability(df_crime, crime_level, lenWindow,name_experiment):
    subdata = df_crime[df_crime['crime_level']==crime_level]
    subdata = subdata[subdata['lenWindow']==str(lenWindow)]
    subdata["predictability"] = pd.to_numeric(subdata["predictability"])
    
    fig, ax = plt.subplots(figsize=(12,12))
    map_localidad(ax,subdata,'localidad','predictability')
    ax.axis('off')
    file_path = '/Users/anamaria/Desktop/dev/security_project/periodicity_experiments/predictability/figures/'
    plt.savefig(file_path+'map_aggressiveBehavior_localidades'+str(name_experiment)+'_predictability_time_'+str(lenWindow)+'_levels_'+str(crime_level),dpi=300,bbox_inches = "tight")
    plt.show()

In [12]:
def build_semester_list(year,semester=1):
    if semester == 1:
        month_list = ['01','02','03','04','05','06']
    if semester == 2:
        month_list = ['07','08','09','10','11','12']
    semester_list = map(lambda m: year+'/'+str(m), month_list)
    return (list(semester_list))

def build_trimester_list(year,trimester=1):
    if trimester == 1:
        month_list = ['01','02','03']
    if trimester == 2:
        month_list = ['04','05','06']
    if trimester == 3:
        month_list = ['07','08','09']
    if trimester == 4:
        month_list = ['10','11','12']
    trimester_list = map(lambda m: year+'/'+str(m), month_list)
    return (list(trimester_list))

def build_bimester_list(year,bimester=1):
    if bimester == 1:
        month_list = ['01','02']
    if bimester == 2:
        month_list = ['03','04']
    if bimester == 3:
        month_list = ['05','06']
    if bimester == 4:
        month_list = ['07','08']
    if bimester == 5:
        month_list = ['09','10']
    if bimester == 6:
        month_list = ['11','12']
    bimester_list = map(lambda m: year+'/'+str(m), month_list)
    return (list(bimester_list))

## Load data

In [13]:
data_location = '/Users/anamaria/Desktop/dev/security_project/datasets/verify_enrich_nuse_29112019.csv'
df_input = pd.read_csv(data_location,delimiter=",")

In [14]:
df_input['date']=pd.to_datetime(df_input['FECHA'])
df_by_date = pd.DataFrame(df_input.groupby(['date','PERIODO_TS','LOCALIDAD']).size(),columns=["total_eventos"])

In [15]:
df_by_date = df_by_date.reset_index().set_index('date')

## Localidades, semester

In [16]:
Levels=[3]
lenWindow = 7
levelCategories = list(map(lambda x: str(x), Levels))
localidadesList = list(df_by_date.LOCALIDAD.unique())
localidadesList.remove('SIN LOCALIZACION')

In [17]:
years = list(df_input['ANIO'].unique())
years.remove(2019)
semesters = [1, 2]

In [None]:
predValues_array = []
for year in years:
    for semester in semesters:
        period_list = build_semester_list(str(year),semester)
        print(period_list)
        df_by_period=df_by_date[df_by_date.PERIODO_TS.isin(period_list)]
        #df_by_period = set_initial_dataset_day(df_by_period,'Monday')

        min_date_on_period = df_by_period.reset_index().date.min()
        max_date_on_period = df_by_period.reset_index().date.max()
        print('min date on period',min_date_on_period)
        print('max_date_on_period',max_date_on_period)

        expName = 'aggressiveBehavior_localidad_by_semester'
        periodName = str(year)+'-'+str(semester)
        predValues = predictability_experiment_localidades(df_by_period,min_date_on_period,max_date_on_period,lenWindow,localidadesList,Levels,expName,periodName)
        predValues_array = predValues_array + list(predValues)
    
df_prediction = pd.DataFrame(predValues_array, columns=['experiment_name','period','localidad','lenWindow','crime_level','predictability','contingency','constancy'])
periodCategories = list(df_prediction['period'].unique())
table_predictability_by_period_report(df_prediction,lenWindow,localidadesList,periodCategories,expName,Levels[0])



In [None]:
df_prediction.loc[df_prediction.period.str.contains("-1"), 'semester'] = 'I'
df_prediction.loc[df_prediction.period.str.contains("-2"), 'semester'] = 'II'
df_prediction.loc[df_prediction.period.str.contains("2017"), 'year'] = '2017'
df_prediction.loc[df_prediction.period.str.contains("2018"), 'year'] = '2018'

### Localidades with higher predictability values

In [53]:
df_localidad = df_prediction[df_prediction["localidad"] == "CIUDAD BOLIVAR"]
fig = px.scatter(df_localidad, x="semester", y="predictability",color="year")
fig.show()

In [54]:
df_localidad = df_prediction[df_prediction["localidad"] == "BOSA"]
fig = px.scatter(df_localidad, x="semester", y="predictability",color="year")
fig.show()

### Localidades with low predictability values

In [55]:
df_localidad = df_prediction[df_prediction["localidad"] == "TEUSAQUILLO"]
fig = px.scatter(df_localidad, x="semester", y="predictability",color="year")
fig.show()

In [56]:
df_localidad = df_prediction[df_prediction["localidad"] == "LOS MARTIRES"]
fig = px.scatter(df_localidad, x="semester", y="predictability",color="year")
fig.show()

## Localidades, trimester

In [57]:
Levels=[3]
lenWindow = 7
levelCategories = list(map(lambda x: str(x), Levels))
localidadesList = list(df_by_date.LOCALIDAD.unique())
localidadesList.remove('SIN LOCALIZACION')

In [58]:
years = list(df_input['ANIO'].unique())
years.remove(2019)
trimesters = [1, 2, 3, 4]

In [59]:
predValues_array = []
for year in years:
    for trimester in trimesters:
        period_list = build_trimester_list(str(year),trimester)
        print(period_list)
        df_by_period=df_by_date[df_by_date.PERIODO_TS.isin(period_list)]
        #df_by_period = set_initial_dataset_day(df_by_period,'Monday')

        min_date_on_period = df_by_period.reset_index().date.min()
        max_date_on_period = df_by_period.reset_index().date.max()
        print(min_date_on_period,max_date_on_period)

        expName = 'aggressiveBehavior_localidad_by_trimester'
        periodName = str(year)+'-'+str(trimester)
        predValues = predictability_experiment_localidades(df_by_period,min_date_on_period,max_date_on_period,lenWindow,localidadesList,Levels,expName,periodName)
        predValues_array = predValues_array + list(predValues)
    
df_prediction = pd.DataFrame(predValues_array, columns=['experiment_name','period','localidad','lenWindow','crime_level','predictability','contingency','constancy'])
#periodCategories = list(df_prediction['period'].unique())
#table_predictability_by_period_report(df_prediction,lenWindow,localidadesList,periodCategories,expName,Levels[0])



['2017/01', '2017/02', '2017/03']
2017-01-01 00:00:00 2017-03-31 00:00:00
ANTONIO NARIÑO
remove outliers
            total_eventos
date                     
2017-01-01             24
2017-01-02              8
2017-01-03             19
set consecutive dates
            total_eventos
date                     
2017-01-01             24
2017-01-02              8
2017-01-03             19
monday_idx 1
starts on monday
            total_eventos
date                     
2017-01-02              8
2017-01-03             19
2017-01-04             13
min date on localidad 2017-01-02 00:00:00
max date on localidad 2017-03-31 00:00:00
samples_num 88
lT complete 12.571428571428571
lT aprox 12
84 84
BARRIOS UNIDOS
remove outliers
            total_eventos
date                     
2017-01-01             34
2017-01-02             12
2017-01-03             15
set consecutive dates
            total_eventos
date                     
2017-01-01             34
2017-01-02             12
2017-01-03        

USME
remove outliers
            total_eventos
date                     
2017-01-02             34
2017-01-03             26
2017-01-04             30
set consecutive dates
            total_eventos
date                     
2017-01-01             49
2017-01-02             34
2017-01-03             26
monday_idx 1
starts on monday
            total_eventos
date                     
2017-01-02             34
2017-01-03             26
2017-01-04             30
min date on localidad 2017-01-02 00:00:00
max date on localidad 2017-03-31 00:00:00
samples_num 88
lT complete 12.571428571428571
lT aprox 12
84 84
['2017/04', '2017/05', '2017/06']
2017-04-01 00:00:00 2017-06-30 00:00:00
ANTONIO NARIÑO
remove outliers
            total_eventos
date                     
2017-04-01             21
2017-04-02             34
2017-04-03             22
set consecutive dates
            total_eventos
date                     
2017-04-01             21
2017-04-02             34
2017-04-03             22
mo

TUNJUELITO
remove outliers
            total_eventos
date                     
2017-04-01             40
2017-04-02             45
2017-04-03             34
set consecutive dates
            total_eventos
date                     
2017-04-01             40
2017-04-02             45
2017-04-03             34
monday_idx 2
starts on monday
            total_eventos
date                     
2017-04-03             34
2017-04-04             21
2017-04-05             29
min date on localidad 2017-04-03 00:00:00
max date on localidad 2017-06-30 00:00:00
samples_num 88
lT complete 12.571428571428571
lT aprox 12
84 84
USAQUEN
remove outliers
            total_eventos
date                     
2017-04-01            113
2017-04-03             44
2017-04-04             56
set consecutive dates
            total_eventos
date                     
2017-04-01            113
2017-04-02             62
2017-04-03             44
monday_idx 2
starts on monday
            total_eventos
date                 

set consecutive dates
            total_eventos
date                     
2017-07-01            191
2017-07-02            262
2017-07-03            159
monday_idx 2
starts on monday
            total_eventos
date                     
2017-07-03            159
2017-07-04            110
2017-07-05             97
min date on localidad 2017-07-03 00:00:00
max date on localidad 2017-09-30 00:00:00
samples_num 89
lT complete 12.714285714285714
lT aprox 12
84 84
TEUSAQUILLO
remove outliers
            total_eventos
date                     
2017-07-01             34
2017-07-02             28
2017-07-03             26
set consecutive dates
            total_eventos
date                     
2017-07-01             34
2017-07-02             28
2017-07-03             26
monday_idx 2
starts on monday
            total_eventos
date                     
2017-07-03             26
2017-07-04             15
2017-07-05             13
min date on localidad 2017-07-03 00:00:00
max date on localidad 2017-0

SANTA FE
remove outliers
            total_eventos
date                     
2017-10-01             27
2017-10-02             27
2017-10-03             17
set consecutive dates
            total_eventos
date                     
2017-10-01             27
2017-10-02             27
2017-10-03             17
monday_idx 1
starts on monday
            total_eventos
date                     
2017-10-02             27
2017-10-03             17
2017-10-04             26
min date on localidad 2017-10-02 00:00:00
max date on localidad 2017-12-31 00:00:00
samples_num 90
lT complete 12.857142857142858
lT aprox 12
84 84
SUBA
remove outliers
            total_eventos
date                     
2017-10-01            343
2017-10-02            107
2017-10-03             93
set consecutive dates
            total_eventos
date                     
2017-10-01            343
2017-10-02            107
2017-10-03             93
monday_idx 1
starts on monday
            total_eventos
date                     


2018-01-04             55
set consecutive dates
            total_eventos
date                     
2018-01-01             60
2018-01-02             41
2018-01-03             36
monday_idx 0
starts on monday
            total_eventos
date                     
2018-01-01             60
2018-01-02             41
2018-01-03             36
min date on localidad 2018-01-01 00:00:00
max date on localidad 2018-03-31 00:00:00
samples_num 89
lT complete 12.714285714285714
lT aprox 12
84 84
SANTA FE
remove outliers
            total_eventos
date                     
2018-01-01             37
2018-01-02             18
2018-01-03             21
set consecutive dates
            total_eventos
date                     
2018-01-01             37
2018-01-02             18
2018-01-03             21
monday_idx 0
starts on monday
            total_eventos
date                     
2018-01-01             37
2018-01-02             18
2018-01-03             21
min date on localidad 2018-01-01 00:00:00
max d

set consecutive dates
            total_eventos
date                     
2018-04-01             63
2018-04-02             44
2018-04-03             33
monday_idx 1
starts on monday
            total_eventos
date                     
2018-04-02             44
2018-04-03             33
2018-04-04             27
min date on localidad 2018-04-02 00:00:00
max date on localidad 2018-06-30 00:00:00
samples_num 89
lT complete 12.714285714285714
lT aprox 12
84 84
SAN CRISTOBAL
remove outliers
            total_eventos
date                     
2018-04-01             67
2018-04-02             38
2018-04-03             36
set consecutive dates
            total_eventos
date                     
2018-04-01             67
2018-04-02             38
2018-04-03             36
monday_idx 1
starts on monday
            total_eventos
date                     
2018-04-02             38
2018-04-03             36
2018-04-04             45
min date on localidad 2018-04-02 00:00:00
max date on localidad 2018

RAFAEL URIBE URIBE
remove outliers
            total_eventos
date                     
2018-07-01            119
2018-07-02             56
2018-07-03             74
set consecutive dates
            total_eventos
date                     
2018-07-01            119
2018-07-02             56
2018-07-03             74
monday_idx 1
starts on monday
            total_eventos
date                     
2018-07-02             56
2018-07-03             74
2018-07-04             43
min date on localidad 2018-07-02 00:00:00
max date on localidad 2018-09-30 00:00:00
samples_num 90
lT complete 12.857142857142858
lT aprox 12
84 84
SAN CRISTOBAL
remove outliers
            total_eventos
date                     
2018-07-01            113
2018-07-02             71
2018-07-03             66
set consecutive dates
            total_eventos
date                     
2018-07-01            113
2018-07-02             71
2018-07-03             66
monday_idx 1
starts on monday
            total_eventos
date   

monday_idx 0
starts on monday
            total_eventos
date                     
2018-10-01             33
2018-10-02             39
2018-10-03             29
min date on localidad 2018-10-01 00:00:00
max date on localidad 2018-12-31 00:00:00
samples_num 91
lT complete 13.0
lT aprox 13
91 91
RAFAEL URIBE URIBE
remove outliers
            total_eventos
date                     
2018-10-01             46
2018-10-02             46
2018-10-03             37
set consecutive dates
            total_eventos
date                     
2018-10-01             46
2018-10-02             46
2018-10-03             37
monday_idx 0
starts on monday
            total_eventos
date                     
2018-10-01             46
2018-10-02             46
2018-10-03             37
min date on localidad 2018-10-01 00:00:00
max date on localidad 2018-12-31 00:00:00
samples_num 91
lT complete 13.0
lT aprox 13
91 91
SAN CRISTOBAL
remove outliers
            total_eventos
date                     
2018-10-01   

In [60]:
df_prediction.loc[df_prediction.period.str.contains("-1"), 'trimester'] = 'I'
df_prediction.loc[df_prediction.period.str.contains("-2"), 'trimester'] = 'II'
df_prediction.loc[df_prediction.period.str.contains("-3"), 'trimester'] = 'III'
df_prediction.loc[df_prediction.period.str.contains("-4"), 'trimester'] = 'IV'
df_prediction.loc[df_prediction.period.str.contains("2017"), 'year'] = '2017'
df_prediction.loc[df_prediction.period.str.contains("2018"), 'year'] = '2018'

### Localidades with higher predictability values

In [62]:
df_localidad = df_prediction[df_prediction["localidad"] == "CIUDAD BOLIVAR"]
fig = px.scatter(df_localidad, x="trimester", y="predictability",color="year")
fig.show()

In [63]:
df_localidad = df_prediction[df_prediction["localidad"] == "BOSA"]
fig = px.scatter(df_localidad, x="trimester", y="predictability",color="year")
fig.show()

### Localidades with low predictability values

In [64]:
df_localidad = df_prediction[df_prediction["localidad"] == "TEUSAQUILLO"]
fig = px.scatter(df_localidad, x="trimester", y="predictability",color="year")
fig.show()

In [66]:
df_localidad = df_prediction[df_prediction["localidad"] == "LOS MARTIRES"]
fig = px.scatter(df_localidad, x="trimester", y="predictability",color="year")
fig.show()

## Localidades, bimester

In [68]:
Levels=[3]
lenWindow = 7
levelCategories = list(map(lambda x: str(x), Levels))
localidadesList = list(df_by_date.LOCALIDAD.unique())
localidadesList.remove('SIN LOCALIZACION')

In [69]:
years = list(df_input['ANIO'].unique())
years.remove(2019)
bimesters = [1, 2, 3, 4, 5, 6]

In [70]:
predValues_array = []
for year in years:
    for bimester in bimesters:
        period_list = build_bimester_list(str(year),bimester)
        print(period_list)
        df_by_period=df_by_date[df_by_date.PERIODO_TS.isin(period_list)]
        #df_by_period = set_initial_dataset_day(df_by_period,'Monday')

        min_date_on_period = df_by_period.reset_index().date.min()
        max_date_on_period = df_by_period.reset_index().date.max()
        print(min_date_on_period,max_date_on_period)

        expName = 'aggressiveBehavior_localidad_by_bimester'
        periodName = str(year)+'-'+str(bimester)
        predValues = predictability_experiment_localidades(df_by_period,min_date_on_period,max_date_on_period,lenWindow,localidadesList,Levels,expName,periodName)
        predValues_array = predValues_array + list(predValues)
    
df_prediction = pd.DataFrame(predValues_array, columns=['experiment_name','period','localidad','lenWindow','crime_level','predictability','contingency','constancy'])
#periodCategories = list(df_prediction['period'].unique())
#table_predictability_by_period_report(df_prediction,lenWindow,localidadesList,periodCategories,expName,Levels[0])



['2017/01', '2017/02']
2017-01-01 00:00:00 2017-02-28 00:00:00
ANTONIO NARIÑO
remove outliers
            total_eventos
date                     
2017-01-01             24
2017-01-02              8
2017-01-03             19
set consecutive dates
            total_eventos
date                     
2017-01-01             24
2017-01-02              8
2017-01-03             19
monday_idx 1
starts on monday
            total_eventos
date                     
2017-01-02              8
2017-01-03             19
2017-01-04             13
min date on localidad 2017-01-02 00:00:00
max date on localidad 2017-02-28 00:00:00
samples_num 57
lT complete 8.142857142857142
lT aprox 8
56 56
BARRIOS UNIDOS
remove outliers
            total_eventos
date                     
2017-01-01             34
2017-01-02             12
2017-01-03             15
set consecutive dates
            total_eventos
date                     
2017-01-01             34
2017-01-02             12
2017-01-03             15
monda

starts on monday
            total_eventos
date                     
2017-01-02             34
2017-01-03             26
2017-01-04             30
min date on localidad 2017-01-02 00:00:00
max date on localidad 2017-02-28 00:00:00
samples_num 57
lT complete 8.142857142857142
lT aprox 8
56 56
['2017/03', '2017/04']
2017-03-01 00:00:00 2017-04-30 00:00:00
ANTONIO NARIÑO
remove outliers
            total_eventos
date                     
2017-03-01             25
2017-03-02             16
2017-03-03             17
set consecutive dates
            total_eventos
date                     
2017-03-01             25
2017-03-02             16
2017-03-03             17
monday_idx 5
starts on monday
            total_eventos
date                     
2017-03-06             10
2017-03-07              8
2017-03-08             12
min date on localidad 2017-03-06 00:00:00
max date on localidad 2017-04-30 00:00:00
samples_num 55
lT complete 7.857142857142857
lT aprox 7
49 49
BARRIOS UNIDOS
remove out

monday_idx 0
starts on monday
            total_eventos
date                     
2017-05-01             14
2017-05-02             16
2017-05-03             15
min date on localidad 2017-05-01 00:00:00
max date on localidad 2017-06-30 00:00:00
samples_num 60
lT complete 8.571428571428571
lT aprox 8
56 56
BARRIOS UNIDOS
remove outliers
            total_eventos
date                     
2017-05-01             16
2017-05-02             20
2017-05-03             17
set consecutive dates
            total_eventos
date                     
2017-05-01             16
2017-05-02             20
2017-05-03             17
monday_idx 0
starts on monday
            total_eventos
date                     
2017-05-01             16
2017-05-02             20
2017-05-03             17
min date on localidad 2017-05-01 00:00:00
max date on localidad 2017-06-30 00:00:00
samples_num 60
lT complete 8.571428571428571
lT aprox 8
56 56
BOSA
remove outliers
            total_eventos
date                     
20

set consecutive dates
            total_eventos
date                     
2017-07-01             34
2017-07-02             24
2017-07-03             19
monday_idx 2
starts on monday
            total_eventos
date                     
2017-07-03             19
2017-07-04             23
2017-07-05             21
min date on localidad 2017-07-03 00:00:00
max date on localidad 2017-08-31 00:00:00
samples_num 59
lT complete 8.428571428571429
lT aprox 8
56 56
BOSA
remove outliers
            total_eventos
date                     
2017-07-01            122
2017-07-02            198
2017-07-03            121
set consecutive dates
            total_eventos
date                     
2017-07-01            122
2017-07-02            198
2017-07-03            121
monday_idx 2
starts on monday
            total_eventos
date                     
2017-07-03            121
2017-07-04             64
2017-07-05             63
min date on localidad 2017-07-03 00:00:00
max date on localidad 2017-08-31 00:0

min date on localidad 2017-07-03 00:00:00
max date on localidad 2017-08-31 00:00:00
samples_num 59
lT complete 8.428571428571429
lT aprox 8
56 56
TEUSAQUILLO
remove outliers
            total_eventos
date                     
2017-07-01             34
2017-07-02             28
2017-07-03             26
set consecutive dates
            total_eventos
date                     
2017-07-01             34
2017-07-02             28
2017-07-03             26
monday_idx 2
starts on monday
            total_eventos
date                     
2017-07-03             26
2017-07-04             15
2017-07-05             13
min date on localidad 2017-07-03 00:00:00
max date on localidad 2017-08-31 00:00:00
samples_num 59
lT complete 8.428571428571429
lT aprox 8
56 56
TUNJUELITO
remove outliers
            total_eventos
date                     
2017-07-01             36
2017-07-02             41
2017-07-03             28
set consecutive dates
            total_eventos
date                     
2017-07

set consecutive dates
            total_eventos
date                     
2017-09-01             32
2017-09-02             28
2017-09-03             30
monday_idx 3
starts on monday
            total_eventos
date                     
2017-09-04             16
2017-09-05             20
2017-09-06             19
min date on localidad 2017-09-04 00:00:00
max date on localidad 2017-10-31 00:00:00
samples_num 57
lT complete 8.142857142857142
lT aprox 8
56 56
PUENTE ARANDA
remove outliers
            total_eventos
date                     
2017-09-01             39
2017-09-02             59
2017-09-03             55
set consecutive dates
            total_eventos
date                     
2017-09-01             39
2017-09-02             59
2017-09-03             55
monday_idx 3
starts on monday
            total_eventos
date                     
2017-09-04             27
2017-09-05             30
2017-09-06             29
min date on localidad 2017-09-04 00:00:00
max date on localidad 2017-1

set consecutive dates
            total_eventos
date                     
2017-11-01             32
2017-11-02             26
2017-11-03             35
monday_idx 5
starts on monday
            total_eventos
date                     
2017-11-06             18
2017-11-07             11
2017-11-08             24
min date on localidad 2017-11-06 00:00:00
max date on localidad 2017-12-31 00:00:00
samples_num 55
lT complete 7.857142857142857
lT aprox 7
49 49
CIUDAD BOLIVAR
remove outliers
            total_eventos
date                     
2017-11-01             90
2017-11-02             71
2017-11-03             64
set consecutive dates
            total_eventos
date                     
2017-11-01             90
2017-11-02             71
2017-11-03             64
monday_idx 5
starts on monday
            total_eventos
date                     
2017-11-06            129
2017-11-07             69
2017-11-08             66
min date on localidad 2017-11-06 00:00:00
max date on localidad 2017-

set consecutive dates
            total_eventos
date                     
2017-11-01             56
2017-11-02             37
2017-11-03             29
monday_idx 5
starts on monday
            total_eventos
date                     
2017-11-06             79
2017-11-07             39
2017-11-08             34
min date on localidad 2017-11-06 00:00:00
max date on localidad 2017-12-31 00:00:00
samples_num 55
lT complete 7.857142857142857
lT aprox 7
49 49
['2018/01', '2018/02']
2018-01-01 00:00:00 2018-02-28 00:00:00
ANTONIO NARIÑO
remove outliers
            total_eventos
date                     
2018-01-01             25
2018-01-02             10
2018-01-03             12
set consecutive dates
            total_eventos
date                     
2018-01-01             25
2018-01-02             10
2018-01-03             12
monday_idx 0
starts on monday
            total_eventos
date                     
2018-01-01             25
2018-01-02             10
2018-01-03             12
min da

SANTA FE
remove outliers
            total_eventos
date                     
2018-01-01             37
2018-01-02             18
2018-01-03             21
set consecutive dates
            total_eventos
date                     
2018-01-01             37
2018-01-02             18
2018-01-03             21
monday_idx 0
starts on monday
            total_eventos
date                     
2018-01-01             37
2018-01-02             18
2018-01-03             21
min date on localidad 2018-01-01 00:00:00
max date on localidad 2018-02-28 00:00:00
samples_num 58
lT complete 8.285714285714286
lT aprox 8
56 56
SUBA
remove outliers
            total_eventos
date                     
2018-01-02             88
2018-01-03             87
2018-01-04             82
set consecutive dates
            total_eventos
date                     
2018-01-01            129
2018-01-02             88
2018-01-03             87
monday_idx 0
starts on monday
            total_eventos
date                     
20

starts on monday
            total_eventos
date                     
2018-03-05             35
2018-03-06             31
2018-03-07             30
min date on localidad 2018-03-05 00:00:00
max date on localidad 2018-04-30 00:00:00
samples_num 56
lT complete 8.0
lT aprox 8
56 56
KENNEDY
remove outliers
            total_eventos
date                     
2018-03-01            131
2018-03-02            143
2018-03-03            221
set consecutive dates
            total_eventos
date                     
2018-03-01            131
2018-03-02            143
2018-03-03            221
monday_idx 4
starts on monday
            total_eventos
date                     
2018-03-05            130
2018-03-06            109
2018-03-07            119
min date on localidad 2018-03-05 00:00:00
max date on localidad 2018-04-30 00:00:00
samples_num 56
lT complete 8.0
lT aprox 8
56 56
LOS MARTIRES
remove outliers
            total_eventos
date                     
2018-03-01             15
2018-03-02      

monday_idx 6
starts on monday
            total_eventos
date                     
2018-05-07             91
2018-05-08             53
2018-05-09             74
min date on localidad 2018-05-07 00:00:00
max date on localidad 2018-06-30 00:00:00
samples_num 54
lT complete 7.714285714285714
lT aprox 7
49 49
CANDELARIA
remove outliers
            total_eventos
date                     
2018-05-01             10
2018-05-02              3
2018-05-03              3
set consecutive dates
            total_eventos
date                     
2018-05-01             10
2018-05-02              3
2018-05-03              3
monday_idx 6
starts on monday
            total_eventos
date                     
2018-05-07              3
2018-05-08              8
2018-05-09              3
min date on localidad 2018-05-07 00:00:00
max date on localidad 2018-06-30 00:00:00
samples_num 54
lT complete 7.714285714285714
lT aprox 7
49 49
CHAPINERO
remove outliers
            total_eventos
date                     
2

monday_idx 1
starts on monday
            total_eventos
date                     
2018-07-02            104
2018-07-03            108
2018-07-04             83
min date on localidad 2018-07-02 00:00:00
max date on localidad 2018-08-31 00:00:00
samples_num 60
lT complete 8.571428571428571
lT aprox 8
56 56
CANDELARIA
remove outliers
            total_eventos
date                     
2018-07-01             11
2018-07-02              4
2018-07-03             11
set consecutive dates
            total_eventos
date                     
2018-07-01             11
2018-07-02              4
2018-07-03             11
monday_idx 1
starts on monday
            total_eventos
date                     
2018-07-02              4
2018-07-03             11
2018-07-04              7
min date on localidad 2018-07-02 00:00:00
max date on localidad 2018-08-31 00:00:00
samples_num 60
lT complete 8.571428571428571
lT aprox 8
56 56
CHAPINERO
remove outliers
            total_eventos
date                     
2

min date on localidad 2018-09-03 00:00:00
max date on localidad 2018-10-31 00:00:00
samples_num 58
lT complete 8.285714285714286
lT aprox 8
56 56
BARRIOS UNIDOS
remove outliers
            total_eventos
date                     
2018-09-01             37
2018-09-02             29
2018-09-03             26
set consecutive dates
            total_eventos
date                     
2018-09-01             37
2018-09-02             29
2018-09-03             26
monday_idx 2
starts on monday
            total_eventos
date                     
2018-09-03             26
2018-09-04             17
2018-09-05             29
min date on localidad 2018-09-03 00:00:00
max date on localidad 2018-10-31 00:00:00
samples_num 58
lT complete 8.285714285714286
lT aprox 8
56 56
BOSA
remove outliers
            total_eventos
date                     
2018-09-01            161
2018-09-03            100
2018-09-04             70
set consecutive dates
            total_eventos
date                     
2018-09-01

min date on localidad 2018-09-03 00:00:00
max date on localidad 2018-10-31 00:00:00
samples_num 58
lT complete 8.285714285714286
lT aprox 8
56 56
USME
remove outliers
            total_eventos
date                     
2018-09-01             52
2018-09-03             31
2018-09-04             44
set consecutive dates
            total_eventos
date                     
2018-09-01             52
2018-09-02             45
2018-09-03             31
monday_idx 2
starts on monday
            total_eventos
date                     
2018-09-03             31
2018-09-04             44
2018-09-05             41
min date on localidad 2018-09-03 00:00:00
max date on localidad 2018-10-31 00:00:00
samples_num 58
lT complete 8.285714285714286
lT aprox 8
56 56
['2018/11', '2018/12']
2018-11-01 00:00:00 2018-12-31 00:00:00
ANTONIO NARIÑO
remove outliers
            total_eventos
date                     
2018-11-01             15
2018-11-02             14
2018-11-03             18
set consecutive dates

set consecutive dates
            total_eventos
date                     
2018-11-01             23
2018-11-02             22
2018-11-03             34
monday_idx 4
starts on monday
            total_eventos
date                     
2018-11-05             13
2018-11-06             26
2018-11-07             23
min date on localidad 2018-11-05 00:00:00
max date on localidad 2018-12-31 00:00:00
samples_num 56
lT complete 8.0
lT aprox 8
56 56
TUNJUELITO
remove outliers
            total_eventos
date                     
2018-11-01             23
2018-11-02             27
2018-11-03             29
set consecutive dates
            total_eventos
date                     
2018-11-01             23
2018-11-02             27
2018-11-03             29
monday_idx 4
starts on monday
            total_eventos
date                     
2018-11-05             30
2018-11-06             24
2018-11-07             22
min date on localidad 2018-11-05 00:00:00
max date on localidad 2018-12-31 00:00:00
sam

In [71]:
df_prediction.loc[df_prediction.period.str.contains("-1"), 'bimester'] = 'I'
df_prediction.loc[df_prediction.period.str.contains("-2"), 'bimester'] = 'II'
df_prediction.loc[df_prediction.period.str.contains("-3"), 'bimester'] = 'III'
df_prediction.loc[df_prediction.period.str.contains("-4"), 'bimester'] = 'IV'
df_prediction.loc[df_prediction.period.str.contains("-5"), 'bimester'] = 'V'
df_prediction.loc[df_prediction.period.str.contains("-6"), 'bimester'] = 'VI'
df_prediction.loc[df_prediction.period.str.contains("2017"), 'year'] = '2017'
df_prediction.loc[df_prediction.period.str.contains("2018"), 'year'] = '2018'

### Localidades with higher predictability values

In [72]:
df_localidad = df_prediction[df_prediction["localidad"] == "CIUDAD BOLIVAR"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year")
fig.show()

In [73]:
df_localidad = df_prediction[df_prediction["localidad"] == "BOSA"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year")
fig.show()

### Localidades with low predictability values

In [74]:
df_localidad = df_prediction[df_prediction["localidad"] == "TEUSAQUILLO"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year")
fig.show()

In [75]:
df_localidad = df_prediction[df_prediction["localidad"] == "LOS MARTIRES"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year")
fig.show()

## Localidades, months

In [76]:
Levels=[3]
lenWindow = 7
levelCategories = list(map(lambda x: str(x), Levels))
localidadesList = list(df_by_date.LOCALIDAD.unique())
localidadesList.remove('SIN LOCALIZACION')

In [77]:
months = df_by_date.PERIODO_TS.unique()
months

array(['2017/01', '2017/02', '2017/03', '2017/04', '2017/05', '2017/06',
       '2017/07', '2017/08', '2017/09', '2017/10', '2017/11', '2017/12',
       '2018/01', '2018/02', '2018/03', '2018/04', '2018/05', '2018/06',
       '2018/07', '2018/08', '2018/09', '2018/10', '2018/11', '2018/12',
       '2019/01'], dtype=object)

In [78]:
# NOTE: change size table
#fig.set_size_inches(14, 6)

In [79]:
predValues_array = []
for month in months:
    period_list = []
    period_list.append(month)
    print(period_list)
    df_by_period=df_by_date[df_by_date.PERIODO_TS.isin(period_list)]
    #df_by_period = set_initial_dataset_day(df_by_period,'Monday')

    min_date_on_period = df_by_period.reset_index().date.min()
    max_date_on_period = df_by_period.reset_index().date.max()
    print(min_date_on_period,max_date_on_period)

    expName = 'aggressiveBehavior_localidad_by_month'
    periodName = str(month)
    predValues = predictability_experiment_localidades(df_by_period,min_date_on_period,max_date_on_period,lenWindow,localidadesList,Levels,expName,periodName)
    predValues_array = predValues_array + list(predValues)
    
df_prediction = pd.DataFrame(predValues_array, columns=['experiment_name','period','localidad','lenWindow','crime_level','predictability','contingency','constancy'])
#periodCategories = list(df_prediction['period'].unique())
#table_predictability_by_period_report(df_prediction,lenWindow,localidadesList,periodCategories,expName,Levels[0])



['2017/01']
2017-01-01 00:00:00 2017-01-31 00:00:00
ANTONIO NARIÑO
remove outliers
            total_eventos
date                     
2017-01-01             24
2017-01-02              8
2017-01-03             19
set consecutive dates
            total_eventos
date                     
2017-01-01             24
2017-01-02              8
2017-01-03             19
monday_idx 1
starts on monday
            total_eventos
date                     
2017-01-02              8
2017-01-03             19
2017-01-04             13
min date on localidad 2017-01-02 00:00:00
max date on localidad 2017-01-31 00:00:00
samples_num 29
lT complete 4.142857142857143
lT aprox 4
28 28
BARRIOS UNIDOS
remove outliers
            total_eventos
date                     
2017-01-01             34
2017-01-02             12
2017-01-03             15
set consecutive dates
            total_eventos
date                     
2017-01-01             34
2017-01-02             12
2017-01-03             15
monday_idx 1
sta

min date on localidad 2017-01-02 00:00:00
max date on localidad 2017-01-31 00:00:00
samples_num 29
lT complete 4.142857142857143
lT aprox 4
28 28
SUBA
remove outliers
            total_eventos
date                     
2017-01-02             88
2017-01-03             92
2017-01-04             96
set consecutive dates
            total_eventos
date                     
2017-01-01            109
2017-01-02             88
2017-01-03             92
monday_idx 1
starts on monday
            total_eventos
date                     
2017-01-02             88
2017-01-03             92
2017-01-04             96
min date on localidad 2017-01-02 00:00:00
max date on localidad 2017-01-31 00:00:00
samples_num 29
lT complete 4.142857142857143
lT aprox 4
28 28
TEUSAQUILLO
remove outliers
            total_eventos
date                     
2017-01-01             16
2017-01-02             13
2017-01-03             12
set consecutive dates
            total_eventos
date                     
2017-01-01   

remove outliers
            total_eventos
date                     
2017-02-01            112
2017-02-02            111
2017-02-03            123
set consecutive dates
            total_eventos
date                     
2017-02-01            112
2017-02-02            111
2017-02-03            123
monday_idx 5
starts on monday
            total_eventos
date                     
2017-02-06             86
2017-02-07            107
2017-02-08            116
min date on localidad 2017-02-06 00:00:00
max date on localidad 2017-02-28 00:00:00
samples_num 22
lT complete 3.142857142857143
lT aprox 3
21 21
TEUSAQUILLO
remove outliers
            total_eventos
date                     
2017-02-01             22
2017-02-02             17
2017-02-03             31
set consecutive dates
            total_eventos
date                     
2017-02-01             22
2017-02-02             17
2017-02-03             31
monday_idx 5
starts on monday
            total_eventos
date                     
2017

monday_idx 5
starts on monday
            total_eventos
date                     
2017-03-06             18
2017-03-07             30
2017-03-08             17
min date on localidad 2017-03-06 00:00:00
max date on localidad 2017-03-31 00:00:00
samples_num 25
lT complete 3.5714285714285716
lT aprox 3
21 21
SUBA
remove outliers
            total_eventos
date                     
2017-03-01            113
2017-03-02            121
2017-03-03            130
set consecutive dates
            total_eventos
date                     
2017-03-01            113
2017-03-02            121
2017-03-03            130
monday_idx 5
starts on monday
            total_eventos
date                     
2017-03-06            124
2017-03-07             83
2017-03-08            102
min date on localidad 2017-03-06 00:00:00
max date on localidad 2017-03-31 00:00:00
samples_num 25
lT complete 3.5714285714285716
lT aprox 3
21 21
TEUSAQUILLO
remove outliers
            total_eventos
date                     
201

set consecutive dates
            total_eventos
date                     
2017-04-01             22
2017-04-02             31
2017-04-03             22
monday_idx 2
starts on monday
            total_eventos
date                     
2017-04-03             22
2017-04-04             19
2017-04-05             33
min date on localidad 2017-04-03 00:00:00
max date on localidad 2017-04-30 00:00:00
samples_num 27
lT complete 3.857142857142857
lT aprox 3
21 21
PUENTE ARANDA
remove outliers
            total_eventos
date                     
2017-04-01             56
2017-04-03             28
2017-04-04             22
set consecutive dates
            total_eventos
date                     
2017-04-01             56
2017-04-02             36
2017-04-03             28
monday_idx 2
starts on monday
            total_eventos
date                     
2017-04-03             28
2017-04-04             22
2017-04-05             26
min date on localidad 2017-04-03 00:00:00
max date on localidad 2017-0

set consecutive dates
            total_eventos
date                     
2017-05-01            135
2017-05-02             81
2017-05-03             56
monday_idx 0
starts on monday
            total_eventos
date                     
2017-05-01            135
2017-05-02             81
2017-05-03             56
min date on localidad 2017-05-01 00:00:00
max date on localidad 2017-05-31 00:00:00
samples_num 30
lT complete 4.285714285714286
lT aprox 4
28 28
FONTIBON
remove outliers
            total_eventos
date                     
2017-05-01             37
2017-05-02             28
2017-05-03             23
set consecutive dates
            total_eventos
date                     
2017-05-01             37
2017-05-02             28
2017-05-03             23
monday_idx 0
starts on monday
            total_eventos
date                     
2017-05-01             37
2017-05-02             28
2017-05-03             23
min date on localidad 2017-05-01 00:00:00
max date on localidad 2017-05-31 

remove outliers
            total_eventos
date                     
2017-06-01             23
2017-06-02             47
2017-06-03             75
set consecutive dates
            total_eventos
date                     
2017-06-01             23
2017-06-02             47
2017-06-03             75
monday_idx 4
starts on monday
            total_eventos
date                     
2017-06-05             32
2017-06-06             28
2017-06-07             50
min date on localidad 2017-06-05 00:00:00
max date on localidad 2017-06-30 00:00:00
samples_num 25
lT complete 3.5714285714285716
lT aprox 3
21 21
KENNEDY
remove outliers
            total_eventos
date                     
2017-06-01            125
2017-06-02            127
2017-06-03            247
set consecutive dates
            total_eventos
date                     
2017-06-01            125
2017-06-02            127
2017-06-03            247
monday_idx 4
starts on monday
            total_eventos
date                     
2017-06

remove outliers
            total_eventos
date                     
2017-07-01            122
2017-07-02            198
2017-07-03            121
set consecutive dates
            total_eventos
date                     
2017-07-01            122
2017-07-02            198
2017-07-03            121
monday_idx 2
starts on monday
            total_eventos
date                     
2017-07-03            121
2017-07-04             64
2017-07-05             63
min date on localidad 2017-07-03 00:00:00
max date on localidad 2017-07-31 00:00:00
samples_num 28
lT complete 4.0
lT aprox 4
28 28
CANDELARIA
remove outliers
            total_eventos
date                     
2017-07-01              6
2017-07-02              7
2017-07-03              4
set consecutive dates
            total_eventos
date                     
2017-07-01              6
2017-07-02              7
2017-07-03              4
monday_idx 2
starts on monday
            total_eventos
date                     
2017-07-03         

2017-08-03             90
monday_idx 6
starts on monday
            total_eventos
date                     
2017-08-07            114
2017-08-08             67
2017-08-09             64
min date on localidad 2017-08-07 00:00:00
max date on localidad 2017-08-31 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21
CANDELARIA
remove outliers
            total_eventos
date                     
2017-08-01              3
2017-08-02              6
2017-08-03              4
set consecutive dates
            total_eventos
date                     
2017-08-01              3
2017-08-02              6
2017-08-03              4
monday_idx 6
starts on monday
            total_eventos
date                     
2017-08-07              4
2017-08-08              3
2017-08-09              6
min date on localidad 2017-08-07 00:00:00
max date on localidad 2017-08-31 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21
CHAPINERO
remove outliers
            total_eventos

2017-09-03            177
set consecutive dates
            total_eventos
date                     
2017-09-01            101
2017-09-02            113
2017-09-03            177
monday_idx 3
starts on monday
            total_eventos
date                     
2017-09-04             59
2017-09-05            108
2017-09-06             86
min date on localidad 2017-09-04 00:00:00
max date on localidad 2017-09-30 00:00:00
samples_num 26
lT complete 3.7142857142857144
lT aprox 3
21 21
CANDELARIA
remove outliers
            total_eventos
date                     
2017-09-01              1
2017-09-02              9
2017-09-03              8
set consecutive dates
            total_eventos
date                     
2017-09-01              1
2017-09-02              9
2017-09-03              8
monday_idx 3
starts on monday
            total_eventos
date                     
2017-09-04              3
2017-09-05              6
2017-09-06              4
min date on localidad 2017-09-04 00:00:00
max 

CANDELARIA
remove outliers
            total_eventos
date                     
2017-10-01              5
2017-10-02              3
2017-10-03              4
set consecutive dates
            total_eventos
date                     
2017-10-01              5
2017-10-02              3
2017-10-03              4
monday_idx 1
starts on monday
            total_eventos
date                     
2017-10-02              3
2017-10-03              4
2017-10-04              1
min date on localidad 2017-10-02 00:00:00
max date on localidad 2017-10-31 00:00:00
samples_num 29
lT complete 4.142857142857143
lT aprox 4
28 28
CHAPINERO
remove outliers
            total_eventos
date                     
2017-10-01             46
2017-10-02             25
2017-10-03             29
set consecutive dates
            total_eventos
date                     
2017-10-01             46
2017-10-02             25
2017-10-03             29
monday_idx 1
starts on monday
            total_eventos
date                 

lT aprox 3
21 21
CANDELARIA
remove outliers
            total_eventos
date                     
2017-11-01             11
2017-11-02              5
2017-11-03              6
set consecutive dates
            total_eventos
date                     
2017-11-01             11
2017-11-02              5
2017-11-03              6
monday_idx 5
starts on monday
            total_eventos
date                     
2017-11-06             10
2017-11-07              3
2017-11-08              9
min date on localidad 2017-11-06 00:00:00
max date on localidad 2017-11-30 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21
CHAPINERO
remove outliers
            total_eventos
date                     
2017-11-01             32
2017-11-02             26
2017-11-03             35
set consecutive dates
            total_eventos
date                     
2017-11-01             32
2017-11-02             26
2017-11-03             35
monday_idx 5
starts on monday
            total_eventos
dat

min date on localidad 2017-11-06 00:00:00
max date on localidad 2017-11-30 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21
USAQUEN
remove outliers
            total_eventos
date                     
2017-11-01             46
2017-11-02             60
2017-11-03             58
set consecutive dates
            total_eventos
date                     
2017-11-01             46
2017-11-02             60
2017-11-03             58
monday_idx 5
starts on monday
            total_eventos
date                     
2017-11-06             64
2017-11-07             54
2017-11-08             49
min date on localidad 2017-11-06 00:00:00
max date on localidad 2017-11-30 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21
USME
remove outliers
            total_eventos
date                     
2017-11-01             56
2017-11-02             37
2017-11-03             29
set consecutive dates
            total_eventos
date                     
2017-11-01     

remove outliers
            total_eventos
date                     
2017-12-01             57
2017-12-02             79
2017-12-03            145
set consecutive dates
            total_eventos
date                     
2017-12-01             57
2017-12-02             79
2017-12-03            145
monday_idx 3
starts on monday
            total_eventos
date                     
2017-12-04             53
2017-12-05             43
2017-12-06             44
min date on localidad 2017-12-04 00:00:00
max date on localidad 2017-12-31 00:00:00
samples_num 27
lT complete 3.857142857142857
lT aprox 3
21 21
SANTA FE
remove outliers
            total_eventos
date                     
2017-12-01             36
2017-12-02             21
2017-12-03             33
set consecutive dates
            total_eventos
date                     
2017-12-01             36
2017-12-02             21
2017-12-03             33
monday_idx 3
starts on monday
            total_eventos
date                     
2017-12

KENNEDY
remove outliers
            total_eventos
date                     
2018-01-02             78
2018-01-03            125
2018-01-04            103
set consecutive dates
            total_eventos
date                     
2018-01-01            118
2018-01-02             78
2018-01-03            125
monday_idx 0
starts on monday
            total_eventos
date                     
2018-01-01            118
2018-01-02             78
2018-01-03            125
min date on localidad 2018-01-01 00:00:00
max date on localidad 2018-01-31 00:00:00
samples_num 30
lT complete 4.285714285714286
lT aprox 4
28 28
LOS MARTIRES
remove outliers
            total_eventos
date                     
2018-01-01             20
2018-01-02             15
2018-01-03              4
set consecutive dates
            total_eventos
date                     
2018-01-01             20
2018-01-02             15
2018-01-03              4
monday_idx 0
starts on monday
            total_eventos
date                 

min date on localidad 2018-02-05 00:00:00
max date on localidad 2018-02-28 00:00:00
samples_num 23
lT complete 3.2857142857142856
lT aprox 3
21 21
CHAPINERO
remove outliers
            total_eventos
date                     
2018-02-01             26
2018-02-02             41
2018-02-04             41
set consecutive dates
            total_eventos
date                     
2018-02-01             26
2018-02-02             41
2018-02-03             31
monday_idx 4
starts on monday
            total_eventos
date                     
2018-02-05             27
2018-02-06             21
2018-02-07             30
min date on localidad 2018-02-05 00:00:00
max date on localidad 2018-02-28 00:00:00
samples_num 23
lT complete 3.2857142857142856
lT aprox 3
21 21
CIUDAD BOLIVAR
remove outliers
            total_eventos
date                     
2018-02-01             89
2018-02-02             75
2018-02-03            119
set consecutive dates
            total_eventos
date                     
201

2018-02-07             60
min date on localidad 2018-02-05 00:00:00
max date on localidad 2018-02-28 00:00:00
samples_num 23
lT complete 3.2857142857142856
lT aprox 3
21 21
USME
remove outliers
            total_eventos
date                     
2018-02-01             31
2018-02-02             31
2018-02-03             38
set consecutive dates
            total_eventos
date                     
2018-02-01             31
2018-02-02             31
2018-02-03             38
monday_idx 4
starts on monday
            total_eventos
date                     
2018-02-05             40
2018-02-06             35
2018-02-07             42
min date on localidad 2018-02-05 00:00:00
max date on localidad 2018-02-28 00:00:00
samples_num 23
lT complete 3.2857142857142856
lT aprox 3
21 21
['2018/03']
2018-03-01 00:00:00 2018-03-31 00:00:00
ANTONIO NARIÑO
remove outliers
            total_eventos
date                     
2018-03-01             13
2018-03-02             17
2018-03-04             24
set 

remove outliers
            total_eventos
date                     
2018-03-01             23
2018-03-02             31
2018-03-03             39
set consecutive dates
            total_eventos
date                     
2018-03-01             23
2018-03-02             31
2018-03-03             39
monday_idx 4
starts on monday
            total_eventos
date                     
2018-03-05             22
2018-03-06             22
2018-03-07             22
min date on localidad 2018-03-05 00:00:00
max date on localidad 2018-03-31 00:00:00
samples_num 26
lT complete 3.7142857142857144
lT aprox 3
21 21
SUBA
remove outliers
            total_eventos
date                     
2018-03-01            125
2018-03-02            127
2018-03-03            210
set consecutive dates
            total_eventos
date                     
2018-03-01            125
2018-03-02            127
2018-03-03            210
monday_idx 4
starts on monday
            total_eventos
date                     
2018-03-05

remove outliers
            total_eventos
date                     
2018-04-01             17
2018-04-02             20
2018-04-03             29
set consecutive dates
            total_eventos
date                     
2018-04-01             17
2018-04-02             20
2018-04-03             29
monday_idx 1
starts on monday
            total_eventos
date                     
2018-04-02             20
2018-04-03             29
2018-04-04             19
min date on localidad 2018-04-02 00:00:00
max date on localidad 2018-04-30 00:00:00
samples_num 28
lT complete 4.0
lT aprox 4
28 28
SUBA
remove outliers
            total_eventos
date                     
2018-04-01            168
2018-04-02            101
2018-04-03             96
set consecutive dates
            total_eventos
date                     
2018-04-01            168
2018-04-02            101
2018-04-03             96
monday_idx 1
starts on monday
            total_eventos
date                     
2018-04-02            101

remove outliers
            total_eventos
date                     
2018-05-01             29
2018-05-02             20
2018-05-03             26
set consecutive dates
            total_eventos
date                     
2018-05-01             29
2018-05-02             20
2018-05-03             26
monday_idx 6
starts on monday
            total_eventos
date                     
2018-05-07             28
2018-05-08             18
2018-05-09             20
min date on localidad 2018-05-07 00:00:00
max date on localidad 2018-05-31 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21
SUBA
remove outliers
            total_eventos
date                     
2018-05-01            171
2018-05-02            128
2018-05-03            113
set consecutive dates
            total_eventos
date                     
2018-05-01            171
2018-05-02            128
2018-05-03            113
monday_idx 6
starts on monday
            total_eventos
date                     
2018-05-07

set consecutive dates
            total_eventos
date                     
2018-06-01             25
2018-06-02             23
2018-06-03             30
monday_idx 3
starts on monday
            total_eventos
date                     
2018-06-04             11
2018-06-05             18
2018-06-06             19
min date on localidad 2018-06-04 00:00:00
max date on localidad 2018-06-30 00:00:00
samples_num 26
lT complete 3.7142857142857144
lT aprox 3
21 21
SUBA
remove outliers
            total_eventos
date                     
2018-06-01            134
2018-06-02            213
2018-06-03            275
set consecutive dates
            total_eventos
date                     
2018-06-01            134
2018-06-02            213
2018-06-03            275
monday_idx 3
starts on monday
            total_eventos
date                     
2018-06-04            152
2018-06-05            104
2018-06-06             97
min date on localidad 2018-06-04 00:00:00
max date on localidad 2018-06-30 00:

SANTA FE
remove outliers
            total_eventos
date                     
2018-07-01             31
2018-07-02             16
2018-07-03             19
set consecutive dates
            total_eventos
date                     
2018-07-01             31
2018-07-02             16
2018-07-03             19
monday_idx 1
starts on monday
            total_eventos
date                     
2018-07-02             16
2018-07-03             19
2018-07-04             17
min date on localidad 2018-07-02 00:00:00
max date on localidad 2018-07-31 00:00:00
samples_num 29
lT complete 4.142857142857143
lT aprox 4
28 28
SUBA
remove outliers
            total_eventos
date                     
2018-07-01            243
2018-07-02            148
2018-07-03            171
set consecutive dates
            total_eventos
date                     
2018-07-01            243
2018-07-02            148
2018-07-03            171
monday_idx 1
starts on monday
            total_eventos
date                     
20

min date on localidad 2018-08-06 00:00:00
max date on localidad 2018-08-31 00:00:00
samples_num 25
lT complete 3.5714285714285716
lT aprox 3
21 21
KENNEDY
remove outliers
            total_eventos
date                     
2018-08-01            122
2018-08-02            110
2018-08-03            125
set consecutive dates
            total_eventos
date                     
2018-08-01            122
2018-08-02            110
2018-08-03            125
monday_idx 5
starts on monday
            total_eventos
date                     
2018-08-06            105
2018-08-07            114
2018-08-08            114
min date on localidad 2018-08-06 00:00:00
max date on localidad 2018-08-31 00:00:00
samples_num 25
lT complete 3.5714285714285716
lT aprox 3
21 21
LOS MARTIRES
remove outliers
            total_eventos
date                     
2018-08-01             13
2018-08-02             12
2018-08-03             22
set consecutive dates
            total_eventos
date                     
2018-08

remove outliers
            total_eventos
date                     
2018-09-01             35
2018-09-02             33
2018-09-03             29
set consecutive dates
            total_eventos
date                     
2018-09-01             35
2018-09-02             33
2018-09-03             29
monday_idx 2
starts on monday
            total_eventos
date                     
2018-09-03             29
2018-09-04             32
2018-09-05             23
min date on localidad 2018-09-03 00:00:00
max date on localidad 2018-09-30 00:00:00
samples_num 27
lT complete 3.857142857142857
lT aprox 3
21 21
CIUDAD BOLIVAR
remove outliers
            total_eventos
date                     
2018-09-01            113
2018-09-02            195
2018-09-03             70
set consecutive dates
            total_eventos
date                     
2018-09-01            113
2018-09-02            195
2018-09-03             70
monday_idx 2
starts on monday
            total_eventos
date                     
2

BARRIOS UNIDOS
remove outliers
            total_eventos
date                     
2018-10-01             26
2018-10-02             25
2018-10-03             28
set consecutive dates
            total_eventos
date                     
2018-10-01             26
2018-10-02             25
2018-10-03             28
monday_idx 0
starts on monday
            total_eventos
date                     
2018-10-01             26
2018-10-02             25
2018-10-03             28
min date on localidad 2018-10-01 00:00:00
max date on localidad 2018-10-31 00:00:00
samples_num 30
lT complete 4.285714285714286
lT aprox 4
28 28
BOSA
remove outliers
            total_eventos
date                     
2018-10-01             72
2018-10-02             72
2018-10-03             77
set consecutive dates
            total_eventos
date                     
2018-10-01             72
2018-10-02             72
2018-10-03             77
monday_idx 0
starts on monday
            total_eventos
date                  

TUNJUELITO
remove outliers
            total_eventos
date                     
2018-10-01             17
2018-10-02             17
2018-10-03             19
set consecutive dates
            total_eventos
date                     
2018-10-01             17
2018-10-02             17
2018-10-03             19
monday_idx 0
starts on monday
            total_eventos
date                     
2018-10-01             17
2018-10-02             17
2018-10-03             19
min date on localidad 2018-10-01 00:00:00
max date on localidad 2018-10-31 00:00:00
samples_num 30
lT complete 4.285714285714286
lT aprox 4
28 28
USAQUEN
remove outliers
            total_eventos
date                     
2018-10-01             54
2018-10-02             40
2018-10-03             57
set consecutive dates
            total_eventos
date                     
2018-10-01             54
2018-10-02             40
2018-10-03             57
monday_idx 0
starts on monday
            total_eventos
date                   

remove outliers
            total_eventos
date                     
2018-11-01             75
2018-11-02             51
2018-11-03             68
set consecutive dates
            total_eventos
date                     
2018-11-01             75
2018-11-02             51
2018-11-03             68
monday_idx 4
starts on monday
            total_eventos
date                     
2018-11-05             66
2018-11-06             45
2018-11-07             43
min date on localidad 2018-11-05 00:00:00
max date on localidad 2018-11-30 00:00:00
samples_num 25
lT complete 3.5714285714285716
lT aprox 3
21 21
SANTA FE
remove outliers
            total_eventos
date                     
2018-11-01             20
2018-11-02             27
2018-11-04             32
set consecutive dates
            total_eventos
date                     
2018-11-01             20
2018-11-02             27
2018-11-03             23
monday_idx 4
starts on monday
            total_eventos
date                     
2018-1

min date on localidad 2018-12-03 00:00:00
max date on localidad 2018-12-31 00:00:00
samples_num 28
lT complete 4.0
lT aprox 4
28 28
KENNEDY
remove outliers
            total_eventos
date                     
2018-12-01            206
2018-12-02            259
2018-12-03            124
set consecutive dates
            total_eventos
date                     
2018-12-01            206
2018-12-02            259
2018-12-03            124
monday_idx 2
starts on monday
            total_eventos
date                     
2018-12-03            124
2018-12-04             95
2018-12-05            119
min date on localidad 2018-12-03 00:00:00
max date on localidad 2018-12-31 00:00:00
samples_num 28
lT complete 4.0
lT aprox 4
28 28
LOS MARTIRES
remove outliers
            total_eventos
date                     
2018-12-01             27
2018-12-03             24
2018-12-04             26
set consecutive dates
            total_eventos
date                     
2018-12-01             27
2018-12-02 

monday_idx 6
starts on monday
            total_eventos
date                     
2019-01-07              5
2019-01-08              2
2019-01-09              3
min date on localidad 2019-01-07 00:00:00
max date on localidad 2019-01-31 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21
CHAPINERO
remove outliers
            total_eventos
date                     
2019-01-01             26
2019-01-02             15
2019-01-03             14
set consecutive dates
            total_eventos
date                     
2019-01-01             26
2019-01-02             15
2019-01-03             14
monday_idx 6
starts on monday
            total_eventos
date                     
2019-01-07             11
2019-01-08             19
2019-01-09             37
min date on localidad 2019-01-07 00:00:00
max date on localidad 2019-01-31 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21
CIUDAD BOLIVAR
remove outliers
            total_eventos
date                 

monday_idx 6
starts on monday
            total_eventos
date                     
2019-01-07             39
2019-01-08             29
2019-01-09             35
min date on localidad 2019-01-07 00:00:00
max date on localidad 2019-01-31 00:00:00
samples_num 24
lT complete 3.4285714285714284
lT aprox 3
21 21


In [94]:
df_prediction["month"] = df_prediction.period.str.extract(r'(\d{2}$)')
df_prediction.loc[df_prediction.period.str.contains("2017"), 'year'] = '2017'
df_prediction.loc[df_prediction.period.str.contains("2018"), 'year'] = '2018'
df_prediction.loc[df_prediction.period.str.contains("2019"), 'year'] = '2019'

In [95]:
df_prediction

Unnamed: 0,experiment_name,period,localidad,lenWindow,crime_level,predictability,contingency,constancy,month,year
0,aggressiveBehavior_localidad_by_month,2017/01,ANTONIO NARIÑO,7,3,0.4591479170272442,0.37744375108173367,0.08170416594551055,01,2017
1,aggressiveBehavior_localidad_by_month,2017/01,BARRIOS UNIDOS,7,3,0.5629072918469555,0.5427760484981082,0.020131243348847305,01,2017
2,aggressiveBehavior_localidad_by_month,2017/01,BOSA,7,3,0.7295739585136223,0.3795963801619764,0.3499775783516459,01,2017
3,aggressiveBehavior_localidad_by_month,2017/01,CANDELARIA,7,3,0.19812031259014362,0.15255431551510867,0.04556599707503495,01,2017
4,aggressiveBehavior_localidad_by_month,2017/01,CHAPINERO,7,3,0.4276942711037667,0.16597913724721725,0.26171513385654943,01,2017
...,...,...,...,...,...,...,...,...,...,...
470,aggressiveBehavior_localidad_by_month,2019/01,SUBA,7,3,1.0,0.9182958340544894,0.08170416594551055,01,2019
471,aggressiveBehavior_localidad_by_month,2019/01,TEUSAQUILLO,7,3,0.540852082972755,0.1908745046211091,0.3499775783516459,01,2019
472,aggressiveBehavior_localidad_by_month,2019/01,TUNJUELITO,7,3,0.540852082972755,0.45914791702724445,0.08170416594551055,01,2019
473,aggressiveBehavior_localidad_by_month,2019/01,USAQUEN,7,3,0.540852082972755,0.3932572616222336,0.14759482135052138,01,2019


### Localidades with higher predictability values

In [96]:
df_localidad = df_prediction[df_prediction["localidad"] == "CIUDAD BOLIVAR"]
fig = px.scatter(df_localidad, x="month", y="predictability",color="year")
fig.show()

In [73]:
df_localidad = df_prediction[df_prediction["localidad"] == "BOSA"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year")
fig.show()

### Localidades with low predictability values

In [74]:
df_localidad = df_prediction[df_prediction["localidad"] == "TEUSAQUILLO"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year")
fig.show()

In [75]:
df_localidad = df_prediction[df_prediction["localidad"] == "LOS MARTIRES"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year")
fig.show()