# Experiment description
## Hypothesis: 
Predictability localidad values are similar (presents low variance) among intervals on full dataset

## Method: 
- Remove outliers
- Measure predictability for 7 days timewindows and levelCrime=3 on intervals for full dataset (months, bimesters, semesters)
- Implement a metric to compare the difference among predictability values

## Parameters: 
- Time windows: 7
- Crime levels: 3
- Aggregation: localidades

## Built-in methods

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import pickle
import matplotlib.pyplot as plt
import scipy
import math
from math import pi
import geopandas as gpd
import plotly.express as px
%matplotlib inline

In [43]:
workingPath= '/home/combios/Documents/amreyesp/security_project/'

In [44]:
def set_initial_dataset_day(df_by_date,name_day):
    df_by_date = df_by_date.reset_index()
    df_by_date['day_of_week'] = df_by_date['date'].dt.day_name()
    monday_idx = df_by_date.index[df_by_date['day_of_week'] == name_day].tolist()[0]
    df_by_date = df_by_date[monday_idx:].set_index('date').drop(['day_of_week'],axis=1)
    return df_by_date

In [45]:
# Methods for time windows
def im2patches(im,n):
    patches = [];
    for i in range(len(im)-n):
        patch = im[i:(i+n-1)]        
        patch = patch - np.nanmean(patch);
        if(np.linalg.norm(patch)>0):
            patch = patch/np.linalg.norm(patch);
        if i==0:
            patches = patch;
        else:
            patches = np.vstack((patches,patch))
    return patches;

def writeEmbeding(timeSeries,lenWindow,samplePath, scenarioName):
    slicingWindows = im2patches(timeSeries,lenWindow);
    experimentPath = 'periodicity_experiments/predictability/slicing/'
    prevStation = str(samplePath);
    with open(workingPath+experimentPath+'slicingWindows'+"_"+str(prevStation)+"_"+str(scenarioName)+"_"+str(lenWindow)+'_.pickle', 'wb') as f:
        lv = slicingWindows.tolist();                        
        pickle.dump(lv, f, protocol=2)

    experimentPath = 'periodicity_experiments/predictability/timeSeries/'    
    with open(workingPath+experimentPath+'timeSeries'+"_"+str(prevStation)+"_"+str(scenarioName)+"_"+str(lenWindow)+'_.pickle', 'wb') as f:
        lv = timeSeries.tolist();                        
        pickle.dump(lv, f, protocol=2)



In [46]:
#Methods for predictability
def getBarcode(samplePath,lenWindow,scenarioName):
    experimentPath = 'periodicity_experiments/predictability/'
    barcode = [];

    with open(workingPath+experimentPath+'timeSeries/'+'timeSeries_'+samplePath+"_"+str(scenarioName)+'_'+str(lenWindow)+'_'+'.pickle', 'rb') as f:
            timeSeries = pickle.load(f);            
    return (barcode,timeSeries);

def computeBarcodeEntropy(barsLenB0):
    barlen = np.array(barsLenB0);
    barlen = barlen/barlen.sum();
    hbc = 0;
    for i in range(barlen.shape[0]):
        if barlen[i]!=0:
            hbc = hbc-(barlen[i])*np.log(barlen[i]);
    return hbc;


def computeGeneralPredictability(timeSeries,binsData,lenWindow):
    # Colwell, R. K. (1974). Predictability, constancy, and contingency of periodic phenomena. Ecology, 55(5), 1148-1153.
    # Normalize the caudal values
    nLevels = binsData.shape[0]-1;
    matStations = np.array(timeSeries).reshape((np.array(timeSeries).shape[0]//lenWindow,lenWindow))    

    grandMean = np.mean(np.mean(matStations));
    #matStations = matStations / grandMean;
    N = np.zeros((nLevels,lenWindow));
    for i in range(1,matStations.shape[1]): 
        # Computes histograms per columns
        hist, bin_edges = np.histogram(matStations[:,i],bins = binsData);
        N[:,i] = hist;
    X = np.sum(N, axis=0);
    Y = np.sum(N, axis=1);
    Z = np.sum(Y);
    hx = 0;
    hy = 0;
    hxy = 0;
    for j in range(X.shape[0]):
        if X[j]!=0:
            hx = hx-(X[j]/Z)*np.log(X[j]/Z);
            
    for i in range(Y.shape[0]):
        if Y[i]!=0:
            hy = hy-(Y[i]/Z)*np.log(Y[i]/Z);
            
    for i in range(Y.shape[0]):
        for j in range(X.shape[0]):
            if N[i,j]!=0:
                hxy = hxy-((N[i,j]/Z)*np.log(N[i,j]/Z));    
    
    # predictability
    p = 1 - (hxy - hx)/np.log(N.shape[0]);
    # constancy
    c = 1 - hy/np.log(N.shape[0]);
    # Returns constancy and contingency
    return (c,p-c,p);



In [47]:
def preprocess_df(df,min_date_period,max_date_period):
    df=df.drop(columns=['PERIODO_TS','LOCALIDAD'])
    #Remove outliers
    q_hi = df["total_eventos"].quantile(0.99)
    df = df[(df["total_eventos"] < q_hi)]

    #Make sure dataset include consecutive dates in period
    idx = pd.date_range(min_date_period, max_date_period)
    print(df["total_eventos"])
    df = df.reindex(idx, fill_value=int(df["total_eventos"].mean()))
    df = df.reset_index().rename(columns={'index': 'date'}).set_index('date')
    
    #Make sure dataset starts on Monday for the experiment
    df = set_initial_dataset_day(df,'Monday')
    
    return df

In [48]:
def saveTimeSeries(df,min_date_period,max_date_period,localidad, lenWindow, expName):       
    df_values = pd.Series(df['total_eventos']).values
    lT=get_LT(df, lenWindow)
    df_values = df_values[0:lT]
    writeEmbeding(df_values,lenWindow,expName,localidad)

In [49]:
def get_LT(df_by_period,lenWindow):
    min_date = df_by_period.reset_index().date.min()
    max_date = df_by_period.reset_index().date.max()
    samples_num = (max_date.date()-min_date.date()).days
    lT = samples_num//lenWindow * lenWindow
    return lT

In [50]:
def predictability_experiment_localidades(df_by_date,min_date_period,max_date_period,lenWindow,localidadesList,Levels,expName,periodName):
    #workingPath = '/Users/anamaria/Desktop/dev/security_project/periodicity_experiments/predictability/';

    flagF = True;
    for localidad in localidadesList:
        #write embeding
        print(localidad)
        df_by_localidad = df_by_date[df_by_date['LOCALIDAD'] == localidad]        
        df_by_localidad = preprocess_df(df_by_localidad,min_date_period,max_date_period)
        saveTimeSeries(df_by_localidad,min_date_period,max_date_period,localidad, lenWindow, expName)
        
        for nLevels in Levels:
            (barcode,timeSeries) = getBarcode(expName,lenWindow,localidad);
            binsLevels = np.linspace(np.min(timeSeries),np.max(timeSeries),nLevels);
            c,m,p = computeGeneralPredictability(timeSeries,binsLevels,lenWindow)

            if flagF==True:
                flagF = False
                predValues = np.array([expName,periodName,localidad,lenWindow,nLevels,p,m,c]);
            else:
                predValues = np.vstack((predValues, [expName,periodName,localidad,lenWindow,nLevels,p,m,c]))

    return predValues
        
    

In [51]:
def table_predictability_by_period_report(df_agressiveBehavior,lenWindow,localidadesList,yAxisCategories,name_experiment,nLevel):
    join=df_agressiveBehavior.pivot('localidad','period','predictability')
    var1_order = []
    var2_order = yAxisCategories
    if len(var2_order) > 0:
        join = join.reindex(var2_order, axis=1)
    if len(var1_order) > 0:
        join = join.reindex(var1_order)
    
    fig, ax = plt.subplots(1,1,sharex=True, sharey=True)
    fig.set_size_inches(7, 6)
    g=sns.heatmap(join.astype('float'),annot=True,fmt=".3",linewidths=0,cmap="Blues",cbar=False)
    g.set_yticklabels(g.get_yticklabels(), rotation = 0)
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    file_path = 'periodicity_experiments/predictability/figures/'
    plt.savefig(workingPath+file_path+'table_'+str(name_experiment)+'_predictability_time_'+str(lenWindow)+'_levels_'+str(nLevel),dpi=300,bbox_inches = "tight")
    plt.show()

In [52]:
def map_localidad(ax,df,col_localidad,col_vals,vmin=None,vmax=None):
  loc_geo=workingPath+"assets/localidades_polygon.json"
  loc_=gpd.read_file(loc_geo)
  loc_=loc_.merge(df,left_on='LocNombre',right_on=col_localidad)
  loc_.plot(cmap='viridis',column=col_vals,legend=True,ax=ax,vmin=vmin,vmax=vmax)  

In [53]:
def map_predictability(df_crime, crime_level, lenWindow,name_experiment):
    subdata = df_crime[df_crime['crime_level']==crime_level]
    subdata = subdata[subdata['lenWindow']==str(lenWindow)]
    subdata["predictability"] = pd.to_numeric(subdata["predictability"])
    
    fig, ax = plt.subplots(figsize=(12,12))
    map_localidad(ax,subdata,'localidad','predictability')
    ax.axis('off')
    file_path = 'periodicity_experiments/predictability/figures/'
    plt.savefig(workingPath+file_path+'map_aggressiveBehavior_localidades'+str(name_experiment)+'_predictability_time_'+str(lenWindow)+'_levels_'+str(crime_level),dpi=300,bbox_inches = "tight")
    plt.show()

In [54]:
def build_semester_list(year,semester=1):
    if semester == 1:
        month_list = ['01','02','03','04','05','06']
    if semester == 2:
        month_list = ['07','08','09','10','11','12']
    semester_list = map(lambda m: year+'/'+str(m), month_list)
    return (list(semester_list))

def build_trimester_list(year,trimester=1):
    if trimester == 1:
        month_list = ['01','02','03']
    if trimester == 2:
        month_list = ['04','05','06']
    if trimester == 3:
        month_list = ['07','08','09']
    if trimester == 4:
        month_list = ['10','11','12']
    trimester_list = map(lambda m: year+'/'+str(m), month_list)
    return (list(trimester_list))

def build_bimester_list(year,bimester=1):
    if bimester == 1:
        month_list = ['01','02']
    if bimester == 2:
        month_list = ['03','04']
    if bimester == 3:
        month_list = ['05','06']
    if bimester == 4:
        month_list = ['07','08']
    if bimester == 5:
        month_list = ['09','10']
    if bimester == 6:
        month_list = ['11','12']
    bimester_list = map(lambda m: year+'/'+str(m), month_list)
    return (list(bimester_list))

## Load data

In [55]:
data_location = '/home/combios/Documents/amreyesp/clean_nuse_data/verify_enrich_nuse_11022020.csv'
df_input = pd.read_csv(data_location,delimiter=",")


Columns (9,11) have mixed types. Specify dtype option on import or set low_memory=False.



In [56]:
df_input['date']=pd.to_datetime(df_input['FECHA']).dt.strftime('%Y-%m-%d')
df_by_date = pd.DataFrame(df_input.groupby(['date','PERIODO_TS','LOCALIDAD']).size(),columns=["total_eventos"])

In [57]:
df_by_date = df_by_date.reset_index().set_index('date')

## Localidades, semester

In [58]:
Levels=[3]
lenWindow = 7
levelCategories = list(map(lambda x: str(x), Levels))
localidadesList = list(df_by_date.LOCALIDAD.unique())
localidadesList.remove('SIN LOCALIZACION')

In [59]:
years = list(df_input['ANIO'].unique())
semesters = [1, 2]

In [62]:
predValues_array = []
for year in years:
    for semester in semesters:
        period_list = build_semester_list(str(year),semester)
        print(period_list)
        df_by_period=df_by_date[df_by_date.PERIODO_TS.isin(period_list)]

        min_date_on_period = df_by_period.reset_index().date.min()
        max_date_on_period = df_by_period.reset_index().date.max()

        expName = 'aggressiveBehavior_localidad_by_semester'
        periodName = str(year)+'-'+str(semester)
        predValues = predictability_experiment_localidades(df_by_period,min_date_on_period,max_date_on_period,lenWindow,localidadesList,Levels,expName,periodName)
        predValues_array = predValues_array + list(predValues)
    
df_prediction = pd.DataFrame(predValues_array, columns=['experiment_name','period','localidad','lenWindow','crime_level','predictability','contingency','constancy'])
#periodCategories = list(df_prediction['period'].unique())
#table_predictability_by_period_report(df_prediction,lenWindow,localidadesList,periodCategories,expName,Levels[0])



['2014/01', '2014/02', '2014/03', '2014/04', '2014/05', '2014/06']
ANTONIO NARIÑO
date
2014-01-01    29
2014-01-02     7
2014-01-03    17
2014-01-04    15
2014-01-05    14
              ..
2014-06-25     9
2014-06-26    11
2014-06-27    17
2014-06-29    28
2014-06-30    11
Name: total_eventos, Length: 179, dtype: int64
BARRIOS UNIDOS
date
2014-01-01    32
2014-01-02    13
2014-01-03    16
2014-01-04    22
2014-01-05    16
              ..
2014-06-26    20
2014-06-27    15
2014-06-28    30
2014-06-29    27
2014-06-30    15
Name: total_eventos, Length: 179, dtype: int64
BOSA
date
2014-01-02     39
2014-01-03     61
2014-01-04     82
2014-01-05     77
2014-01-06     50
             ... 
2014-06-26     49
2014-06-27     56
2014-06-28    142
2014-06-29    172
2014-06-30    128
Name: total_eventos, Length: 179, dtype: int64
CANDELARIA
date
2014-01-02    2
2014-01-03    4
2014-01-04    6
2014-01-05    7
2014-01-06    2
             ..
2014-06-26    3
2014-06-27    7
2014-06-28    5
2014-06-29

ValueError: cannot convert float NaN to integer

In [None]:
df_prediction.loc[df_prediction.period.str.contains("-1"), 'semester'] = 'I'
df_prediction.loc[df_prediction.period.str.contains("-2"), 'semester'] = 'II'
df_prediction.loc[df_prediction.period.str.contains("2017"), 'year'] = '2017'
df_prediction.loc[df_prediction.period.str.contains("2018"), 'year'] = '2018'

In [None]:
df_prediction['predictability']=pd.to_numeric(df_prediction['predictability'])
df_prediction.dtypes

### Localidades with higher predictability values

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "CIUDAD BOLIVAR"]
fig = px.scatter(df_localidad, x="semester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 2])
fig.update_yaxes(range=[0.5, 1.1])
fig.show()

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "BOSA"]
fig = px.scatter(df_localidad, x="semester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 2])
fig.update_yaxes(range=[0.5, 1.1])
fig.show()

### Localidades with low predictability values

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "TEUSAQUILLO"]
fig = px.scatter(df_localidad, x="semester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 2])
fig.update_yaxes(range=[0, 0.5])
fig.show()

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "LOS MARTIRES"]
fig = px.scatter(df_localidad, x="semester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 2])
fig.update_yaxes(range=[0, 0.5])
fig.show()

## Localidades, trimester

In [None]:
Levels=[3]
lenWindow = 7
levelCategories = list(map(lambda x: str(x), Levels))
localidadesList = list(df_by_date.LOCALIDAD.unique())
localidadesList.remove('SIN LOCALIZACION')

In [None]:
years = list(df_input['ANIO'].unique())
years.remove(2019)
trimesters = [1, 2, 3, 4]

In [None]:
predValues_array = []
for year in years:
    for trimester in trimesters:
        period_list = build_trimester_list(str(year),trimester)
        print(period_list)
        df_by_period=df_by_date[df_by_date.PERIODO_TS.isin(period_list)]
        #df_by_period = set_initial_dataset_day(df_by_period,'Monday')

        min_date_on_period = df_by_period.reset_index().date.min()
        max_date_on_period = df_by_period.reset_index().date.max()

        expName = 'aggressiveBehavior_localidad_by_trimester'
        periodName = str(year)+'-'+str(trimester)
        predValues = predictability_experiment_localidades(df_by_period,min_date_on_period,max_date_on_period,lenWindow,localidadesList,Levels,expName,periodName)
        predValues_array = predValues_array + list(predValues)
    
df_prediction = pd.DataFrame(predValues_array, columns=['experiment_name','period','localidad','lenWindow','crime_level','predictability','contingency','constancy'])
#periodCategories = list(df_prediction['period'].unique())
#table_predictability_by_period_report(df_prediction,lenWindow,localidadesList,periodCategories,expName,Levels[0])



In [None]:
df_prediction.loc[df_prediction.period.str.contains("-1"), 'trimester'] = 'I'
df_prediction.loc[df_prediction.period.str.contains("-2"), 'trimester'] = 'II'
df_prediction.loc[df_prediction.period.str.contains("-3"), 'trimester'] = 'III'
df_prediction.loc[df_prediction.period.str.contains("-4"), 'trimester'] = 'IV'
df_prediction.loc[df_prediction.period.str.contains("2017"), 'year'] = '2017'
df_prediction.loc[df_prediction.period.str.contains("2018"), 'year'] = '2018'

In [None]:
df_prediction['predictability']=pd.to_numeric(df_prediction['predictability'])
df_prediction.dtypes

### Localidades with higher predictability values

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "CIUDAD BOLIVAR"]
fig = px.scatter(df_localidad, x="trimester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 4])
fig.update_yaxes(range=[0.5, 1])
fig.show()

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "BOSA"]
fig = px.scatter(df_localidad, x="trimester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 4])
fig.update_yaxes(range=[0.5, 1])
fig.show()

### Localidades with low predictability values

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "TEUSAQUILLO"]
fig = px.scatter(df_localidad, x="trimester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 4])
fig.update_yaxes(range=[0, 0.5])
fig.show()

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "LOS MARTIRES"]
fig = px.scatter(df_localidad, x="trimester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 4])
fig.update_yaxes(range=[0, 0.5])
fig.show()

## Localidades, bimester

In [None]:
Levels=[3]
lenWindow = 7
levelCategories = list(map(lambda x: str(x), Levels))
localidadesList = list(df_by_date.LOCALIDAD.unique())
localidadesList.remove('SIN LOCALIZACION')

In [None]:
years = list(df_input['ANIO'].unique())
years.remove(2019)
bimesters = [1, 2, 3, 4, 5, 6]

In [None]:
predValues_array = []
for year in years:
    for bimester in bimesters:
        period_list = build_bimester_list(str(year),bimester)
        print(period_list)
        df_by_period=df_by_date[df_by_date.PERIODO_TS.isin(period_list)]
        #df_by_period = set_initial_dataset_day(df_by_period,'Monday')

        min_date_on_period = df_by_period.reset_index().date.min()
        max_date_on_period = df_by_period.reset_index().date.max()

        expName = 'aggressiveBehavior_localidad_by_bimester'
        periodName = str(year)+'-'+str(bimester)
        predValues = predictability_experiment_localidades(df_by_period,min_date_on_period,max_date_on_period,lenWindow,localidadesList,Levels,expName,periodName)
        predValues_array = predValues_array + list(predValues)
    
df_prediction = pd.DataFrame(predValues_array, columns=['experiment_name','period','localidad','lenWindow','crime_level','predictability','contingency','constancy'])
#periodCategories = list(df_prediction['period'].unique())
#table_predictability_by_period_report(df_prediction,lenWindow,localidadesList,periodCategories,expName,Levels[0])



In [None]:
df_prediction.loc[df_prediction.period.str.contains("-1"), 'bimester'] = 'I'
df_prediction.loc[df_prediction.period.str.contains("-2"), 'bimester'] = 'II'
df_prediction.loc[df_prediction.period.str.contains("-3"), 'bimester'] = 'III'
df_prediction.loc[df_prediction.period.str.contains("-4"), 'bimester'] = 'IV'
df_prediction.loc[df_prediction.period.str.contains("-5"), 'bimester'] = 'V'
df_prediction.loc[df_prediction.period.str.contains("-6"), 'bimester'] = 'VI'
df_prediction.loc[df_prediction.period.str.contains("2017"), 'year'] = '2017'
df_prediction.loc[df_prediction.period.str.contains("2018"), 'year'] = '2018'

In [None]:
df_prediction['predictability']=pd.to_numeric(df_prediction['predictability'])
df_prediction.dtypes

### Localidades with higher predictability values

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "CIUDAD BOLIVAR"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 6])
fig.update_yaxes(range=[0.5, 1.1])
fig.show()

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "BOSA"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 6])
fig.update_yaxes(range=[0.5, 1.1])
fig.show()

### Localidades with low predictability values

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "TEUSAQUILLO"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 6])
fig.update_yaxes(range=[0, 0.7])
fig.show()

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "LOS MARTIRES"]
fig = px.scatter(df_localidad, x="bimester", y="predictability",color="year",size="predictability")
fig.update_xaxes(range=[-1, 6])
fig.update_yaxes(range=[0, 0.7])
fig.show()

## Localidades, months

In [None]:
Levels=[3]
lenWindow = 7
levelCategories = list(map(lambda x: str(x), Levels))
localidadesList = list(df_by_date.LOCALIDAD.unique())
localidadesList.remove('SIN LOCALIZACION')

In [None]:
months = df_by_date.PERIODO_TS.unique()
months

In [None]:
# NOTE: change size table
#fig.set_size_inches(14, 6)

In [None]:
predValues_array = []
for month in months:
    period_list = []
    period_list.append(month)
    print(period_list)
    df_by_period=df_by_date[df_by_date.PERIODO_TS.isin(period_list)]
    #df_by_period = set_initial_dataset_day(df_by_period,'Monday')

    min_date_on_period = df_by_period.reset_index().date.min()
    max_date_on_period = df_by_period.reset_index().date.max()

    expName = 'aggressiveBehavior_localidad_by_month'
    periodName = str(month)
    predValues = predictability_experiment_localidades(df_by_period,min_date_on_period,max_date_on_period,lenWindow,localidadesList,Levels,expName,periodName)
    predValues_array = predValues_array + list(predValues)
    
df_prediction = pd.DataFrame(predValues_array, columns=['experiment_name','period','localidad','lenWindow','crime_level','predictability','contingency','constancy'])
#periodCategories = list(df_prediction['period'].unique())
#table_predictability_by_period_report(df_prediction,lenWindow,localidadesList,periodCategories,expName,Levels[0])



In [None]:
df_prediction["month"] = df_prediction.period.str.extract(r'(\d{2}$)')
df_prediction.loc[df_prediction.period.str.contains("2017"), 'year'] = '2017'
df_prediction.loc[df_prediction.period.str.contains("2018"), 'year'] = '2018'
df_prediction.loc[df_prediction.period.str.contains("2019"), 'year'] = '2019'

In [None]:
df_prediction['predictability']=pd.to_numeric(df_prediction['predictability'])
df_prediction.dtypes

### Localidades with higher predictability values

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "CIUDAD BOLIVAR"]
fig = px.scatter(df_localidad, x="month", y="predictability",color="year",size="predictability")
#fig.update_xaxes(range=[-1, 12])
fig.update_yaxes(range=[0, 1.05])
fig.update_yaxes(nticks=20)
fig.show()

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "BOSA"]
fig = px.scatter(df_localidad, x="month", y="predictability",color="year",size="predictability")
#fig.update_xaxes(range=[-1, 12])
fig.update_yaxes(range=[0, 1.05])
fig.update_yaxes(nticks=20)
fig.show()

### Localidades with low predictability values

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "TEUSAQUILLO"]
fig = px.scatter(df_localidad, x="month", y="predictability",color="year",size="predictability")
#fig.update_xaxes(range=[-1, 12])
fig.update_yaxes(range=[0, 1.05])
fig.update_yaxes(nticks=20)
fig.show()

In [None]:
df_localidad = df_prediction[df_prediction["localidad"] == "LOS MARTIRES"]
fig = px.scatter(df_localidad, x="month", y="predictability",color="year",size="predictability")
#fig.update_xaxes(range=[-1, 12])
fig.update_yaxes(range=[0, 1.05])
fig.update_yaxes(nticks=20)
fig.show()