# Experiment description
## Hypothesis: 
The peaks observed in experiment 12 (FFT approach) are also observed with the periodogram and multitaper approaches.

## Method: 
Periodogram and multitaper spectral estimation are used to identify periodicity in the time series of each localidad. Time series to analyze***:

- Localidades with high predictability (P>0.7) and low variance (variance < 0.2): Ciudad Bolívar, Bosa, Usme, San Cristobal, Rafael Uribe Uribe, Suba, Kennedy. 
- Localidades with medium predictability ([0.4, 0.7]) and variance ([0.2, 0.4]): Engativa, Tunjuelito, Fontibon, Usaquen
- Localidades with low predictability (P < 0.4) and high variance (variance > 0.4): Antonio Nariño, Santa Fe, Candelaria, Barrios Unidos, Los Mártires.

** Localidades that do not fit the classification above: Puente Aranda, Chapinero, Teusaquillo

*** Classification based on the predictability results obtained in experiment 08 and the variance computed in experiment 11

## Built-in methods

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import calendar
import scipy as sp
import scipy.fftpack
from scipy.signal import find_peaks

import nitime.algorithms as tsa
import nitime.utils as utils
from nitime.viz import winspect
from nitime.viz import plot_spectral_estimate
import math

from IPython.display import display, HTML
# Show full cell contents and every column when a DataFrame is displayed.
# None means "no limit"; the old -1 sentinel for max_colwidth was deprecated
# in pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

In [None]:
def set_initial_dataset_day(df_by_date, name_day):
    """Drop the leading rows so the frame starts on the first `name_day`.

    `df_by_date` is expected to carry a datetime `date` index (or column
    after `reset_index`). `name_day` is an English weekday name as produced
    by `Series.dt.day_name()`, e.g. 'Monday'.

    Returns the trimmed frame, indexed by `date`. Raises IndexError when no
    row falls on `name_day`.
    """
    flat = df_by_date.reset_index()
    flat['day_of_week'] = flat['date'].dt.day_name()

    # After reset_index the labels are 0..n-1, so the first matching label
    # is also the position to slice from.
    matching_rows = flat.index[flat['day_of_week'] == name_day]
    first_match = matching_rows.tolist()[0]

    trimmed = flat.iloc[first_match:]
    return trimmed.set_index('date').drop(columns=['day_of_week'])

In [None]:
def preprocess_df_localidad(df_by_localidad, len_window=7):
    """Turn one localidad's daily event counts into a normalised signal.

    The frame is trimmed to start on a Monday, missing calendar days are
    filled with zero, the series is truncated to a whole number of
    `len_window`-day windows and finally divided by its maximum.

    Parameters:
        df_by_localidad: frame indexed by `date` with a `total_eventos`
            column.
        len_window: window length in samples (default 7, the value that was
            previously hard-coded).

    Returns:
        numpy array of `total_eventos` scaled by its maximum.

    Raises:
        ValueError: if `len_window` is smaller than 1 or fewer than
            `len_window` days remain after trimming.
    """
    if len_window < 1:
        raise ValueError("len_window must be a positive integer")

    # Make sure the dataset starts on a Monday for the experiment
    df_by_localidad = set_initial_dataset_day(df_by_localidad, 'Monday')

    # Make sure the dataset includes every consecutive date in the period;
    # days with no record are filled with 0.
    full_range = pd.date_range(df_by_localidad.index.min(),
                               df_by_localidad.index.max())
    df_by_localidad = df_by_localidad.reindex(full_range, fill_value=0)

    # Keep only complete windows.
    usable_length = (len(df_by_localidad) // len_window) * len_window
    if usable_length == 0:
        raise ValueError(
            "series is shorter than one window of %d days" % len_window
        )

    df_localidad_values = df_by_localidad['total_eventos'].values[:usable_length]

    # Scale by the maximum value.
    return df_localidad_values / df_localidad_values.max()

In [None]:
def get_peaks(signal, custom_distance):
    """Return the indices of the peaks of `signal`.

    Thin wrapper over `scipy.signal.find_peaks`: `custom_distance` is passed
    as its `distance` argument and the properties dict it also returns is
    discarded.
    """
    return find_peaks(signal, distance=custom_distance)[0]

In [None]:
def dB(x, out=None):
    """Convert `x` to decibels, i.e. 10 * log10(x).

    Without `out` the converted values are returned as a new object.
    With `out` the result is written into `out` in place and the function
    returns None.
    """
    if out is not None:
        np.log10(x, out=out)
        np.multiply(out, 10, out=out)
        return None
    return 10 * np.log10(x)

In [None]:
def mt_peaks(f, psd_mt):
    """Print one summary line per peak found in `psd_mt`.

    Peaks come from `get_peaks` with a minimum separation of 200 samples.
    For each peak the PSD value, the matching entry of `f` and 2*pi/f are
    printed, all rounded to two decimals. The 'Period (days)' label presumably
    assumes one sample per day and `f` in radians per sample -- confirm
    against the caller.
    """
    peak_positions = get_peaks(psd_mt, 200)
    for idx in peak_positions:
        peak_value = psd_mt[idx].round(2)
        angular_freq = f[idx].round(2)
        period = (2 * math.pi / f[idx]).round(2)
        print('mt value: ', peak_value, 'angular frequency: ', angular_freq, 'Period (days): ', period)

## Data

In [None]:
# NOTE(review): absolute path on the author's machine -- adjust before running elsewhere.
data_location = '/Users/anamaria/Desktop/dev/security_project/datasets/06. verify_enrich_nuse_11022020.csv'
# Load the comma-separated events table.
df_input = pd.read_csv(data_location, sep=",")

In [None]:
# Work on a copy so the table loaded into df_input stays untouched.
df = df_input.copy()

In [None]:
# Drop the records whose ANIO is 2019. The explicit copy makes the column
# assignment below write to an independent frame instead of a filtered
# selection, which avoids pandas' SettingWithCopyWarning.
df = df.loc[df['ANIO'] != 2019].copy()

# Keep only the calendar day of each FECHA timestamp (time set to midnight).
# NOTE(review): assumes FECHA parses to timezone-naive datetimes -- confirm.
df['date'] = pd.to_datetime(df['FECHA']).dt.normalize()

# Number of events per (date, PERIODO_TS, LOCALIDAD) combination.
df_by_date = (
    df.groupby(['date', 'PERIODO_TS', 'LOCALIDAD'])
    .size()
    .to_frame('total_eventos')
)

In [None]:
# Flatten the index into ordinary columns, then index the rows by date only.
df_by_date = df_by_date.reset_index()
df_by_date = df_by_date.set_index('date')

# Results
## Experiment localidades with high predictability and low predictability variance

In [None]:
# Localidades analysed in this section.
localidadesList = [
    'CIUDAD BOLIVAR',
    'BOSA',
    'USME',
    'SAN CRISTOBAL',
    'RAFAEL URIBE URIBE',
    'SUBA',
    'KENNEDY',
]

In [None]:
for localidad in localidadesList:
    # Normalised daily series for the current localidad
    df_by_localidad = df_by_date.loc[df_by_date['LOCALIDAD'] == localidad]
    df_localidad_values = preprocess_df_localidad(df_by_localidad)

    # Periodogram estimate, converted to dB in place, then plotted
    freqs, d_psd = tsa.periodogram(df_localidad_values)
    dB(d_psd, d_psd)
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)
    ax.plot(freqs, d_psd)
    ax.set_title(f"{localidad} (periodogram)")
    ax.set_ylabel("Amplitude (db)")
    ax.set_xlabel("Angular frequency")
    plt.show()

    # Multitaper estimate, converted to dB in place, then plotted
    # (the third return value, nu, is not used in this cell)
    f, psd_mt, nu = tsa.multi_taper_psd(df_localidad_values, adaptive=False, jackknife=False)
    dB(psd_mt, psd_mt)
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)
    ax.plot(f, psd_mt)
    ax.set_title(f"{localidad} (multitaper)")
    ax.set_ylabel("Amplitude (db)")
    ax.set_xlabel("Angular frequency")
    plt.show()

    # Print the peaks of the multitaper spectrum
    mt_peaks(f, psd_mt)
    

## Experiment localidades with medium predictability values and variance

In [None]:
# Localidades analysed in this section.
localidadesList = [
    'ENGATIVA',
    'TUNJUELITO',
    'FONTIBON',
    'USAQUEN',
]

In [None]:
for localidad in localidadesList:
    # Normalised daily series for the current localidad
    df_by_localidad = df_by_date.loc[df_by_date['LOCALIDAD'] == localidad]
    df_localidad_values = preprocess_df_localidad(df_by_localidad)

    # Periodogram estimate, converted to dB in place, then plotted
    freqs, d_psd = tsa.periodogram(df_localidad_values)
    dB(d_psd, d_psd)
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)
    ax.plot(freqs, d_psd)
    ax.set_title(f"{localidad} (periodogram)")
    ax.set_ylabel("Amplitude (db)")
    ax.set_xlabel("Angular frequency")
    plt.show()

    # Multitaper estimate, converted to dB in place, then plotted
    # (the third return value, nu, is not used in this cell)
    f, psd_mt, nu = tsa.multi_taper_psd(df_localidad_values, adaptive=False, jackknife=False)
    dB(psd_mt, psd_mt)
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)
    ax.plot(f, psd_mt)
    ax.set_title(f"{localidad} (multitaper)")
    ax.set_ylabel("Amplitude (db)")
    ax.set_xlabel("Angular frequency")
    plt.show()

    # Print the peaks of the multitaper spectrum
    mt_peaks(f, psd_mt)

## Experiment localidades with low predictability values and high predictability variance

In [None]:
# Localidades analysed in this section.
localidadesList = [
    'ANTONIO NARIÑO',
    'SANTA FE',
    'CANDELARIA',
    'BARRIOS UNIDOS',
    'LOS MARTIRES',
]

In [None]:
for localidad in localidadesList:
    # Normalised daily series for the current localidad
    df_by_localidad = df_by_date.loc[df_by_date['LOCALIDAD'] == localidad]
    df_localidad_values = preprocess_df_localidad(df_by_localidad)

    # Periodogram estimate, converted to dB in place, then plotted
    freqs, d_psd = tsa.periodogram(df_localidad_values)
    dB(d_psd, d_psd)
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)
    ax.plot(freqs, d_psd)
    ax.set_title(f"{localidad} (periodogram)")
    ax.set_ylabel("Amplitude (db)")
    ax.set_xlabel("Angular frequency")
    plt.show()

    # Multitaper estimate, converted to dB in place, then plotted
    # (the third return value, nu, is not used in this cell)
    f, psd_mt, nu = tsa.multi_taper_psd(df_localidad_values, adaptive=False, jackknife=False)
    dB(psd_mt, psd_mt)
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)
    ax.plot(f, psd_mt)
    ax.set_title(f"{localidad} (multitaper)")
    ax.set_ylabel("Amplitude (db)")
    ax.set_xlabel("Angular frequency")
    plt.show()

    # Print the peaks of the multitaper spectrum
    mt_peaks(f, psd_mt)

## Experiment particular localidades

In [None]:
# Localidades analysed in this section.
localidadesList = [
    'PUENTE ARANDA',
    'CHAPINERO',
    'TEUSAQUILLO',
]

In [None]:
for localidad in localidadesList:
    # Normalised daily series for the current localidad
    df_by_localidad = df_by_date.loc[df_by_date['LOCALIDAD'] == localidad]
    df_localidad_values = preprocess_df_localidad(df_by_localidad)

    # Periodogram estimate, converted to dB in place, then plotted
    freqs, d_psd = tsa.periodogram(df_localidad_values)
    dB(d_psd, d_psd)
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)
    ax.plot(freqs, d_psd)
    ax.set_title(f"{localidad} (periodogram)")
    ax.set_ylabel("Amplitude (db)")
    ax.set_xlabel("Angular frequency")
    plt.show()

    # Multitaper estimate, converted to dB in place, then plotted
    # (the third return value, nu, is not used in this cell)
    f, psd_mt, nu = tsa.multi_taper_psd(df_localidad_values, adaptive=False, jackknife=False)
    dB(psd_mt, psd_mt)
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)
    ax.plot(f, psd_mt)
    ax.set_title(f"{localidad} (multitaper)")
    ax.set_ylabel("Amplitude (db)")
    ax.set_xlabel("Angular frequency")
    plt.show()

    # Print the peaks of the multitaper spectrum
    mt_peaks(f, psd_mt)