# Explore kalman filter



This notebook explores how to run the Kalman filter on mock data 

In [1]:
%load_ext autoreload
%autoreload 2

# 1. Get the data to pass to the filter 

In [2]:
import sys
import glob 
sys.path.append('../src')
from data_loader import LoadWidebandPulsarData

In [None]:

import pandas as pd 

datadir = '../data/IPTA_MockDataChallenge/IPTA_Challenge1_open/Challenge_Data/Dataset2/'


# Get all .par files in the directory
par_files = sorted(glob.glob(datadir + '*.par'))
tim_files = sorted(glob.glob(datadir + '*.tim'))

assert len(par_files) == len(tim_files) 


# Combine par_files and tim_files into pairs




dfs = []
dfs_meta = []
total_num_rows = 0 
i = 0
# Check we can load the files with no errors
for par_file, tim_file in file_pairs[0:2]:
    psr = LoadWidebandPulsarData.read_par_tim(par_file, tim_file)

    print(psr.M_matrix.shape)
    print(psr.fitpars)

    df = pd.DataFrame({'toas': psr.toas, f'residuals_{i}': psr.residuals})

    df_meta = pd.DataFrame({'name': [psr.name], f'dim_M': [psr.M_matrix.shape[-1]],f'RA': [psr.RA],f'DEC': [psr.DEC]})

    dfs.append(df)
    dfs_meta.append(df_meta)

    total_num_rows += len(df)
    i += 1
 






(130, 8)
['Offset', 'RAJ', 'DECJ', 'F0', 'F1', 'PMRA', 'PMDEC', 'PX']




(130, 11)
['Offset', 'RAJ', 'DECJ', 'F0', 'F1', 'PX', 'PB', 'A1', 'TASC', 'EPS1', 'EPS2']


In [4]:
dfs_meta 

[         name  dim_M        RA       DEC
 0  J0030+0451      8  0.132895  0.084841,
          name  dim_M      RA       DEC
 0  J0218+4232     11  0.6026  0.742431]

In [5]:
from functools import reduce 

merged_df = reduce(lambda left, right: pd.merge(left, right, on='toas', how='outer'), dfs)


In [6]:
merged_df 

Unnamed: 0,toas,residuals_0,residuals_1
0,4.580106e+09,3.380995e-06,
1,4.580106e+09,,-9.349598e-07
2,4.581370e+09,2.576195e-06,
3,4.581370e+09,,7.212309e-06
4,4.582755e+09,1.867198e-06,
...,...,...,...
255,4.731184e+09,-9.911458e-07,
256,4.732025e+09,,-1.214947e-05
257,4.732025e+09,-8.987281e-07,
258,4.733030e+09,-1.237823e-06,


In [7]:
combined_df = pd.concat(dfs_meta, ignore_index=True)


In [8]:
combined_df

Unnamed: 0,name,dim_M,RA,DEC
0,J0030+0451,8,0.132895,0.084841
1,J0218+4232,11,0.6026,0.742431


In [23]:
ra = combined_df['RA'].to_numpy()
dec = combined_df['DEC'].to_numpy()

In [26]:
from numpy import sin, cos

"""
Given a latitude theta and a longitude phi, get the xyz unit vector which points in that direction 
"""
def _unit_vector(theta,phi):
    qx = sin(theta) * cos(phi)
    qy = sin(theta) * sin(phi)
    qz = cos(theta)
    return np.array([qx, qy, qz]).T


In [27]:
q = _unit_vector(np.pi/2.0 -dec, ra) # 3 rows, N columns


In [28]:
Npsr = len(combined_df)

In [29]:
import numpy as np
#Get angle between all pulsars
#Doing this explicitly for completeness - I am sure faster ways exist
ζ = np.zeros((Npsr,Npsr))

for i in range(Npsr):
    for j in range(Npsr):

        if i == j: #i.e. angle between same pulsars is zero
            ζ[i,j] = 0.0 
            
        else: 
            vector_1 = q[i,:]
            vector_2 = q[j,:]
            dot_product = np.dot(vector_1, vector_2)

            ζ[i,j] = np.arccos(dot_product)

In [37]:
"""
Given an angle α, return the correlation
"""
def correlation_function(α):

    with np.errstate(divide='ignore', invalid='ignore'): #ignore the errors that arise from taking np.log(0). These get replaced with 1s
        bar = (1.0 - np.cos(α))/2
        out = np.nan_to_num(1.5*bar * (np.log(bar) - bar/4.0 + 0.5 + 0.5),nan=1.0) #replace nans with 1 for when α=0
    #out = 1.5*bar * np.log(bar) - bar/4.0 + 0.5 + 0.5 +0.5 #replace nans with 1 for when α=0

    return out

In [38]:
correlation_function(ζ)

array([[ 1.        , -0.21037178],
       [-0.21037178,  1.        ]])

# 2. Specify the model to use with the filter 

In [9]:
from models import StochasticGWBackgroundModel

In [10]:
psr_information = combined_df

In [11]:
combined_df

Unnamed: 0,name,dim_M,RA,DEC
0,J0030+0451,8,0.132895,0.084841
1,J0218+4232,11,0.6026,0.742431


In [12]:
model = StochasticGWBackgroundModel(psr_information)

In [45]:
import numpy as np 
θ = {'dt': 0.50,
     'γp': np.ones(len(combined_df)),
     'γa': 0.50,
     'σp': 1e-10 * np.ones(model.Npsr),
     'h2': 1e-10,
     'gamma_mat': correlation_function(ζ),
     'σ_epsilon': 1e-10, #a scalar, techincally wrong
     }
#F_array = model.F_matrix(θ)
Q_array = model.Q_matrix(θ)

In [46]:
Q_array.shape 

(27, 27)

In [41]:
model.M 

array([ 8, 11])

In [44]:
# dt = 0.5
# σ_eps = 
Q_offset_blocks = [np.eye(M_val) for M_val in model.M ]


In [None]:
F_array

array([[1.   , 0.5  , 0.125, 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   ],
       [0.   , 1.   , 0.5  , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   ],
       [0.   , 0.   , 1.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   ],
       [0.   , 0.   , 0.   , 1.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 1.   , 0.   , 0.   ,

: 

: 

: 

: 

# 3. Now run the filter

In [None]:
from kalman_filter import KalmanFilter

: 

: 

In [None]:


observations = merged_df 
model = 


x0 = 
P0 = #maybe should be part of the model?
self, model, observations, x0, P0

SyntaxError: invalid syntax (3739317549.py, line 2)

: 

: 