### 3.Calculation of RFI


We get  [RFI sources](https://science.nrao.edu/facilities/vla/observing/RFI/L-Band) for the the L-Band from the website.

**Note:** The RFIs with lower and upper frequencies have been splitted into two separate columns. 

We would save the the RFI on the website in a CSV file and do basic data cleaning.

In [1]:
#import packages
import math
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

from astropy import units as u
from astropy import constants as const

from sklearn import model_selection

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 

%matplotlib inline

In [2]:
rfi_data = pd.read_csv('vla_RFI.csv')
rfi_data.head()

Unnamed: 0,Frequency,Description,Origin,Classification
0,1000,RATSCAT Low FQ,"NM, WSMR",Intermittent
1,1030,"Aeronautical IFF, gnd2air","OHD, Everywhere",Intermittent
2,1025-1150,Aircraft navigation DME,DME/VOR sites,intermittent
3,1090,"Aeronautical IFF, air2gnd","OHD, Everywhere",Intermittent
4,1166-1186,"GPS,Galileo,GLONASS L5","OHD, MEO",Continuous


In [3]:
# drop the Unnamed: 4 column
# rfi_data.drop(columns=['Unnamed: 4'], inplace=True)

# check for empty values in the dataframe
rfi_data.isnull().values.any()

# fill empty values with "NA"
rfi_data.fillna('NA',inplace=True)

# convert all the values in the classsificalion column to lower case
rfi_data['Classification'] = rfi_data['Classification'].str.lower()
# check if there is a hyphen in the frequency column then split the frequency column into two columns,
# start_freq and end_freq, in the case where there is no hyphen then the start_freq and end_freq will be the same
rfi_data['start_freq'] = rfi_data['Frequency'].str.split('-').str[0].astype(float)
rfi_data['end_freq'] = rfi_data['Frequency'].str.split('-').str[1].astype(float)

# fill the end_freq column with NA values with the start_freq column values
rfi_data['end_freq'].fillna(rfi_data['start_freq'], inplace=True)

# get a 2D array of the start_freq and end_freq columns
freq_range = rfi_data[['start_freq','end_freq']].values


In [4]:
# get column names with empty values
rfi_data.columns[rfi_data.isnull().any()]

Index([], dtype='object')

There are no longer columns with empty values. All the columns that had empty values, i.e., `Origin` and `Classification` columns have been filled with `NA`.


Now lets have a look at the header of our data after the modifications we have made.

In [5]:
rfi_data.head()

Unnamed: 0,Frequency,Description,Origin,Classification,start_freq,end_freq
0,1000,RATSCAT Low FQ,"NM, WSMR",intermittent,1000.0,1000.0
1,1030,"Aeronautical IFF, gnd2air","OHD, Everywhere",intermittent,1030.0,1030.0
2,1025-1150,Aircraft navigation DME,DME/VOR sites,intermittent,1025.0,1150.0
3,1090,"Aeronautical IFF, air2gnd","OHD, Everywhere",intermittent,1090.0,1090.0
4,1166-1186,"GPS,Galileo,GLONASS L5","OHD, MEO",continuous,1166.0,1186.0


In [6]:
# add an amplitude column calculated as the speed of light divided by the frequency
# I have chose to use the end_freq column because it is the highest frequency
rfi_data['amplitude'] = const.c/((rfi_data['end_freq'])*10**6)

# get an array of the amplitude column
amplitude = rfi_data['amplitude'].values
amplitude

array([0.29979246, 0.29106064, 0.26068909, 0.27503895, 0.2527761 ,
       0.24982705, 0.24502857, 0.24235445, 0.24353571, 0.24300075,
       0.24274693, 0.23964225, 0.23935526, 0.2389889 , 0.23751769,
       0.23642938, 0.23357418, 0.23221724, 0.23149997, 0.2288492 ,
       0.2284133 , 0.22763285, 0.22540786, 0.22418077, 0.22288739,
       0.22092296, 0.21962817, 0.21898646, 0.21630047, 0.21645665,
       0.21521354, 0.21413747, 0.21292078, 0.2092792 , 0.20632654,
       0.20754064, 0.20696752, 0.20639756, 0.20625556, 0.205267  ,
       0.19966198, 0.19926385, 0.19781752, 0.19168316, 0.19652079,
       0.19600684, 0.19587877, 0.19150928, 0.18902425, 0.1890934 ,
       0.18667027, 0.18551513, 0.18426088, 0.18257762, 0.17844789,
       0.17770744, 0.17707765, 0.17614128, 0.17541981, 0.16927863,
       0.15064948])

In [7]:
rfi_data.head()

Unnamed: 0,Frequency,Description,Origin,Classification,start_freq,end_freq,amplitude
0,1000,RATSCAT Low FQ,"NM, WSMR",intermittent,1000.0,1000.0,0.299792
1,1030,"Aeronautical IFF, gnd2air","OHD, Everywhere",intermittent,1030.0,1030.0,0.291061
2,1025-1150,Aircraft navigation DME,DME/VOR sites,intermittent,1025.0,1150.0,0.260689
3,1090,"Aeronautical IFF, air2gnd","OHD, Everywhere",intermittent,1090.0,1090.0,0.275039
4,1166-1186,"GPS,Galileo,GLONASS L5","OHD, MEO",continuous,1166.0,1186.0,0.252776


In [8]:
# correct the classification values of 'pulsed & intermittant' to 'pulsed & intermittent'
rfi_data.loc[rfi_data['Classification'] == 'pulsed & intermittant','Classification'] = 'pulsed & intermittent'
# check unigue classifications in the dataframe
rfi_data['Classification'].unique()

array(['intermittent', 'continuous', 'pulsed & intermittent',
       'malfunction', 'na', 'sporadic'], dtype=object)

From the above code cell, we have six unique RFI classification classes namely; 

1. intermittent
2. continuous
3. pulsed & intermitted
4. malfunction
5. na
6. sporadic

In [9]:
# get unique values of the Classification column and store it in a list
classifications = rfi_data['Classification'].unique()
classifications

array(['intermittent', 'continuous', 'pulsed & intermittent',
       'malfunction', 'na', 'sporadic'], dtype=object)

In [10]:
# get unique values of the Description column and store it in a dictionary
descriptions = rfi_data['Description'].unique()
descriptions 

array(['RATSCAT Low FQ', 'Aeronautical IFF, gnd2air',
       'Aircraft navigation DME', 'Aeronautical IFF, air2gnd',
       'GPS,Galileo,GLONASS L5', 'VLA modem', 'TARS Pulsed radar',
       'GPS L2', 'AEROSTAT', 'FAA ARSR4 radar', 'GLONASS L2',
       'TARS pulsed radar', 'COMPASS E6', 'FAA ASR radar',
       'WSMR RAJPO transponder', 'GPS L3 NUDET', 'internal birdie',
       'WSMR telemetry', 'High alt balloon', 'Test telemetry',
       'High alt baloon', 'WSMR jamming telemetry', 'INMARSAT satellites',
       'GPS L1 jamming', 'GPS L1', 'GLONASS L1', 'IRIDIUM satellites',
       '2nd harmonic VLA radios', 'RADIOSONDES- WX baloons',
       'GOES weather satellite', 'NOAA weather satellite',
       'PCS cell phone base stations'], dtype=object)

In [11]:
start_freq_range = rfi_data[['end_freq']].values
end_freq_range = rfi_data[['start_freq']].values


In [12]:
#get all the 'continuous' classifications in the dataframe and their start and end frequencies
max_freq = rfi_data[rfi_data['Classification'] == 'intermintent']['end_freq'].max()



In [13]:
max_freq

nan

We can now generate the RFI by using the upper and lower bounds of frequency to determine what channel it appears in. We differentiate how we add each source of RFI based off of its `Classification` 

In [14]:
def calculate_RFI(classifications, amplitude, freq_samples=64, min_freq=1, max_freq=2, min_HA=-1.5, max_HA=1.5,sampling_H=60*3*2):
    '''
    This function calculates the RFI dataframe
    Parameters
    ----------
    classifications: list
        The list of classifications
    freq_samples: int
        The number of frequency samples
    min_freq: float
        The minimum frequency in GHz
    max_freq: float
        The maximum frequency in GHz
    min_HA: float
        The minimum hour angle in hours
    max_HA: float
        The maximum hour angle in hours
    sampling_H: float
        The sampling interval of the hour angle in hours
    '''
     # create a multidimensional meshgrid of the frequency and hour angle
    meshgrid = np.mgrid[min_freq:max_freq:freq_samples*1j,
                        min_HA:max_HA:sampling_H*1j]
    
    freq = meshgrid[0]
    HA = meshgrid[1]

    # create frequency and hour angle point size
    freq_point_size = (max_freq-min_freq)/freq_samples
    HA_point_size = (max_HA-min_HA)/sampling_H
    

    # create an empty RFI array which take the shape of the freq meshgrid
    RFI = np.zeros(freq.shape)
    point_size_label = np.zeros((freq.shape[0], freq.shape[1], 64))
    # print(freq.shape)
    
    # create an empty RFI array which take the shape of the freq meshgrid
    RFI = np.zeros(freq.shape)
    point_size_label = np.zeros((freq.shape[0], freq.shape[1], 64))
    # print(freq.shape)
    
    classification = classifications
    description = descriptions
    amp = amplitude



                
                
    # return print(RFI) 

In [15]:
def get_max_freq(rfi)

SyntaxError: invalid syntax (3447875365.py, line 1)

In [None]:
def get_min_freq(freq_range):
   '''
   This function returns the minimum frequency in the freq_range array
   Parameters and maps the values to the classifications
   ----------
   freq_range: 2D array
       The 2D array of the start and end frequencies
   classifications: list
       The list of classifications
   '''
   min_freq = np.amin(freq_range, axis=1)
   return min_freq

In [None]:
def get_classification(classifications):
    '''
    This function returns the classification of the current classification
    Parameters
    ----------
    classifications: list
        The list of classifications
    '''
    classification = classifications
    return classification


In [None]:
get_classification(classifications)