# AIM - To create a ML model to identify FM stations

Python version used -> python 3

# Step - 1
## Run RTL_power command 

Install GQRX software then connect RTL_SDR dongle and open terminal.

Note : Following command only works for Linux and Mac OS.

COMMAND -> rtl_power -f min:max:bin -g gain -i interval -e runtime filename.ext
where
min is initial frequency
max is terminal frequency
bin is frequency interval
interval in seconds

COMMAND I USED - 

rtl_power -f 87M:108M:1k -g 20 -i 10 -e 5m logfile.csv

All the data is stored in a csv file logfile.csv.

# Step - 2
## Data cleaning 

We will now convert obtained csv into a desireable pandas dataframe

In [68]:
import numpy as np
import pandas as pd
dfs = pd.read_csv("logfile.csv", header=None)

In [69]:
def conversion_function(df):    
    x=[] # Stores all the frequencies
    y=[] # Stores corresponding power value
    z=[] # Stores is_FM, if 1 then yes, if 0 then no

    # following array contains all BAY AREA FM STATIONS
    arr = [87.9, 88.1, 88.5, 89.1, 89.3, 89.5, 89.7, 89.9, 90.1, 90.3, 90.5, 90.7, 91.1, 91.5, 91.7, 92.1, 92.3, 92.7, 93.3, 94.1, 94.5, 94.9, 95.3, 95.7, 96.1, 96.5, 97.3, 98.1, 98.5, 98.9, 99.7, 100.3, 101.3, 101.7, 102.1, 102.9, 103.3, 103.7, 104.5, 104.9, 105.3, 105.7, 106.1, 106.5, 106.9, 107.7]

    #following code makes 3 lists, x-> freq,y-> power, z-> Is_FM?
    for j in range(0,len(df)):
        for i in range(6,4103):
            y.append(float(df[i][j]))
            r = (df[3][j]-df[2][j])/4096
            temp = df[3][j]+(r*(i-6))
            x.append(temp)
            check = round(temp/100000)
            check = int(check)
            check = float(check/10)
            n=0
            if(check in arr):
                n=1
            z.append(int(n))
    df = pd.DataFrame({"Frequency":x,"Power":y,"Is_FM":z})
    return df

In [70]:
%%time
dfs = conversion_function(dfs)

Wall time: 1min 56s


# Step 3 - 
## Train all the classification models to find the best one

Note -  For training, frequency range is from 87 MHz to 108 MHz

In [71]:
%%time
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

Wall time: 0 ns


In [72]:
X = dfs.drop('Is_FM', axis=1)
y = dfs['Is_FM']

In [73]:
%%time

knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X, y)

y_predict = knn.predict(X)
print("Using KNN -\nTraining accuracy :",accuracy_score(y,y_predict)*100,"%")

Using KNN -
Training accuracy : 100.0 %
Wall time: 2min 47s


In [74]:
from sklearn.externals import joblib
joblib.dump(knn, 'findingFMstations_trainedmodel.pkl') 

['findingFMstations_trainedmodel.pkl']

# Step - 4
## Testing with test data in frequency range 87MHz to 108MHz

Command used to generate file -

rtl_power -f 87M:108M:1k -g 20 -i 10 -e 1m logfile3.csv

Generating list of fm stations recognised -

In [75]:
def conv_func(df):
    x=[] # Stores all the frequencies
    y=[] # Stores corresponding power value
    
    for j in range(0,len(df)):
        for i in range(6,4103):
            y.append(float(df[i][j]))
            r = (df[3][j]-df[2][j])/4096
            temp = df[3][j]+(r*(i-6))
            x.append(temp)
    df = pd.DataFrame({"Frequency":x,"Power":y})
    return df

In [76]:
import numpy as np
import pandas as pd
import os
import subprocess
from sklearn.externals import joblib

#subprocess.run(["rtl_power", "-f", str(min)+"M:"+str(max)+"M:1k", "-g", "20", "-i", "10", "-e", "1m", "logfile3.csv"])
dfs = pd.read_csv("logfile3.csv", header=None)
ktr = joblib.load('findingFMstations_trainedmodel.pkl')
dfs = conv_func(dfs)
y_predict3 = ktr.predict(dfs)
l3=[]
for i in range(0,len(y_predict3)):
    num = round(dfs["Frequency"][i]/100000)
    num = int(num)
    num = float(num/10)
    if(y_predict3[i]==1):
        if(not num in l3):
            l3.append(num)
#os.remove("logfile3.csv")
#return l3

In [77]:
print(l3)

[89.7, 89.9, 90.1, 90.3, 90.5, 90.7, 91.1, 91.5, 91.7, 92.1, 92.3, 92.7, 93.3, 94.1, 94.5, 94.9, 95.3, 95.7, 96.1, 96.5, 97.3, 98.1, 98.5, 98.9, 99.7, 100.3, 101.3, 101.7, 102.1, 102.9, 103.3, 103.7, 104.5, 104.9, 105.3, 105.7, 106.1, 106.5, 106.9, 107.7]


In [78]:
#Bay Area FM Radio Station
radio = [87.9, 88.1, 88.5, 89.1, 89.3, 89.5, 89.7, 89.9, 90.1, 90.3, 90.5, 90.7, 91.1, 91.5, 91.7, 92.1, 92.3, 92.7, 93.3, 94.1, 94.5, 94.9, 95.3, 95.7, 96.1, 96.5, 97.3, 98.1, 98.5, 98.9, 99.7, 100.3, 101.3, 101.7, 102.1, 102.9, 103.3, 103.7, 104.5, 104.9, 105.3, 105.7, 106.1, 106.5, 106.9, 107.7]

In [79]:
ctr=0
for i in range (0,len(l3)):
    if(l3[i] in radio):
        ctr=ctr+1
print("KNN model found", len(l3),"radio stations out of which actual Bay Area FM Radio Station are",ctr,"\nAnd",ctr,"stations were found out of",len(radio),"Bay Area FM Radio Station\nKNN model accuracy -",ctr/len(l3)*100,"%")

KNN model found 40 radio stations out of which actual Bay Area FM Radio Station are 40 
And 40 stations were found out of 46 Bay Area FM Radio Station
KNN model accuracy - 100.0 %
