In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import scale, PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor

from radio_snr import *

In [3]:
df = pd.read_csv('wspr_sample.csv', index_col=0)
df.head()

Unnamed: 0,spot,timestamp,reporter,reporter_grid,snr,frequency,tx_call,tx_grid,power,drift,distance,azimuth,band,version,code
4541493,1093722523,1520245320,DC5AL-R,JO31lk,9,7.040113,G0NJS,IO91vs,37,0,496,91,7,,0
4692526,1093866340,1520259000,KA3JIJ,EM84cj,-27,10.140175,WB0KSL,EM28nu,37,0,1105,113,10,,0
17907988,1107180087,1521372240,PI9ESA,JO22ff,-5,10.140179,IQ6KX,JN63so,20,0,1172,328,10,,0
10270295,1099476317,1520727720,PA0EHG,JO22hb,-17,3.594176,PA7MDJ,JO21is,23,0,33,350,3,,0
29854808,1119235300,1522494360,AG5OV,EL09,-10,7.040137,WA4KFZ,FM18gv,37,0,2236,249,7,1.9.0-rc3,0


In [4]:
df = preprocess_data(df)
df.head()

Unnamed: 0,spot,reporter,snr,frequency,tx_call,power,drift,distance,azimuth,band,version,code,rx_lat,rx_long,tx_lat,tx_long,day,hour
4541493,1093722523,DC5AL-R,9,7.040113,G0NJS,37,0,496,91,7,,0,51.4375,6.958333,51.770833,-0.208333,17595,10
4692526,1093866340,KA3JIJ,-27,10.140175,WB0KSL,37,0,1105,113,10,,0,34.395833,-83.791667,38.854167,-94.875,17595,14
17907988,1107180087,PI9ESA,-5,10.140179,IQ6KX,20,0,1172,328,10,,0,52.229167,4.458333,43.604167,13.541667,17608,11
10270295,1099476317,PA0EHG,-17,3.594176,PA7MDJ,23,0,33,350,3,,0,52.0625,4.625,51.770833,4.708333,17601,0
29854808,1119235300,AG5OV,-10,7.040137,WA4KFZ,37,0,2236,249,7,1.9.0-rc3,0,29.0,-100.0,38.895833,-77.458333,17621,11


In [5]:
targets = df['snr']
features = df[['power', 'drift', 'distance', 'azimuth', 'band', 'rx_lat', 'rx_long', 'tx_lat', 'tx_long', 'day', 'hour']]
features.head()

Unnamed: 0,power,drift,distance,azimuth,band,rx_lat,rx_long,tx_lat,tx_long,day,hour
4541493,37,0,496,91,7,51.4375,6.958333,51.770833,-0.208333,17595,10
4692526,37,0,1105,113,10,34.395833,-83.791667,38.854167,-94.875,17595,14
17907988,20,0,1172,328,10,52.229167,4.458333,43.604167,13.541667,17608,11
10270295,23,0,33,350,3,52.0625,4.625,51.770833,4.708333,17601,0
29854808,37,0,2236,249,7,29.0,-100.0,38.895833,-77.458333,17621,11


In [10]:
features.loc[:,:] = scale(features)
features.describe()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,power,drift,distance,azimuth,band,rx_lat,rx_long,tx_lat,tx_long,day,hour
count,15241.0,15241.0,15241.0,15241.0,15241.0,15241.0,15241.0,15241.0,15241.0,15241.0,15241.0
mean,-1.396793e-16,-6.14498e-16,8.595651e-18,-3.575208e-17,-2.369413e-16,-5.4283720000000006e-17,2.6078330000000002e-17,-1.003069e-17,6.266084e-17,1.607824e-16,-8.70164e-17
std,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033
min,-4.136937,-5.526641,-0.7916351,-1.561126,-0.7668448,-7.23642,-2.416686,-9.019212,-2.430953,-1.613624,-1.853105
25%,-0.9619939,0.1871674,-0.4988743,-0.9353673,-0.2357083,-0.2984445,-1.009491,-0.3160563,-1.027582,-0.8365406,-0.7949063
50%,0.004293119,0.1871674,-0.3035568,-0.06474661,-0.0586628,0.2926363,0.5091945,0.2213749,0.419715,-0.05945738,0.1121212
75%,0.9705801,0.1871674,0.04482426,0.9872533,0.2069054,0.5133065,0.6146588,0.5356107,0.5863322,0.8286377,0.8679775
max,3.7314,5.900976,7.265107,1.703702,41.36998,1.574625,3.584985,2.539965,3.593015,1.716733,1.623834


In [16]:
mse = []
r2 = []
for n in range(1, 25):
    print(n)
    knn = KNeighborsRegressor(n_neighbors=n)
    tmp_mse = -1*cross_val_score(knn, features, targets, scoring='neg_mean_squared_error')
    mse.append(tmp_mse)
    knn.fit(features, targets)
    tmp_r2 = knn.score(features, targets)
    r2.append(tmp_r2)
err = pd.DataFrame()
err['rmse'] = np.sqrt(np.array(mse))
err['r^2'] = r2
err.sort_values('rmse')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series