In [1]:
import data_matrix as dm
import numpy as np

In [2]:
from sklearn.externals import joblib
from sklearn.svm import SVR

In [3]:
def get_model(X,y):
    clf = SVR()
    clf.fit(X, y)
    return clf

# Simple Example

In [4]:
women_wf = b"SL.TLF.TOTL.FE.ZS"

In [40]:
X,y = dm.get_training_matrix(p_sc=women_wf,cc=b"USA")
svm = get_model(X[:-1],y[:-1])

In [41]:
predicted_slope = svm.predict([X[-1]])
predicted_slope

array([-0.01714092])

In [7]:
# Take the 2013 value and add the predicted slope
prediction = dm.get_feature_value(women_wf, b"USA", 2013) + predicted_slope[0]
actual = dm.get_feature_value(women_wf, b"USA", 2014)
previous_years = [dm.get_feature_value(women_wf, b"USA", y) for y in range(2005,2015)]

In [8]:
print("For the year 2014, i predict {}, but the actual value is {}".format(prediction, actual))
print("Historically, the values have been {}".format(previous_years))

For the year 2014, i predict 45.77551883459091, but the actual value is 45.83927536010742
Historically, the values have been [45.764839, 45.756645, 45.805981, 45.950191, 46.125431, 46.205242, 46.057381, 45.936733, 45.79266, 45.839275]


# Testing a spike

In [9]:
X,y = dm.get_training_matrix(p_sc=women_wf,cc=b"USA")
svm = get_model(X,y)

In [10]:
data_2015, key_2015 = dm.get_xrow(women_wf, b"USA", 2015)

In [25]:
# Add the spike
spike_sc = b"SL.TLF.TOTL.IN"
spike_cc = b"USA"
spike_yr = 2015
spike_amt = 2000000000000000

In [26]:
print("These are the SC slopes we'll add the spike to")
print(key_2015[(key_2015[:,0] == spike_sc) & (key_2015[:,1] == spike_cc)])
print("These are the current slopes")
print(data_2015[(key_2015[:,0] == spike_sc) & (key_2015[:,1] == spike_cc)])

These are the SC slopes we'll add the spike to
[[b'SL.TLF.TOTL.IN' b'USA' b'2014' b'2013']
 [b'SL.TLF.TOTL.IN' b'USA' b'2014' b'2012']
 [b'SL.TLF.TOTL.IN' b'USA' b'2014' b'2011']]
These are the current slopes
[ 1197696.  1719312.  3068448.]


In [27]:
spike_X,key_2015 = dm.get_xrow_with_spike(p_sc=women_wf, p_cc=b"USA", year=spike_yr, spike_sc=spike_sc, spike_cc=spike_cc, spike_amt=spike_amt)

print("These are the new slopes")
print(spike_X[(key_2015[:,0] == spike_sc) & (key_2015[:,1] == spike_cc)])

1.61049e+08 2.0000000012e+15 2.00000016105e+15 1.59851e+08
1.61049e+08 2.00000000172e+15 2.00000016105e+15 1.5933e+08
1.61049e+08 2.00000000307e+15 2.00000016105e+15 1.5798e+08
These are the new slopes
[  2.00000000e+15   2.00000000e+15   2.00000000e+15]


# wtf

In [44]:
print(svm.predict(np.ones(data_2015.shape)), svm.predict(data_2015))

[-0.01714092] [-0.01714092]




In [30]:
# We can use this spike for prediction
print("Predicted slope w/o spike {}, with spike {}".format(svm.predict(data_2015), svm.predict(0 * spike_X)))

Predicted slope w/o spike [-0.01714092], with spike [-0.01714092]


