In [1]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [21]:
data = pd.read_csv('corn_cleaned.csv')
price = data['Prices received by farmers (Dollars per bushel)']
price_change = (price - price.shift(1)).apply(lambda x: 1 if x > 0 else 0)
price_change = price_change.reset_index().drop(['index'], axis=1).rename(columns={'Prices received by farmers (Dollars per bushel)':'Price Change'})

In [22]:
exports = data['Exports, market year (Million bushels)'].reset_index().drop(['index'], axis=1).rename(columns={'Exports, market year (Million bushels)':'Exports'})

In [23]:
corn_yield = pd.read_csv('corn_yield.csv')

corn_grain_all = corn_yield[(corn_yield['YIELD in BU / ACRE'] != ' ') & \
                        (corn_yield['COMMODITY'] == 'CORN, GRAIN')][['YEAR', 'YIELD in BU / ACRE', 'AREA HARVESTED in ACRES']] \
             .rename(columns={'YIELD in BU / ACRE':'YIELD', 'AREA HARVESTED in ACRES':'AREA'})

corn_grain_all = corn_grain_all[corn_grain_all['AREA'] != ' '].reset_index().drop(['index'], axis=1)
corn_grain_all['AREA'] = corn_grain_all['AREA'].apply(lambda a: int(a.replace(',', '')))
corn_grain_all['YIELD'] = corn_grain_all['YIELD'].map(float)
corn_grain_all['TOTAL_YIELD'] = corn_grain_all.apply(lambda row: (row['AREA']*row['YIELD']), axis=1)

yields = corn_grain_all.groupby('YEAR').sum()['YIELD'].reset_index()
yields_shift = yields.shift(-1)

  corn_yield = pd.read_csv('corn_yield.csv')


In [27]:
df = pd.concat([yields_shift, exports, price_change], axis=1).dropna()
print(df.columns)

Index(['YEAR', 'YIELD', 'Exports', 'Price Change'], dtype='object')


In [29]:
# svm with grid search
tuned_parameters = [{'kernel': ['rbf'],
                     'gamma': [1e-2, 1e-3],
                     'C': [1, 10, 100]}]

clf = GridSearchCV(SVC(), tuned_parameters, cv=2,
                   scoring='roc_auc_ovr', verbose=1, n_jobs=4)
clf.fit(df[['Exports', 'YIELD']][:-5], df['Price Change'][:-5])

print("Best parameter set found on development set:")
print(clf.best_params_, '\n')

Fitting 2 folds for each of 6 candidates, totalling 12 fits
Best parameter set found on development set:
{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} 





In [30]:
svm = SVC(C=1, gamma=0.01, kernel='rbf')
svm.fit(df[['Exports', 'YIELD']][:-5], df['Price Change'][:-5])
print('train set score:', svm.score(df[['Exports', 'YIELD']][:-5], df['Price Change'][:-5]))
print('test set score:', svm.score(df[['Exports', 'YIELD']][-5:], df['Price Change'][-5:]))

train set score: 1.0
test set score: 0.6
