In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Data preprocessing

In [2]:
# Read the reduced Kickstarter export and replace every missing value with 0.
kickstarter = pd.read_csv('../datasets/kickstarter-2018-reduced.csv').fillna(0)

# Swap each categorical column for integer codes; the original labels are kept
# per column so a code can be translated back later.
uniques = {}
for column in ('main_category', 'country', 'state', 'category'):
    codes, uniques[column] = pd.factorize(kickstarter[column])
    kickstarter[column] = codes
main = uniques['main_category']
country = uniques['country']
state = uniques['state']
cat = uniques['category']

kickstarter.head()

Unnamed: 0,category,main_category,currency,goal,launched_utc,state,backers,country,usd pledged,time_open
0,0,0,GBP,1000.0,1439295148,0,0,0,0.0,1415
1,1,1,USD,30000.0,1504327437,0,15,1,100.0,1438
2,1,1,USD,45000.0,1357950050,0,3,1,220.0,1080
3,2,2,USD,5000.0,1331954651,0,1,1,1.0,721
4,3,1,USD,19500.0,1435998903,1,14,1,1283.0,1336


In [3]:
# Summary statistics, shown with one row per described column.
kickstarter.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
category,29999.0,38.86816,34.54452,0.0,9.0,30.0,56.0,158.0
main_category,29999.0,4.961465,4.103028,0.0,1.0,4.0,8.0,14.0
goal,29999.0,39679.89,743464.3,1.0,2000.0,5000.0,16000.0,100000000.0
launched_utc,29999.0,1411615000.0,61066110.0,3600.0,1367597000.0,1417417000.0,1458149000.0,1514881000.0
state,29999.0,0.9026301,1.027857,0.0,0.0,0.0,2.0,5.0
backers,29999.0,104.2569,899.3266,0.0,2.0,12.0,56.0,85581.0
country,29999.0,1.727258,2.814654,0.0,1.0,1.0,1.0,22.0
usd pledged,29999.0,6834.517,75086.48,0.0,16.0,390.0,3044.61,8596475.0
time_open,29999.0,831.2338,2078.19,24.0,720.0,720.0,889.5,356809.0


In [4]:
# Target: the encoded main category. Features: every remaining column.
y = kickstarter['main_category']
X = kickstarter.drop(['main_category'], axis='columns')

In [5]:
# Keep only the rows used for the two-class problem: main-category codes 1 and 6,
# state code 2, and time_open below 500. The codes come from pd.factorize in the
# loading cell, so `main[[1, 6]]` and `state[2]` show the labels they stand for.
# NOTE(review): the unit of `time_open` is not visible here — confirm what the
# 500 cut-off means.
#
# The mask is built once, from `kickstarter` itself rather than from the
# already-filtered X / y, so re-running this cell always gives the same result.
selected = (
    kickstarter['main_category'].isin([1, 6])
    & (kickstarter['time_open'] < 500)
    & (kickstarter['state'] == 2)
)
X = kickstarter.loc[selected, ['backers', 'usd pledged']]
y = kickstarter.loc[selected, 'main_category']
X.describe()

Unnamed: 0,backers,usd pledged
count,417.0,417.0
mean,153.165468,7583.792839
std,448.733473,46883.907328
min,1.0,0.0
25%,17.0,531.0
50%,36.0,1910.0
75%,87.0,4522.0
max,5794.0,917864.02


In [6]:
# Hold out 40% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

In [None]:
# Fit one SVC per kernel with decision_function_shape='ovo' and plot its
# decision surface over the two features.
# NOTE: `y` holds only two classes here, and scikit-learn ignores
# decision_function_shape for binary problems, so 'ovo' and 'ovr' produce the
# same surface.

# The evaluation grid depends only on X, so build it once for all kernels.
x_min = X.values[:, 0].min()
x_max = X.values[:, 0].max()
y_min = X.values[:, 1].min()
y_max = X.values[:, 1].max()
XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
# Same column names as the training frame, so the fitted pipeline sees
# consistently named features.
grid = pd.DataFrame(np.c_[XX.ravel(), YY.ravel()], columns=X.columns)

for fig_num, kernel in enumerate(('linear', 'rbf', 'poly')):
    # SVMs are not scale invariant, and the two features differ by orders of
    # magnitude, so standardise them inside a pipeline. The pipeline still takes
    # raw feature values, which keeps the plot in the original units.
    clf = make_pipeline(
        StandardScaler(),
        svm.SVC(kernel=kernel, decision_function_shape='ovo'),
    )
    clf.fit(X_train, y_train)

    fig = plt.figure(fig_num)
    fig.clf()
    ax = fig.add_subplot(111)
    ax.scatter(X.values[:, 0], X.values[:, 1], c=y, zorder=10, cmap=plt.cm.Paired,
               edgecolor='k', s=20)

    # Circle out the test data
    ax.scatter(X_test.values[:, 0], X_test.values[:, 1], s=80, facecolors='none',
               zorder=10, edgecolor='k')

    ax.axis('tight')

    # Signed distance to the separating surface at every grid point.
    Z = clf.decision_function(grid).reshape(XX.shape)

    # Colour the two predicted regions, then draw the boundary (solid) and the
    # +/-0.5 decision-function levels (dashed).
    ax.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
    ax.contour(XX, YY, Z, colors=['k', 'k', 'k'],
               linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

    ax.set_xlabel(X.columns[0])
    ax.set_ylabel(X.columns[1])
    ax.set_title(kernel)

plt.show()

In [None]:
# Fit one SVC per kernel with decision_function_shape='ovr' and plot its
# decision surface over the two features.
# NOTE: `y` holds only two classes here, and scikit-learn ignores
# decision_function_shape for binary problems, so 'ovr' and 'ovo' produce the
# same surface.

# The evaluation grid depends only on X, so build it once for all kernels.
x_min = X.values[:, 0].min()
x_max = X.values[:, 0].max()
y_min = X.values[:, 1].min()
y_max = X.values[:, 1].max()
XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
# Same column names as the training frame, so the fitted pipeline sees
# consistently named features.
grid = pd.DataFrame(np.c_[XX.ravel(), YY.ravel()], columns=X.columns)

for fig_num, kernel in enumerate(('linear', 'rbf', 'poly')):
    # SVMs are not scale invariant, and the two features differ by orders of
    # magnitude, so standardise them inside a pipeline. The pipeline still takes
    # raw feature values, which keeps the plot in the original units.
    clf = make_pipeline(
        StandardScaler(),
        svm.SVC(kernel=kernel, decision_function_shape='ovr'),
    )
    clf.fit(X_train, y_train)

    fig = plt.figure(fig_num)
    fig.clf()
    ax = fig.add_subplot(111)
    ax.scatter(X.values[:, 0], X.values[:, 1], c=y, zorder=10, cmap=plt.cm.Paired,
               edgecolor='k', s=20)

    # Circle out the test data
    ax.scatter(X_test.values[:, 0], X_test.values[:, 1], s=80, facecolors='none',
               zorder=10, edgecolor='k')

    ax.axis('tight')

    # Signed distance to the separating surface at every grid point.
    Z = clf.decision_function(grid).reshape(XX.shape)

    # Colour the two predicted regions, then draw the boundary (solid) and the
    # +/-0.5 decision-function levels (dashed).
    ax.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
    ax.contour(XX, YY, Z, colors=['k', 'k', 'k'],
               linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

    ax.set_xlabel(X.columns[0])
    ax.set_ylabel(X.columns[1])
    ax.set_title(kernel)

plt.show()