# Simple Classification Models
In this notebook we use the sklearn library to create both a support vector machine and gradient boosting classification model.

In [55]:
from sklearn.linear_model import LinearRegression
from sklearn import svm, metrics, ensemble
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# saving models
import pickle

## Load Data

In [48]:
ipos = pd.read_csv('../data/clean_bloomberg_with_sectors.csv')
ipo_labels = ipos["Underpriced"].tolist()
ipo_features = ipos._get_numeric_data().columns.values.tolist()[1:-1]
ipo_features.remove('Offer To 1st Close')

## Split data into Training and Testing sets
The data is split into 70% training data and 30% test data 

In [None]:
ipo_features = ipo_features[:-1]
ipo_features_data = ipos[ipo_features]
train_data, test_data, train_labels, test_labels = train_test_split(ipo_features_data, ipo_labels, test_size=0.3)

## Support Vector Machine
The algorithm creates a hyperplane which separates the data into classes.

In [39]:
clf = svm.SVC()
clf.fit(train_data, train_labels)
ipo_labels_pred_svm = clf.predict(test_data)

In [None]:
# save the model
with open('saved_models/SVM.pkl', 'wb') as file:
  pickle.dump(clf, file)

In [53]:
print("Accuracy:", metrics.accuracy_score(test_labels, ipo_labels_pred_svm))

Accuracy: 0.7196078431372549


# Gradient Boosting Classifier
Creates an ensemble of weak predictions decision trees.

In [43]:
params = {
    "n_estimators": 500,
    "learning_rate": 0.01,
    "loss": "log_loss",
}

In [44]:
reg = ensemble.GradientBoostingClassifier(**params)
reg.fit(train_data, train_labels)

In [49]:
# save the model
with open('saved_models/gradient_boosting_classifier.pkl', 'wb') as file:
  pickle.dump(reg, file)

In [45]:
ipo_labels_pred_gb = reg.predict(test_data)

In [54]:
print("Accuracy:", metrics.accuracy_score(test_labels, ipo_labels_pred_gb.round()))

Accuracy: 0.7392156862745098
