In [15]:
from sklearn.neural_network import MLPClassifier as nnet
from sklearn.tree import DecisionTreeClassifier as dtree
from sklearn.model_selection import StratifiedKFold as strat
import numpy as np

# converts the data in the file to two arrays, one with the features and one with the class labels
def convert_data_to_array(filename, n_features=21):
    """Load a comma-separated numeric file and split it into features and labels.

    Parameters
    ----------
    filename : str, path or file-like
        Passed straight to ``np.genfromtxt`` (comma delimiter, float64).
    n_features : int, optional
        Number of leading columns returned as features. Defaults to 21,
        the value this notebook previously hard-coded.

    Returns
    -------
    tuple of (features, classes)
        ``features`` holds columns ``[0, n_features)`` and ``classes`` holds
        every remaining column. Both are 2-D float64 arrays with one row per
        line of the file.
    """
    data = np.genfromtxt(filename, delimiter=',', dtype=np.float64)
    # genfromtxt collapses a one-row file into a 1-D array; restore the row
    # axis so the column-wise split below works for any number of rows.
    data = np.atleast_2d(data)
    features, classes = np.split(data, [n_features], axis=1)
    return (features, classes)

# Read the training file, then flatten the label array to 1-D (not a column
# vector) so it can be indexed with the fold indices later on.
feat, clss = convert_data_to_array("regression_train.data")
clss = clss.reshape(-1)
print(clss)

[81. 91. 96. ... 87. 98. 90.]


In [18]:
from sklearn.metrics import mean_squared_error
def get_validation_accuracy(classifier, data, labels=None, n_splits=10, random_state=None):
    """Return the mean cross-validated mean squared error of an estimator.

    Despite the name, the returned value is an error (lower is better),
    not an accuracy.

    Parameters
    ----------
    classifier : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refitted on
        every fold, so it is left fitted on the last training split.
    data : np.ndarray
        Feature matrix, indexed by row with the fold indices.
    labels : np.ndarray, optional
        Targets aligned with ``data``. Defaults to the notebook-level
        ``clss`` array, which is what this function always used before.
    n_splits : int, optional
        Number of folds (default 10).
    random_state : int or None, optional
        Seed for the fold shuffle. ``None`` (default) keeps the previous
        unseeded behaviour, so results differ between runs.

    Returns
    -------
    float
        Mean of the per-fold ``mean_squared_error`` values.
    """
    if labels is None:
        labels = clss

    # NOTE(review): StratifiedKFold stratifies on the label values; this
    # presumably only works here because the targets are whole numbers.
    # Consider plain KFold for truly continuous targets.
    skf = strat(n_splits=n_splits, shuffle=True, random_state=random_state)

    fold_errors = []
    for train_index, test_index in skf.split(data, labels):
        # slice features and targets for this fold
        x_train, x_test = data[train_index], data[test_index]
        y_train, y_test = labels[train_index], labels[test_index]

        # fit on the training part, score on the held-out part
        classifier.fit(x_train, y_train)
        predictions = classifier.predict(x_test)
        fold_errors.append(mean_squared_error(y_test, predictions))

    # average over the folds actually produced rather than a hard-coded 10
    return sum(fold_errors) / len(fold_errors)

# First Iteration - Linear Regression

In [23]:
from sklearn.linear_model import LinearRegression
# Baseline: LinearRegression with constructor defaults. The printed value is the
# cross-validated mean squared error from get_validation_accuracy (lower is better).
error = get_validation_accuracy(LinearRegression(), feat)
print(error)

88.70759668529475




In [25]:
from sklearn.linear_model import LogisticRegression
# NOTE(review): LogisticRegression is a classifier, so here every distinct target
# value is treated as a class label; the score is still the MSE of its predictions.
# Confirm this comparison against the regressors is intended.
error = get_validation_accuracy(LogisticRegression(), feat)
print(error)



27.086508762088034


In [26]:
from sklearn.linear_model import Lasso
# Lasso with constructor defaults (no tuning of the penalty strength);
# prints its cross-validated mean squared error.
error = get_validation_accuracy(Lasso(), feat)
print(error)



90.19346786033229


In [27]:
from sklearn.linear_model import ElasticNet
# ElasticNet with constructor defaults; prints its cross-validated mean squared error.
error = get_validation_accuracy(ElasticNet(), feat)
print(error)



90.45215279617274


In [28]:
from sklearn.linear_model import Ridge
# Ridge with constructor defaults; prints its cross-validated mean squared error.
error = get_validation_accuracy(Ridge(), feat)
print(error)

88.76923717268775




In [29]:
from sklearn.svm import SVR
# SVR with constructor defaults, fitted on the features as loaded (no scaling
# step is applied in this notebook); prints its cross-validated mean squared error.
error = get_validation_accuracy(SVR(), feat)
print(error)



344.94063587470595


In [30]:
from sklearn.ensemble import AdaBoostRegressor
# AdaBoostRegressor with constructor defaults; prints its cross-validated mean squared error.
error = get_validation_accuracy(AdaBoostRegressor(), feat)
print(error)



18.330581175034634


In [31]:
from sklearn.ensemble import BaggingRegressor
# BaggingRegressor with constructor defaults (its default base estimator);
# prints its cross-validated mean squared error.
error = get_validation_accuracy(BaggingRegressor(), feat)
print(error)



6.517076323645743


In [33]:
from sklearn.ensemble import RandomForestRegressor
# RandomForestRegressor with constructor defaults; prints its cross-validated mean squared error.
error = get_validation_accuracy(RandomForestRegressor(), feat)
print(error)



7.330632518627072


In [40]:
# BaggingRegressor is already imported by an earlier cell; this re-import is
# redundant but harmless.
from sklearn.ensemble import BaggingRegressor
# Bagging ensemble whose base estimator is itself a RandomForestRegressor;
# prints its cross-validated mean squared error.
error = get_validation_accuracy(BaggingRegressor(RandomForestRegressor()), feat)
print(error)



6.289713066508708


# Best Classifier

In [43]:
# Load the test features with the same loader used for the training file, so
# the parsing and the column split live in one place. Everything after the
# feature columns is discarded.
data, _ = convert_data_to_array('regression_test.data')

# Refit the selected model on the complete training set.
classifier = BaggingRegressor(RandomForestRegressor())
classifier.fit(feat, clss)

# Predict the test rows and write one prediction per line.
results = list(classifier.predict(data))
with open('regression_results.txt', 'w') as f:  # write-only; the file is never read back
    f.writelines(str(result) + '\n' for result in results)