In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv("edataset.csv")

In [3]:
X = data[data.columns[29:34]]
Y = data[data.columns[34:35]]

In [4]:
X.shape,Y.shape

((40, 5), (40, 1))

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=42)

In [7]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((28, 5), (12, 5), (28, 1), (12, 1))

In [8]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.gaussian_process import GaussianProcessRegressor

In [9]:
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
	# load all train
	X_train, y_train
	print(X_train.shape, y_train.shape)
	# load all test
	X_test, y_test
	print(X_test.shape, y_test.shape)
	# flatten y
	y_train, y_test
	print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
	return X_train, y_train, X_test, y_test

In [10]:
def define_models(models=dict()):
    models['knn'] =KNeighborsRegressor()
    models['linReg'] = LinearRegression()
    models['gpr'] = GaussianProcessRegressor()
    print('Defined %d models' % len(models))
    return models

In [11]:
def evaluate_model(X_train, y_train, X_test, y_test, model):
	# fit the model
	model.fit(X_train,y_train)
	# make predictions
	ypred = model.predict(X_test)
	# evaluate predictions
	score = model.score(X_train,y_train)
	return score * 100.0

In [12]:
# evaluate a dict of models {name:object}, returns {name:score}
def evaluate_models(X_train, y_train, X_test, y_test, models):
	results = dict()
	for name, model in models.items():
		# evaluate the model
		results[name] = evaluate_model(X_train, y_train, X_test, y_test, model)
		# show process
		print('>%s: %.3f' % (name, results[name]))
	return results

In [13]:
# print and plot the results
def summarize_results(results, maximize=True):
	# create a list of (name, mean(scores)) tuples
	mean_scores = [(k,v) for k,v in results.items()]
	# sort tuples by mean score
	mean_scores = sorted(mean_scores, key=lambda x: x[1])
	# reverse for descending order (e.g. for accuracy)
	if maximize:
		mean_scores = list(reversed(mean_scores))
	print()
	for name, score in mean_scores:
		print('Name=%s, Score=%.3f' % (name, score))

In [14]:
# load dataset
X_train, y_train, X_test, y_test = load_dataset()
# get model list
models = define_models()
# evaluate models
results = evaluate_models(X_train, y_train, X_test, y_test, models)
# summarize results
summarize_results(results)

(28, 5) (28, 1)
(12, 5) (12, 1)
(28, 5) (28, 1) (12, 5) (12, 1)
Defined 3 models
>knn: 36.807
>linReg: 52.911
>gpr: 100.000

Name=gpr, Score=100.000
Name=linReg, Score=52.911
Name=knn, Score=36.807
