In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.svm import SVR, SVC
import statsmodels.api as sm
from sklearn.metrics import roc_auc_score, mean_squared_error, accuracy_score
import xgboost as xgb
import time
import random as rnd

standarscaler = StandardScaler()

import warnings
warnings.filterwarnings("ignore")

In [2]:
TrainData = pd.read_csv('count_train.csv')
X = TrainData.drop('yC', axis = 1)
Y = TrainData['yC']

Xs = standarscaler.fit_transform(X)
Xs = pd.DataFrame(Xs, index=X.index, columns=X.columns.values)

In [3]:
TestData = pd.read_csv('count_test.csv')
Xt = TestData.drop('yC', axis = 1)
Yt = TestData['yC']

Xts = standarscaler.transform(Xt)
Xts = pd.DataFrame(Xts, index=Xt.index, columns=Xt.columns.values)

In [4]:
variables = ['x4', 'x13', 'x25', 'x24', 'x27', 'x7', 'x14', 'x3', 'x2', 'x8', 'x22']

In [33]:
def capacidad_pred_especifica(Ytrue, Yest):
    y1 = Ytrue.values.copy()
    y2 = Yest.values.copy()
    y1[y1!=0] = 1
    y2[y2!=0] = 1
    return(accuracy_score(y1, y2))

In [83]:
General = []
Especifica = []

# xgboost

In [84]:
xg_reg = xgb.XGBRegressor(objective ='reg:squarederror')
xg_reg.fit(Xs[variables],Y)
preds = xg_reg.predict(Xts[variables])

MSE = np.sqrt(mean_squared_error(Yt, np.round(preds)))
ESP = capacidad_pred_especifica(Yt, pd.Series(np.round(preds)))
print('MSE es ' + str(MSE))
print('Especifica es ' + str(ESP))
General.append(MSE)
Especifica.append(ESP)

MSE es 1.0327955589886444
Especifica es 0.6


# Multiclass logistic regression

In [85]:
MLR = OneVsRestClassifier(LogisticRegression(), n_jobs=-1)
MLR.fit(Xs[variables], Y)
preds = MLR.predict(Xts[variables])

MSE = np.sqrt(mean_squared_error(Yt, np.round(preds)))
ESP = capacidad_pred_especifica(Yt, pd.Series(np.round(preds)))
print('MSE es ' + str(MSE))
print('Especifica es ' + str(ESP))
General.append(MSE)
Especifica.append(ESP)

MSE es 1.4605934866804429
Especifica es 0.6333333333333333


# Random Forest Regressor

In [86]:
RFR = RandomForestRegressor()
RFR.fit(Xs[variables],Y)
preds = RFR.predict(Xts[variables])

MSE = np.sqrt(mean_squared_error(Yt, np.round(preds)))
ESP = capacidad_pred_especifica(Yt, pd.Series(np.round(preds)))
print('MSE es ' + str(MSE))
print('Especifica es ' + str(ESP))
General.append(MSE)
Especifica.append(ESP)

MSE es 1.4023789311975086
Especifica es 0.5333333333333333


# Support Vector Machine Regressor

In [87]:
SVMR = SVR()
SVMR.fit(Xs[variables],Y)
preds = SVMR.predict(Xts[variables])

MSE = np.sqrt(mean_squared_error(Yt, np.round(preds)))
ESP = capacidad_pred_especifica(Yt, pd.Series(np.round(preds)))
print('MSE es ' + str(MSE))
print('Especifica es ' + str(ESP))
General.append(MSE)
Especifica.append(ESP)

MSE es 1.378404875209022
Especifica es 0.6333333333333333


# Support Vector Machine Classifier

In [88]:
SVMC = SVC()
SVMC.fit(Xs[variables],Y)
preds = SVMC.predict(Xts[variables])

MSE = np.sqrt(mean_squared_error(Yt, np.round(preds)))
ESP = capacidad_pred_especifica(Yt, pd.Series(np.round(preds)))
print('MSE es ' + str(MSE))
print('Especifica es ' + str(ESP))
General.append(MSE)
Especifica.append(ESP)

MSE es 1.4719601443879744
Especifica es 0.6333333333333333


# Multilayer Perceptron Classifier

In [89]:
MLPC = MLPClassifier()
MLPC.fit(Xs[variables],Y)
preds = MLPC.predict(Xts[variables])

MSE = np.sqrt(mean_squared_error(Yt, np.round(preds)))
ESP = capacidad_pred_especifica(Yt, pd.Series(np.round(preds)))
print('MSE es ' + str(MSE))
print('Especifica es ' + str(ESP))
General.append(MSE)
Especifica.append(ESP)

MSE es 1.4719601443879744
Especifica es 0.6


# Multilayer Perceptron Regressor

In [90]:
MLPR = MLPRegressor()
MLPR.fit(Xs[variables],Y)
preds = MLPR.predict(Xts[variables])

MSE = np.sqrt(mean_squared_error(Yt, np.round(preds)))
ESP = capacidad_pred_especifica(Yt, pd.Series(np.round(preds)))
print('MSE es ' + str(MSE))
print('Especifica es ' + str(ESP))
General.append(MSE)
Especifica.append(ESP)

MSE es 1.3038404810405297
Especifica es 0.5666666666666667


# Poisson Regression

In [91]:
PR = sm.GLM(Y, Xs[variables], family=sm.families.Poisson()).fit()
preds = PR.get_prediction(Xts[variables]).summary_frame()['mean']

MSE = np.sqrt(mean_squared_error(Yt, np.round(preds)))
ESP = capacidad_pred_especifica(Yt, pd.Series(np.round(preds)))
print('MSE es ' + str(MSE))
print('Especifica es ' + str(ESP))
General.append(MSE)
Especifica.append(ESP)

MSE es 1.2780193008453875
Especifica es 0.6


## Resultados

In [100]:
Resultados = pd.DataFrame([General, Especifica])
Resultados.columns = ['XGBoost', 'Multiclass Logistic Regression', 'Random Forest Regressor', 'SVM Regressor',
                      'SVM Classifier', 'Multilayer Percepton Classifier', 'Multilayer Perceptron Regressor',
                      'Poisson Regression']

Resultados

Unnamed: 0,XGBoost,Multiclass Logistic Regression,Random Forest Regressor,SVM Regressor,SVM Classifier,Multilayer Percepton Classifier,Multilayer Perceptron Regressor,Poisson Regression
0,1.032796,1.460593,1.402379,1.378405,1.47196,1.47196,1.30384,1.278019
1,0.6,0.633333,0.533333,0.633333,0.633333,0.6,0.566667,0.6
