### Ulile

In [None]:
import os

import numpy as np
import pandas as pd
import requests
from scipy import stats as sts
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load the labelled abalone dataset and preview its first rows.
training_csv = "../data/abalone_dataset.csv"
df = pd.read_csv(training_csv)
df.head()

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,type
0,M,0.535,0.42,0.15,0.6995,0.2575,0.153,0.24,3
1,I,0.51,0.38,0.115,0.5155,0.215,0.1135,0.166,1
2,I,0.185,0.13,0.045,0.029,0.012,0.0075,0.0095,1
3,M,0.55,0.45,0.17,0.81,0.317,0.157,0.22,3
4,I,0.535,0.415,0.15,0.5765,0.3595,0.135,0.225,1


In [None]:
# Numeric predictors only: positional columns 1..7 of df (the categorical "sex"
# column at position 0 is handled separately later in the notebook).
X_numeric = df.iloc[:, 1:8]
# Target labels.
y = df.loc[:, "type"]
# Preview the frame that was just built (the original referenced an undefined
# name `X`, which fails on a fresh kernel).
X_numeric.head()

Unnamed: 0,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight
0,0.535,0.42,0.15,0.6995,0.2575,0.153,0.24
1,0.51,0.38,0.115,0.5155,0.215,0.1135,0.166
2,0.185,0.13,0.045,0.029,0.012,0.0075,0.0095
3,0.55,0.45,0.17,0.81,0.317,0.157,0.22
4,0.535,0.415,0.15,0.5765,0.3595,0.135,0.225


In [None]:
# Score each numeric feature against the target with a chi-squared test.
# k="all" keeps every feature so the complete ranking can be inspected.
feat_selec = SelectKBest(chi2, k="all")
kfit = feat_selec.fit(X_numeric, y)

# Per-feature p-values and chi2 scores, one row per column of X_numeric.
p_val = pd.DataFrame(kfit.pvalues_)
sc_val = pd.DataFrame(kfit.scores_)

nm = [
    'length', 'diameter', 'height', 'whole_weight', 'shucked_weight',
    'viscera_weight', 'shell_weight',
]

# Assemble the summary table side by side, then rank by ascending p-value.
feat_names = pd.DataFrame(X_numeric.columns)
sts_sum = pd.concat([feat_names, p_val, sc_val], axis=1)
sts_sum.columns = ["feat", "p_val", "sc_val"]
sts_sum = sts_sum.sort_values(by="p_val")
sts_sum

Unnamed: 0,feat,p_val,sc_val
3,whole_weight,7.372311e-72,327.576791
4,shucked_weight,1.339197e-26,119.150285
6,shell_weight,1.612878e-23,104.962874
5,viscera_weight,2.285995e-16,72.029121
0,length,4.799193e-08,33.704466
1,diameter,2.189638e-07,30.668719
2,height,0.001026332,13.763528


In [None]:
# Selection thresholds: maximum accepted p-value and minimum accepted chi2 score.
p_val_thold = 0.05
sc_thold = 5

# A feature is kept only when it passes both thresholds.
passes_score = sts_sum["sc_val"] >= sc_thold
passes_p_value = sts_sum["p_val"] <= p_val_thold
selected_var = sts_sum.loc[passes_score & passes_p_value, "feat"].tolist()

# Columns of X_rd follow the order of sts_sum (sorted by p-value).
X_rd = X_numeric[selected_var]
X_rd

Unnamed: 0,whole_weight,shucked_weight,shell_weight,viscera_weight,length,diameter,height
0,0.6995,0.2575,0.2400,0.1530,0.535,0.420,0.150
1,0.5155,0.2150,0.1660,0.1135,0.510,0.380,0.115
2,0.0290,0.0120,0.0095,0.0075,0.185,0.130,0.045
3,0.8100,0.3170,0.2200,0.1570,0.550,0.450,0.170
4,0.5765,0.3595,0.2250,0.1350,0.535,0.415,0.150
...,...,...,...,...,...,...,...
3127,0.9800,0.2585,0.3800,0.2070,0.545,0.405,0.175
3128,1.2590,0.4870,0.4450,0.2215,0.655,0.525,0.185
3129,0.4925,0.2410,0.1200,0.1075,0.450,0.340,0.120
3130,0.7270,0.2910,0.2350,0.1835,0.520,0.410,0.155


In [None]:
# Put the categorical "sex" column back next to the selected numeric features.
frames_to_join = (X_rd, df["sex"])
X_new = pd.concat(frames_to_join, axis=1)
X_new.head()

Unnamed: 0,whole_weight,shucked_weight,shell_weight,viscera_weight,length,diameter,height,sex
0,0.6995,0.2575,0.24,0.153,0.535,0.42,0.15,M
1,0.5155,0.215,0.166,0.1135,0.51,0.38,0.115,I
2,0.029,0.012,0.0095,0.0075,0.185,0.13,0.045,I
3,0.81,0.317,0.22,0.157,0.55,0.45,0.17,M
4,0.5765,0.3595,0.225,0.135,0.535,0.415,0.15,I


In [None]:
#std_scaller = StandardScaler()
#min_max = MinMaxScaler()
#X_prep =  pd.DataFrame(min_max.fit_transform(X))
#X_prep.set_index(X.index, inplace=True)

#X_prep.columns = X.columns
#X_prep["sex"] = df.sex
#X_prep.head()

In [None]:
# One-hot encode "sex"; drop_first=True omits the first category so the
# remaining indicator columns are not redundant.
sex_dummies = pd.get_dummies(X_new.sex, drop_first=True)

# Attach the indicators by index and discard the original text column.
X_trans = (
    X_new
    .merge(sex_dummies, left_index=True, right_index=True)
    .drop("sex", axis=1)
)
X_trans.head()

Unnamed: 0,whole_weight,shucked_weight,shell_weight,viscera_weight,length,diameter,height,I,M
0,0.6995,0.2575,0.24,0.153,0.535,0.42,0.15,0,1
1,0.5155,0.215,0.166,0.1135,0.51,0.38,0.115,1,0
2,0.029,0.012,0.0095,0.0075,0.185,0.13,0.045,1,0
3,0.81,0.317,0.22,0.157,0.55,0.45,0.17,0,1
4,0.5765,0.3595,0.225,0.135,0.535,0.415,0.15,1,0


In [None]:
# Re-select the target by X_trans's index labels so features and labels stay row-aligned.
y = y.loc[X_trans.index]

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_trans,
    y,
    test_size=0.30,
    random_state=33,
)

In [None]:
# Three candidate classifiers. The decision tree is seeded (same seed as the
# train/test split) because tree construction is randomised; without a seed the
# reported accuracy changes from run to run.
tree = DecisionTreeClassifier(random_state=33)
gnb = GaussianNB()
svc = SVC(kernel="poly")

# Fit every candidate on the same training split.
for model in (tree, gnb, svc):
    model.fit(X_train, y_train)

# Predictions on the held-out split, one vector per model.
y_hat0 = tree.predict(X_test)
y_hat1 = gnb.predict(X_test)
y_hat2 = svc.predict(X_test)

# Hold-out accuracy for each model.
print("Tree {}".format(accuracy_score(y_test, y_hat0)))
print("Gaus: {}".format(accuracy_score(y_test, y_hat1)))
print("SVM: {}".format(accuracy_score(y_test, y_hat2)))

Tree 0.5670212765957446
Gaus: 0.573404255319149
SVM: 0.6702127659574468


In [None]:
# Announce the submission stage, then load the unlabelled application data.
print(' - Aplicando modelo e enviando para o servidor')
app_csv = "../data/abalone_app.csv"
data_app = pd.read_csv(app_csv)

 - Aplicando modelo e enviando para o servidor


In [None]:
# Show the training feature layout for comparison with the application features built next.
X_trans.head()

Unnamed: 0,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,I,M
0,0.535,0.42,0.15,0.6995,0.2575,0.153,0.24,0,1
1,0.51,0.38,0.115,0.5155,0.215,0.1135,0.166,1,0
2,0.185,0.13,0.045,0.029,0.012,0.0075,0.0095,1,0
3,0.55,0.45,0.17,0.81,0.317,0.157,0.22,0,1
4,0.535,0.415,0.15,0.5765,0.3595,0.135,0.225,1,0


In [None]:
# Build the application feature matrix with exactly the columns, and column
# order, the models were fitted on (X_trans).
#
# Two things are fixed relative to a plain merge + drop:
#   * X_trans has its numeric columns reordered by p-value, while data_app keeps
#     file order; estimators consume columns by position, so the order must match.
#   * drop_first=True on the application data would drop whichever category sorts
#     first *in that file*, which is not necessarily the one dropped in training.
#     Encoding every category and then selecting the training columns keeps the
#     indicator columns consistent; a category absent from the file becomes all 0.
app_sex_dummies = pd.get_dummies(data_app.sex)
X_test = (
    data_app
    .merge(app_sex_dummies, left_index=True, right_index=True)
    .drop("sex", axis=1)
    .reindex(columns=X_trans.columns, fill_value=0)
)
X_test.head()

Unnamed: 0,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,I,M
0,0.6,0.48,0.175,1.229,0.4125,0.2735,0.415,0,1
1,0.545,0.385,0.15,1.1185,0.5425,0.2445,0.2845,0,0
2,0.645,0.52,0.18,1.285,0.5775,0.352,0.317,0,0
3,0.64,0.51,0.17,1.3715,0.567,0.307,0.409,0,1
4,0.655,0.54,0.215,1.5555,0.695,0.296,0.444,0,0


In [None]:
# Predict the application rows with the SVM.
y_pred = svc.predict(X_test)

URL = "https://aydanomachado.com/mlclass/03_Validation.php"

# The submission key is a credential and is not defined anywhere in this
# notebook. Use DEV_KEY if an earlier cell defined it, otherwise read the
# DEV_KEY environment variable; fail with a clear message instead of a NameError.
dev_key = globals().get("DEV_KEY") or os.environ.get("DEV_KEY")
if not dev_key:
    raise RuntimeError(
        "DEV_KEY is not defined: set the DEV_KEY environment variable before submitting."
    )

# Form payload: the key plus the predictions serialised as a JSON array.
data = {'dev_key': dev_key,
        'predictions': pd.Series(y_pred).to_json(orient='values')}

# A timeout prevents the cell from hanging indefinitely if the server does not answer.
r = requests.post(url=URL, data=data, timeout=30)

pastebin_url = r.text
print(" - Resposta do servidor:\n", r.text, "\n")


 - Resposta do servidor:
 {"status":"success","dev_key":"Leh11","accuracy":0.6172248803827751,"old_accuracy":0} 

