In [None]:
import pandas as pd

In [None]:
# Read the census-income CSV into a DataFrame.
# NOTE(review): absolute Colab-style path; presumably requires Google Drive to
# be mounted at /content/drive before this cell runs — confirm.
df = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Dataset/01-census-income-all.csv'
)

In [None]:
# Feature matrix: the eight predictor columns used by every model below.
# NOTE(review): 'captial-gain' is spelled differently from 'capital-loss'; it is
# kept verbatim here — confirm it matches the CSV header.
X = df[[
    'age',
    'edu num',
    'marital status',
    'occupation',
    'sex',
    'captial-gain',
    'capital-loss',
    'hours-per-week',
]]

# Target column.
y = df['label']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for evaluation.
# A fixed random_state makes the split repeatable, so the train/test scores
# reported further down are the same after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

In [None]:
# Column-wise preprocessing:
#   * numeric columns     -> min-max scaled using the range seen during fit
#   * categorical columns -> one-hot encoded
#
# handle_unknown='ignore': a category that appears only in the test split is
# encoded as all zeros instead of raising during transform().
#
# sparse_threshold=1.0: always stack the result as a sparse matrix. Later cells
# call `.toarray()` on the transformed data unconditionally, which would fail
# if the density-based default (0.3) ever produced a dense array.
transformer = make_column_transformer(
    (MinMaxScaler(), ['age', 'edu num', 'captial-gain', 'capital-loss', 'hours-per-week']),
    (OneHotEncoder(handle_unknown='ignore'), ['marital status', 'occupation', 'sex']),
    sparse_threshold=1.0,
)

In [None]:
# Fit the preprocessing on the training split only, then transform it.
X_train_transformed = transformer.fit_transform(X_train)

In [None]:
# Apply the already-fitted preprocessing to the test split (no refit).
X_test_transformed = transformer.transform(X_test)

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression classifier; the solver is capped at 1000 iterations.
logm = LogisticRegression(max_iter=1000)
logm.fit(X_train_transformed, y_train)

In [None]:
# Learned coefficients, one per transformed feature column.
logm.coef_

In [None]:
# Learned intercept (bias) term.
logm.intercept_

In [None]:
# Mean accuracy on the training split.
logm.score(X_train_transformed, y_train)

In [None]:
# Mean accuracy on the held-out test split.
logm.score(X_test_transformed, y_test)

In [None]:
# Raw decision scores for the first five test rows.
logm.decision_function(X_test_transformed[0:5])

# SVM

In [None]:
from sklearn.svm import LinearSVC

In [None]:
# Linear support-vector classifier.
# random_state pins the solver's internal shuffling (used by the dual
# coordinate-descent solver) so coefficients and scores are repeatable.
lsvc = LinearSVC(random_state=42)
lsvc.fit(X_train_transformed, y_train)

In [None]:
# Learned coefficients of the linear SVM.
lsvc.coef_

In [None]:
# Learned intercept of the linear SVM.
lsvc.intercept_

In [None]:
# Mean accuracy on the training split.
lsvc.score(X_train_transformed, y_train)

In [None]:
# Mean accuracy on the held-out test split.
lsvc.score(X_test_transformed, y_test)

In [None]:
from sklearn.svm import SVC

In [None]:
# Kernel SVM with a polynomial kernel.
# A default-kernel `SVC()` used to be created in a separate cell just before
# this one, but it was overwritten here without ever being fitted, so that
# dead cell has been removed.
svc = SVC(kernel='poly')

In [None]:
# Train the kernel SVM on the transformed training features.
svc.fit(X_train_transformed, y_train)

In [None]:
# Support vectors converted to a dense array for display.
# NOTE(review): `.toarray()` exists only on SciPy sparse matrices, so this
# assumes the model was fitted on sparse input — confirm the transformer
# output is sparse.
svc.support_vectors_.toarray()

In [None]:
# Shape of the array holding the indices of the support vectors.
svc.support_.shape

In [None]:
# Mean accuracy on the training split.
svc.score(X_train_transformed, y_train)

In [None]:
# Mean accuracy on the held-out test split.
svc.score(X_test_transformed, y_test)

# Voting Classifier

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

In [None]:
# Voting ensemble over five heterogeneous classifiers (VotingClassifier's
# default voting mode is used).
# The order of the list matters: a later cell reads `voting.estimators_[1]`
# and expects it to be the decision tree.
# random_state on the tree makes its tie-breaking between equally good splits
# repeatable; n_neighbors is spelled out instead of passed positionally.
voting = VotingClassifier(
    estimators=[
        ('knn', KNeighborsClassifier(n_neighbors=31)),
        ('tree', DecisionTreeClassifier(max_depth=10, random_state=42)),
        ('gnb', GaussianNB()),
        ('lg', LogisticRegression(max_iter=1000)),
        ('svc', SVC()),
    ]
)

In [None]:
# Fit every member of the ensemble on a dense copy of the training features.
# NOTE(review): presumably densified because GaussianNB does not accept sparse
# input — verify.
voting.fit(X_train_transformed.toarray(), y_train)

In [None]:
# Number of leaves in the second fitted member; index 1 matches the 'tree'
# entry in the estimators list given to VotingClassifier.
voting.estimators_[1].get_n_leaves()

In [None]:
# Mean accuracy on the training split.
voting.score(X_train_transformed.toarray(), y_train)

In [None]:
# Mean accuracy on the held-out test split.
voting.score(X_test_transformed.toarray(), y_test)

# Bagging

In [None]:
from sklearn.ensemble import BaggingClassifier

In [None]:
# Bagging ensemble of 50 depth-limited decision trees.
# oob_score=True requests an out-of-bag accuracy estimate after fitting.
# random_state fixes the bootstrap samples so that estimate and the scores
# below are repeatable.
bag = BaggingClassifier(
    DecisionTreeClassifier(max_depth=10),
    n_estimators=50,
    oob_score=True,
    random_state=42,
)

In [None]:
# Train the bagging ensemble on the transformed training features.
bag.fit(X_train_transformed, y_train)

In [None]:
# Out-of-bag score; available because the ensemble was built with oob_score=True.
bag.oob_score_

In [None]:
# Mean accuracy on the training split.
bag.score(X_train_transformed, y_train)

In [None]:
# Mean accuracy on the held-out test split.
bag.score(X_test_transformed, y_test)

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest of 100 trees; each tree is limited both by depth (10) and by
# leaf count (16).
# oob_score=True requests an out-of-bag accuracy estimate after fitting.
# random_state fixes the bootstrap samples and feature sub-sampling so the
# results are repeatable.
rf = RandomForestClassifier(
    n_estimators=100,
    max_leaf_nodes=16,
    max_depth=10,
    oob_score=True,
    random_state=42,
)

In [None]:
# Train the random forest on the transformed training features.
rf.fit(X_train_transformed, y_train)

In [None]:
# Out-of-bag score; available because the forest was built with oob_score=True.
rf.oob_score_

In [None]:
# Mean accuracy on the training split.
rf.score(X_train_transformed, y_train)

In [None]:
# Mean accuracy on the held-out test split.
rf.score(X_test_transformed, y_test)

# AdaBoost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# AdaBoost over 30 shallow (depth-3) decision trees with learning rate 1.
# random_state makes the boosting sequence, and therefore the estimator
# weights/errors inspected below, repeatable.
ada = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=3),
    n_estimators=30,
    learning_rate=1,
    random_state=42,
)

In [None]:
# Train the boosted ensemble on the transformed training features.
ada.fit(X_train_transformed, y_train)

In [None]:
# Weight assigned to each boosted estimator.
ada.estimator_weights_

In [None]:
# Classification error recorded for each boosted estimator.
ada.estimator_errors_

In [None]:
# Mean accuracy on the training split.
ada.score(X_train_transformed, y_train)

In [None]:
# Mean accuracy on the held-out test split.
ada.score(X_test_transformed, y_test)

# Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Gradient-boosted trees with library-default hyperparameters.
# random_state pins the estimator's internal randomness so repeated runs give
# the same model.
gdb = GradientBoostingClassifier(random_state=42)

In [None]:
# Train the gradient-boosting model on the transformed training features.
gdb.fit(X_train_transformed, y_train)

In [None]:
# Training score recorded at each boosting iteration.
gdb.train_score_

In [None]:
# Mean accuracy on the training split.
gdb.score(X_train_transformed, y_train)

In [None]:
# Mean accuracy on the held-out test split.
gdb.score(X_test_transformed, y_test)

# Stacking

In [None]:
from sklearn.ensemble import StackingClassifier

In [None]:
# Stacked ensemble: five base classifiers whose predictions are fed to a
# random-forest meta-learner.
# random_state on the tree-based members (base tree and final forest) makes the
# fitted stack repeatable; n_neighbors is spelled out instead of passed
# positionally.
stack = StackingClassifier(
    estimators=[
        ('knn', KNeighborsClassifier(n_neighbors=31)),
        ('tree', DecisionTreeClassifier(max_depth=10, random_state=42)),
        ('gnb', GaussianNB()),
        ('lg', LogisticRegression(max_iter=1000)),
        ('svc', SVC()),
    ],
    final_estimator=RandomForestClassifier(random_state=42),
)

In [None]:
# Fit the base estimators and the final estimator on a dense copy of the
# training features.
# NOTE(review): presumably densified because GaussianNB does not accept sparse
# input — verify.
stack.fit(X_train_transformed.toarray(), y_train)

In [None]:
# Mean accuracy on the training split.
stack.score(X_train_transformed.toarray(), y_train)

In [None]:
# Mean accuracy on the held-out test split.
stack.score(X_test_transformed.toarray(), y_test)