In [57]:
import pandas as pd
import numpy as nd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score

In [2]:
path = '/Users/raisaurabh04/OneDrive/GreyAtom/Practice Dataset/bank_campaign_esemble_method_dataset.csv'
data = pd.read_csv(path)

In [3]:
log_clf_1 = LogisticRegression(random_state=0)
log_clf_2 = LogisticRegression(random_state=42)
decision_clf1 = DecisionTreeClassifier(criterion = 'entropy',random_state=0)
decision_clf2 = DecisionTreeClassifier(criterion = 'entropy', random_state=42)

In [4]:
Model_List=[('Logistic Regression 1', log_clf_1),
            ('Logistic Regression 2', log_clf_2),
            ('Decision Tree 1', decision_clf1),
            ('Decision Tree 2', decision_clf2)]

In [5]:
data.head(10)

X = data.iloc[ : , : -1]

y = data.iloc[ : , -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

voting_clf_hard = VotingClassifier(estimators=Model_List, voting='hard')

voting_clf_hard.fit(X_train, y_train)

hard_voting_score = voting_clf_hard.score(X_test, y_test)

voting_clf_soft = VotingClassifier(estimators=Model_List, voting='soft')

voting_clf_soft.fit(X_train, y_train)

soft_voting_score = voting_clf_soft.score(X_test, y_test)



In [6]:
max(soft_voting_score, hard_voting_score)

0.787996416840848

In [7]:
bagging_clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(), 
                                n_estimators = 100, random_state=0, max_samples=100)

bagging_clf.fit(X_train, y_train)

score_bagging = bagging_clf.score(X_test, y_test)

In [8]:
score_bagging

0.8139743206927441

In [9]:
pasting_clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(), 
                                n_estimators=100, max_samples=100, bootstrap=False, random_state=0)

pasting_clf.fit(X_train, y_train)

score_pasting = pasting_clf.score(X_test, y_test)

In [10]:
rf_clf = RandomForestClassifier(n_estimators=100, n_jobs=100, random_state=0, min_samples_leaf=100)

rf_clf.fit(X_train, y_train)

rf_clf.score(X_test, y_test)

0.8220364287847118

In [11]:
parameter_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

clf = RandomForestClassifier(random_state=0)

grid_search = GridSearchCV(estimator=clf, param_grid=parameter_grid)

grid_search.fit(X_train, y_train)

score_gs = grid_search.score(X_test, y_test)








































In [12]:
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 10,
 'min_samples_leaf': 10,
 'min_samples_split': 2}

In [13]:
clf = RandomForestClassifier(random_state=0)

random_search = RandomizedSearchCV(estimator=clf, param_distributions=parameter_grid, n_iter=20, random_state=0)

random_search.fit(X_train, y_train)

score_rs = random_search.score(X_test, y_test)





In [14]:
from mlxtend.classifier import StackingClassifier

classifier1 = DecisionTreeClassifier(random_state=0)
classifier2= DecisionTreeClassifier(random_state=1)
classifier3 = DecisionTreeClassifier(random_state=2)
classifier4= DecisionTreeClassifier(random_state=3)
classifier_list=[classifier1,classifier2,classifier3,classifier4]

m_classifier=LogisticRegression(random_state=0)

stacking = StackingClassifier(classifiers=classifier_list, meta_classifier=m_classifier)

stacking.fit(X_train, y_train)

score_stacking = stacking.score(X_test, y_test)





# Assessment



In [16]:
path = '/Users/raisaurabh04/OneDrive/GreyAtom/Practice Dataset/ensembling_assessment.csv'
df = pd.read_csv(path)

In [19]:
df.iloc[ : , : 5].head()

Unnamed: 0.1,Unnamed: 0,attr0,attr1,attr2,attr3
0,0,-4.049514,-5.055907,4.813832,10.975995
1,1,3.514292,4.721218,-2.536391,-8.388817
2,2,-11.31818,9.405884,29.141795,21.277405
3,3,-7.143218,-9.869755,-7.905797,4.271652
4,4,-5.027305,5.600857,10.312207,3.855865


In [53]:
df.info(verbose=True, null_counts=True)

df.isna().sum().value_counts()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7365 entries, 0 to 7364
Data columns (total 1091 columns):
Unnamed: 0    7365 non-null int64
attr0         7365 non-null float64
attr1         7365 non-null float64
attr2         7365 non-null float64
attr3         7365 non-null float64
attr4         7365 non-null float64
attr5         7365 non-null float64
attr6         7365 non-null float64
attr7         7365 non-null float64
attr8         7365 non-null float64
attr9         7365 non-null float64
attr10        7365 non-null float64
attr11        7365 non-null float64
attr12        7365 non-null float64
attr13        7365 non-null float64
attr14        7365 non-null float64
attr15        7365 non-null float64
attr16        7365 non-null float64
attr17        7365 non-null float64
attr18        7365 non-null float64
attr19        7365 non-null float64
attr20        7365 non-null float64
attr21        7365 non-null float64
attr22        7365 non-null float64
attr23        7365 non-null f

0    1091
dtype: int64

In [35]:
X = df.iloc[ : , : -1]

y = df.iloc[ : , -1]

X_train,X_test,y_train,y_test = train_test_split(X, y, test_size= 0.3, random_state=4)

scalar = MinMaxScaler()

scalar.fit(X_train, y_train)

X_train = scalar.transform(X_train)

X_test = scalar.transform(X_test)

In [54]:
lr = LogisticRegression()

lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)

roc_score = roc_auc_score(y_test, y_pred)

In [59]:
roc_score

0.834042610697639

In [67]:
dt = DecisionTreeClassifier(random_state=4)

dt.fit(X_train, y_train)

y_pred = dt.predict(X_test)

roc_score = roc_auc_score(y_test, y_pred)

In [68]:
roc_score

0.8732834218291986

In [74]:
rfc = RandomForestClassifier(random_state=4)

rfc.fit(X_train, y_train)

y_pred = rfc.predict(X_test)

roc_score = roc_auc_score(y_test, y_pred)



In [75]:
roc_score

0.9072251284029547

In [76]:
bagging_clf=BaggingClassifier(base_estimator=DecisionTreeClassifier(), 
                              max_samples=100, n_estimators=100, random_state=0)

bagging_clf.fit(X_train, y_train)

score_bagging = bagging_clf.score(X_test, y_test)

In [80]:
clf_1 = LogisticRegression()
clf_2 = DecisionTreeClassifier(random_state=4)
clf_3 = RandomForestClassifier(random_state=4)

model_list = [('lr',clf_1),('DT',clf_2),('RF',clf_3)]

In [81]:
voting_clf_hard = VotingClassifier(estimators=model_list, voting='hard')

voting_clf_hard.fit(X_train, y_train)

hard_voting_score = voting_clf_hard.score(X_test, y_test)