In [480]:
import pandas as pd
import numpy as np
from xgboost.dask import predict

In [481]:
df1 = pd.DataFrame()
df1['X1'] = [1,2,3,4,5,6,6,7,9,9]
df1['X2'] = [5,3,6,8,1,9,5,8,9,2]
df1['label'] = [1,1,0,1,0,1,0,1,0,0]

In [482]:
df1

Unnamed: 0,X1,X2,label
0,1,5,1
1,2,3,1
2,3,6,0
3,4,8,1
4,5,1,0
5,6,9,1
6,6,5,0
7,7,8,1
8,9,9,0
9,9,2,0


In [483]:
def assign_weights(df):
  df['weights'] = 1/df.shape[0]

In [484]:
assign_weights(df1)

In [485]:
X = df1.iloc[:,0:2].values
y = df1.iloc[:,2].values

In [486]:
X

array([[1, 5],
       [2, 3],
       [3, 6],
       [4, 8],
       [5, 1],
       [6, 9],
       [6, 5],
       [7, 8],
       [9, 9],
       [9, 2]])

In [487]:
y

array([1, 1, 0, 1, 0, 1, 0, 1, 0, 0])

In [488]:
from sklearn.tree import DecisionTreeClassifier
dt1 = DecisionTreeClassifier(max_depth = 1)
dt1.fit(X,y)
df1['y_pred'] = dt1.predict(X)


In [489]:
df1

Unnamed: 0,X1,X2,label,weights,y_pred
0,1,5,1,0.1,1
1,2,3,1,0.1,1
2,3,6,0,0.1,1
3,4,8,1,0.1,1
4,5,1,0,0.1,0
5,6,9,1,0.1,1
6,6,5,0,0.1,1
7,7,8,1,0.1,1
8,9,9,0,0.1,1
9,9,2,0,0.1,0


In [490]:
'''
def cal_error(df):
    err = 0
    for i in range(df.shape[0]):
        if df['y_pred'][i] != df['label'][i]:
            err += df['weights'][i]
            err = err.round(2)
    return err
'''
def cal_error(df):
    err = 0
    for i in range(df.shape[0]):
        if df.iloc[i]['y_pred'] != df.iloc[i]['label']:
            err += df.iloc[i]['weights']
            err = round(err, 2)
    return err


In [491]:
error = cal_error(df1)

In [492]:
def cal_alpha(error):
    alpha = 0.5*np.log((1-error)/(error+0.000000001))
    return alpha.round(2)

In [493]:
alpha1 = cal_alpha(error)

In [494]:
def updated_weights(df, alpha):
    df['updated_weights'] = np.float64(0.0)  # Initialize with zeros
    for i in range(df.shape[0]):
        if df.iloc[i]['y_pred'] != df.iloc[i]['label']:
            df.iloc[i, df.columns.get_loc('updated_weights')] = df.iloc[i]['weights'] * np.exp(alpha)
        else:
            df.iloc[i, df.columns.get_loc('updated_weights')] = df.iloc[i]['weights'] * np.exp(-alpha)


In [495]:
updated_weights(df1,alpha1)

In [496]:
df1

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights
0,1,5,1,0.1,1,0.065705
1,2,3,1,0.1,1,0.065705
2,3,6,0,0.1,1,0.152196
3,4,8,1,0.1,1,0.065705
4,5,1,0,0.1,0,0.065705
5,6,9,1,0.1,1,0.065705
6,6,5,0,0.1,1,0.152196
7,7,8,1,0.1,1,0.065705
8,9,9,0,0.1,1,0.152196
9,9,2,0,0.1,0,0.065705


In [497]:
def norm_weights(df):
    df['norm_weights'] = df['updated_weights']/df['updated_weights'].sum()

In [498]:
norm_weights(df1)

In [499]:
df1

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights,norm_weights
0,1,5,1,0.1,1,0.065705,0.071689
1,2,3,1,0.1,1,0.065705,0.071689
2,3,6,0,0.1,1,0.152196,0.166059
3,4,8,1,0.1,1,0.065705,0.071689
4,5,1,0,0.1,0,0.065705,0.071689
5,6,9,1,0.1,1,0.065705,0.071689
6,6,5,0,0.1,1,0.152196,0.166059
7,7,8,1,0.1,1,0.065705,0.071689
8,9,9,0,0.1,1,0.152196,0.166059
9,9,2,0,0.1,0,0.065705,0.071689


In [500]:
def get_range(df):
    df['upper'] = df['norm_weights'].cumsum()
    df['lower'] = df['upper'] - df['norm_weights']

In [501]:
get_range(df1)

In [502]:
df1

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights,norm_weights,upper,lower
0,1,5,1,0.1,1,0.065705,0.071689,0.071689,0.0
1,2,3,1,0.1,1,0.065705,0.071689,0.143378,0.071689
2,3,6,0,0.1,1,0.152196,0.166059,0.309437,0.143378
3,4,8,1,0.1,1,0.065705,0.071689,0.381126,0.309437
4,5,1,0,0.1,0,0.065705,0.071689,0.452815,0.381126
5,6,9,1,0.1,1,0.065705,0.071689,0.524505,0.452815
6,6,5,0,0.1,1,0.152196,0.166059,0.690563,0.524505
7,7,8,1,0.1,1,0.065705,0.071689,0.762252,0.690563
8,9,9,0,0.1,1,0.152196,0.166059,0.928311,0.762252
9,9,2,0,0.1,0,0.065705,0.071689,1.0,0.928311


In [503]:
def get_index(df):
    indices = []
    for i in range(df.shape[0]):
        a = np.random.random()
        for j in range(df.shape[0]):
            if df.iloc[j]['upper'] > a > df.iloc[j]['lower']:
                indices.append(j)
    return indices

In [504]:
idx = get_index(df1)

In [505]:
idx

[0, 9, 6, 6, 0, 8, 6, 6, 2, 8]

In [506]:
df1

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights,norm_weights,upper,lower
0,1,5,1,0.1,1,0.065705,0.071689,0.071689,0.0
1,2,3,1,0.1,1,0.065705,0.071689,0.143378,0.071689
2,3,6,0,0.1,1,0.152196,0.166059,0.309437,0.143378
3,4,8,1,0.1,1,0.065705,0.071689,0.381126,0.309437
4,5,1,0,0.1,0,0.065705,0.071689,0.452815,0.381126
5,6,9,1,0.1,1,0.065705,0.071689,0.524505,0.452815
6,6,5,0,0.1,1,0.152196,0.166059,0.690563,0.524505
7,7,8,1,0.1,1,0.065705,0.071689,0.762252,0.690563
8,9,9,0,0.1,1,0.152196,0.166059,0.928311,0.762252
9,9,2,0,0.1,0,0.065705,0.071689,1.0,0.928311


In [507]:
df2 = df1.iloc[idx,0:4]

In [508]:
df2

Unnamed: 0,X1,X2,label,weights
0,1,5,1,0.1
9,9,2,0,0.1
6,6,5,0,0.1
6,6,5,0,0.1
0,1,5,1,0.1
8,9,9,0,0.1
6,6,5,0,0.1
6,6,5,0,0.1
2,3,6,0,0.1
8,9,9,0,0.1


In [509]:
dt2 = DecisionTreeClassifier(max_depth = 1)

In [510]:
X = df2.iloc[:,0:2].values
y= df2.iloc[:,2].values

In [511]:
dt2.fit(X,y)

In [512]:
df2['y_pred'] = dt2.predict(X)

In [513]:
error = cal_error(df2)

In [514]:
df2

Unnamed: 0,X1,X2,label,weights,y_pred
0,1,5,1,0.1,1
9,9,2,0,0.1,0
6,6,5,0,0.1,0
6,6,5,0,0.1,0
0,1,5,1,0.1,1
8,9,9,0,0.1,0
6,6,5,0,0.1,0
6,6,5,0,0.1,0
2,3,6,0,0.1,0
8,9,9,0,0.1,0


In [515]:
alpha2 = cal_alpha(error)

In [516]:
updated_weights(df2,alpha2)

In [517]:
norm_weights(df2)

In [518]:
get_range(df2)

In [519]:
get_index(df2)

[2, 2, 4, 6, 1, 6, 3, 8, 0, 0]

In [520]:
df3 = df2.iloc[idx,0:4]

In [521]:
df3

Unnamed: 0,X1,X2,label,weights
0,1,5,1,0.1
8,9,9,0,0.1
6,6,5,0,0.1
6,6,5,0,0.1
0,1,5,1,0.1
2,3,6,0,0.1
6,6,5,0,0.1
6,6,5,0,0.1
6,6,5,0,0.1
2,3,6,0,0.1


In [522]:
X = df3.iloc[:,0:2].values
y= df3.iloc[:,2].values

In [523]:
dt3 = DecisionTreeClassifier(max_depth = 1)

In [524]:
dt3.fit(X,y)

In [525]:
df3['y_pred'] = dt3.predict(X)

In [526]:
error = cal_error(df3)

In [527]:
alpha3 = cal_alpha(error)

In [528]:
alpha1

np.float64(0.42)

In [529]:
alpha2

np.float64(10.36)

In [530]:
alpha3

np.float64(10.36)

In [531]:
query = np.array([1,5]).reshape(1,2)
dt1.predict(query)

array([1])

In [532]:
dt2.predict(query)

array([1])

In [533]:
dt3.predict(query)

array([1])

In [542]:
alpha1*1 + alpha2*(1) + alpha3*(1)

np.float64(21.14)

In [543]:
np.sign(21.14)


np.float64(1.0)

In [536]:
query = np.array([9,9]).reshape(1,2)
dt1.predict(query)

array([1])

In [537]:
dt2.predict(query)

array([0])

In [538]:
dt3.predict(query)

array([0])

In [544]:
alpha1*(1) + alpha2*(-1) + alpha3*(-1)

np.float64(-20.299999999999997)

In [545]:
np.sign(-20.29)

np.float64(-1.0)

In [552]:
'''
predictt = []
out = []
final = []
for i in range(df1.shape[0]):
    query = np.array(np.array([df1.iloc[i]['X1'], df1.iloc[i]['X2']])).reshape(1,2)
    predictt.append(dt1.predict(query))
    predictt.append(dt2.predict(query))
    predictt.append(dt3.predict(query))
    for j in range(3):
        if predictt[j] > 0:
            out.append(1)
        else:
            out.append(0)
        final.append(alpha1*out[j] + alpha2*out[j] + alpha3*out[j])
'''
predictt = []
final = []
for i in range(df1.shape[0]):
    query = np.array([df1.iloc[i]['X1'], df1.iloc[i]['X2']]).reshape(1, 2)

    # Get predictions from all models
    predictt = [
        dt1.predict(query)[0],
        dt2.predict(query)[0],
        dt3.predict(query)[0]
    ]

    # Convert predictions to binary (0 or 1) and calculate weighted final value
    out = [1 if pred > 0 else 0 for pred in predictt]
    final.append(alpha1 * out[0] + alpha2 * out[1] + alpha3 * out[2])



In [559]:
final = np.sign([final]).reshape(10,1)
pd.DataFrame(final)

Unnamed: 0,0
0,1.0
1,1.0
2,1.0
3,1.0
4,0.0
5,1.0
6,1.0
7,1.0
8,1.0
9,0.0


In [569]:
df1_copy = df1.join(pd.DataFrame(final))

In [574]:
df1_copy[['label','y_pred','final']]

KeyError: "['final'] not in index"

In [573]:
df1_copy.rename(columns={'label':'label','y_pred':'y_pred','0':'final'},inplace=True)

In [575]:
df1_copy

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights,norm_weights,upper,lower,0
0,1,5,1,0.1,1,0.065705,0.071689,0.071689,0.0,1.0
1,2,3,1,0.1,1,0.065705,0.071689,0.143378,0.071689,1.0
2,3,6,0,0.1,1,0.152196,0.166059,0.309437,0.143378,1.0
3,4,8,1,0.1,1,0.065705,0.071689,0.381126,0.309437,1.0
4,5,1,0,0.1,0,0.065705,0.071689,0.452815,0.381126,0.0
5,6,9,1,0.1,1,0.065705,0.071689,0.524505,0.452815,1.0
6,6,5,0,0.1,1,0.152196,0.166059,0.690563,0.524505,1.0
7,7,8,1,0.1,1,0.065705,0.071689,0.762252,0.690563,1.0
8,9,9,0,0.1,1,0.152196,0.166059,0.928311,0.762252,1.0
9,9,2,0,0.1,0,0.065705,0.071689,1.0,0.928311,0.0


In [576]:
df1_copy.drop(columns=['X1','X2','weights','updated_weights','norm_weights','upper','lower'], inplace=True)

In [577]:
df1_copy

Unnamed: 0,label,y_pred,0
0,1,1,1.0
1,1,1,1.0
2,0,1,1.0
3,1,1,1.0
4,0,0,0.0
5,1,1,1.0
6,0,1,1.0
7,1,1,1.0
8,0,1,1.0
9,0,0,0.0
