### Simulating Weighted Sampling

In [5]:
import pandas as pd 
import numpy as np 

# Read the Titanic passenger table from the datasciencedojo/datasets GitHub repository.
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
df = pd.read_csv(url)

# Work with only the first five passengers and four of the columns.
df_small = df.loc[:, ['Pclass', 'Sex', 'Age', 'Survived']].iloc[:5]

# Every row starts out with the same weight (1/5 each).
weights = np.full(5, 1 / 5)
print("Initial Weights:", weights)

# Pretend that only the last row was misclassified and double its weight.
misclassified = [False] * 4 + [True]
weights = np.where(misclassified, 2 * weights, weights)
print("Weights after misclassification adjustment:", weights)

# Rescale so the weights sum to 1 and can be used as sampling probabilities.
weights = weights / weights.sum()
print("Normalized Weights:", weights)

# Draw five rows with replacement; rows with a larger weight are more likely to be picked.
weighted_sample = df_small.sample(n=5, replace=True, weights=weights, random_state=42)
print("Original Subset:", df_small, "\nWeighted Sample:", weighted_sample, sep="\n")

Initial Weights: [0.2 0.2 0.2 0.2 0.2]
Weights after misclassification adjustment: [0.2 0.2 0.2 0.2 0.4]
Normalized Weights: [0.16666667 0.16666667 0.16666667 0.16666667 0.33333333]
Original Subset:
   Pclass     Sex   Age  Survived
0       3    male  22.0         0
1       1  female  38.0         1
2       3  female  26.0         1
3       1  female  35.0         1
4       3    male  35.0         0

Weighted Sample:
   Pclass     Sex   Age  Survived
2       3  female  26.0         1
4       3    male  35.0         0
4       3    male  35.0         0
3       1  female  35.0         1
0       3    male  22.0         0


In [None]:
# Simulate two rounds of weight updates, each on four instances of df_small.
#
# NOTE: every round below starts again from uniform weights and applies the same
# hard-coded misclassification pattern, so nothing is carried over from one round
# to the next. The cell illustrates a single "up-weight the misclassified rows,
# renormalize, resample" step performed twice; it does not train or evaluate a model.
subset_size = 4  # number of rows picked, weighted and resampled in each round

# Seeded generator so the row selection is the same on every Restart & Run All.
selection_rng = np.random.default_rng(42)

for iteration in range(2):
    # Randomly select four distinct instances from the original subset
    selected_indices = selection_rng.choice(df_small.index, size=subset_size, replace=False)
    selected_instances = df_small.loc[selected_indices]

    # Assign new (uniform) weights for the selected instances
    new_weights = np.full(subset_size, 1 / subset_size)

    # Simulate misclassification for the selected instances.
    # The list of booleans acts as a mask: True marks a row treated as misclassified.
    misclassified = [False, True, False, True]  # Example misclassification
    new_weights[misclassified] *= 2  # Increase weight of misclassified
    print(f"\nIteration {iteration + 1} Weights before normalization:", new_weights)

    new_weights /= new_weights.sum()  # Normalize weights so they sum to 1
    print(f"Iteration {iteration + 1} Normalized Weights:", new_weights)

    # Sample with new weights (with replacement, so heavily weighted rows can repeat)
    weighted_sample_iter = selected_instances.sample(
        n=subset_size, replace=True, weights=new_weights, random_state=42
    )
    print(f"Weighted Sample for Iteration {iteration + 1}:")
    print(weighted_sample_iter)

# Up-weighting misclassified instances makes them more likely to appear in the next
# weighted sample, which is how boosting steers later learners toward
# harder-to-classify instances.


Iteration 1 Weights before normalization: [0.25 0.5  0.25 0.5 ]
Iteration 1 Normalized Weights: [0.16666667 0.33333333 0.16666667 0.33333333]
Weighted Sample for Iteration 1:
   Pclass     Sex   Age  Survived
0       3    male  22.0         0
1       1  female  38.0         1
1       1  female  38.0         1
2       3  female  26.0         1

Iteration 2 Weights before normalization: [0.25 0.5  0.25 0.5 ]
Iteration 2 Normalized Weights: [0.16666667 0.33333333 0.16666667 0.33333333]
Weighted Sample for Iteration 2:
   Pclass     Sex   Age  Survived
1       1  female  38.0         1
0       3    male  22.0         0
0       3    male  22.0         0
4       3    male  35.0         0


Imagine a dataset with three passengers, A, B, and C, each starting with weight 1/3. The first weak learner correctly predicts A and B but misclassifies C. Doubling C’s weight (to 2/3) and renormalizing so that the weights sum to 1 raises C’s weight to 1/2, while A’s and B’s weights each drop to 1/4. The next learner therefore focuses on C, correcting its prediction but perhaps misclassifying A. This process continues, and the final model combines all learners’ predictions, with each learner’s vote weighted by its accuracy.