In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
# Load dataset
from sklearn.datasets import load_iris
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)
# Evaluate the model
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [6]:
# Regression Example
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd

# Load dataset
data = load_diabetes()
df = pd.DataFrame(data.data, columns=data.feature_names)
target = pd.Series(data.target)
df['target'] = target

# Split dataset into training and testing sets
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize and train the model
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)
# Make predictions
y_pred = regressor.predict(X_test)
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 2952.0105887640448


In [8]:
# create the copy of X_test
X_test_copy = X_test.copy()
# add the predictions as a new column
X_test_copy['Predicted'] = y_pred
X_test_copy['Actual'] = y_test.values
# display the first few rows of the modified DataFrame
X_test_copy.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,Predicted,Actual
287,0.045341,-0.044642,-0.006206,-0.015999,0.125019,0.125198,0.019187,0.034309,0.032432,-0.00522,144.0,219.0
211,0.092564,-0.044642,0.036907,0.021872,-0.02496,-0.016658,0.000779,-0.039493,-0.022517,-0.021788,171.58,70.0
72,0.063504,0.05068,-0.00405,-0.012556,0.103003,0.04879,0.056003,-0.002592,0.084492,-0.017646,150.72,202.0
321,0.096197,-0.044642,0.051996,0.079265,0.054845,0.036577,-0.076536,0.141322,0.098648,0.061054,253.71,230.0
73,0.012648,0.05068,-0.020218,-0.002228,0.038334,0.053174,-0.006584,0.034309,-0.005142,-0.009362,107.19,111.0


In [12]:
# hyperparameter tuning using GridSearchCV
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_features': ['auto', 'sqrt'],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=regressor, param_grid=param_grid, cv=3, n_jobs=-1)
# Fit the model
grid_search.fit(X_train, y_train)
# Best parameters
print("Best Parameters:", grid_search.best_params_)
# Make predictions with the best estimator
best_regressor = grid_search.best_estimator_
y_pred = best_regressor.predict(X_test)
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error after Hyperparameter Tuning:", mse)


Best Parameters: {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 50}
Mean Squared Error after Hyperparameter Tuning: 2960.3799663141613


108 fits failed out of a total of 216.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
60 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/rahultiwari/Documents/02_Freelancing/as_ds_course/.venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/rahultiwari/Documents/02_Freelancing/as_ds_course/.venv/lib/python3.13/site-packages/sklearn/base.py", line 1358, in wrapper
    estimator._validate_params()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/Users/rahultiwari/Documents/02_Freelancing/as_ds_course/.venv/lib/python3.13/site-packages/sklearn/b

In [13]:
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error after Hyperparameter Tuning:", mse)

Mean Squared Error after Hyperparameter Tuning: 2960.3799663141613


In [3]:
import pandas as pd
import numpy as np

# 1. Create the Dataset
data = {
    'ID': range(1, 11),
    'Credit_Score': [650, 800, 750, 550, 580, 600, 450, 900, 720, 810],
    'Income_k': [50, 40, 80, 90, 120, 30, 25, 100, 20, 200],
    'Loan_Status': [0, 1, 1, 0, 1, 0, 0, 1, 0, 1]
}
df = pd.DataFrame(data)

# Add placeholder columns for the Excel formulas
columns_to_add = ['Initial_Weight', 'Pred_Round1', 'Is_Incorrect', 'Weight_Raw', 'Weight_Norm']
for col in columns_to_add:
    df[col] = '' 

# 2. Write to Excel with Formulas
output_path = 'boosting_example.xlsx'
writer = pd.ExcelWriter(output_path, engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Round 1')

workbook = writer.book
worksheet = writer.sheets['Round 1']

# Formats
header_fmt = workbook.add_format({'bold': True, 'bg_color': '#DCE6F1', 'border': 1})
highlight_fmt = workbook.add_format({'bg_color': '#FFFF00'}) # Yellow for emphasis

# Apply Header Format
for col_num, value in enumerate(df.columns):
    worksheet.write(0, col_num, value, header_fmt)

# 3. Inject Formulas Row by Row
# Note: Excel rows are 1-based. Python is 0-based. 
# Data starts at Row 2 (index 1 in xlsxwriter loop, but row reference is i+2)

for i in range(len(df)):
    row_idx = i + 2  # Excel Row Number (2, 3, 4...)
    
    # E: Initial Weight = 1 / Count of rows
    worksheet.write_formula(f'E{row_idx}', f'=1/COUNT($A$2:$A$11)')
    
    # F: Prediction (Rule: Credit Score > 600) -> 1 if True, 0 if False
    worksheet.write_formula(f'F{row_idx}', f'=IF(B{row_idx}>600, 1, 0)')
    
    # G: Is Incorrect? (Prediction <> Actual)
    worksheet.write_formula(f'G{row_idx}', f'=IF(F{row_idx}<>D{row_idx}, 1, 0)')
    
    # H: Weight Raw = Old_Weight * EXP( Alpha * (1 if Incorrect, -1 if Correct) )
    # Note: We will put Alpha in cell L3. 
    # Logic: IF Incorrect(1), use Alpha. IF Correct(0), use -Alpha.
    worksheet.write_formula(f'H{row_idx}', f'=E{row_idx}*EXP(IF(G{row_idx}=1, $L$3, -$L$3))')
    
    # I: Normalized Weight = Raw / Sum(Raw)
    worksheet.write_formula(f'I{row_idx}', f'=H{row_idx}/SUM($H$2:$H$11)')

# 4. Add Summary Statistics (Alpha Calculation)
worksheet.write('K2', 'Total Error', header_fmt)
worksheet.write_formula('L2', '=SUMPRODUCT(G2:G11, E2:E11)')

worksheet.write('K3', 'Alpha (Say)', header_fmt)
worksheet.write_formula('L3', '=0.5*LN((1-L2)/L2)')

# 5. Formatting touches
worksheet.set_column('A:A', 5)
worksheet.set_column('B:D', 12)
worksheet.set_column('E:I', 15)
worksheet.set_column('K:L', 15)

writer.close()

In [2]:
pip install xlsxwriter

Collecting xlsxwriter
  Using cached xlsxwriter-3.2.9-py3-none-any.whl.metadata (2.7 kB)
Using cached xlsxwriter-3.2.9-py3-none-any.whl (175 kB)
Installing collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.9

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
