In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------
# 1. SETUP
# ---------------------------------------------------------
# Read the labelled data; every column other than 'id' and 'target' is a feature.
train = pd.read_csv('train.csv')
y_raw = train['target'].values
X_raw = train.drop(columns=['id', 'target']).values

# A single fixed 80/20 hold-out split, reused by every run in this cell.
# NOTE(review): the split is not stratified on the target — confirm that is intended.
split_parts = train_test_split(X_raw, y_raw, test_size=0.2, random_state=42)
X_train_raw, X_cv_raw, y_train_raw, y_cv_raw = split_parts

# The scaler learns its statistics from the training part only.
scaler = StandardScaler().fit(X_train_raw)
X_cv_scaled = scaler.transform(X_cv_raw)

# ---------------------------------------------------------
# 2. THE SCIENTIFIC LOOP (10 Seeds per Multiplier)
# ---------------------------------------------------------
# For each oversampling multiplier 1..9, train the same network once per seed
# and report mean / worst / best hold-out accuracy plus a stability label.
# NOTE(review): plain accuracy can sit close to the majority-class rate on an
# imbalanced target — consider printing that baseline next to these numbers.
seeds = [1, 2, 3, 4, 5, 42, 100, 2023, 7, 99]

# The per-class row split does not depend on the multiplier, so it is built
# once here instead of on every pass of the outer loop.
train_df = pd.DataFrame(X_train_raw)
train_df['target'] = y_train_raw
red_pills = train_df[train_df['target'] == 0]
blue_pills = train_df[train_df['target'] == 1]

print(f"{'Mult':<5} | {'Avg Accuracy':<12} | {'Min (Worst)':<12} | {'Max (Best)':<12} | {'Stability'}")
print("-" * 65)

for multiplier in range(1, 10):

    # Oversample class 1 by repeating its rows `multiplier` times, then apply
    # the already-fitted scaler to the enlarged training set.
    train_balanced = pd.concat([red_pills] + [blue_pills] * multiplier)
    X_train_balanced = train_balanced.drop(columns=['target']).values
    y_train_balanced = train_balanced['target'].values
    X_train_scaled = scaler.transform(X_train_balanced)

    accuracies = []

    # One training run per seed so the summary reflects run-to-run variation.
    for seed in seeds:
        # Release the global state Keras keeps for previously built models;
        # building many models in one kernel otherwise keeps accumulating it.
        tf.keras.backend.clear_session()
        # One call that seeds Python's `random`, NumPy and TensorFlow together.
        # Seeding only tf and np left Python's `random` untouched.
        tf.keras.utils.set_random_seed(seed)

        model = Sequential([
            Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
            Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
            Dense(1, activation='linear')  # raw logit; the loss applies the sigmoid
        ])

        model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss=tf.keras.losses.BinaryCrossentropy(from_logits=True)
        )

        model.fit(
            X_train_scaled,
            y_train_balanced,
            epochs=50,
            verbose=0
        )

        # A logit >= 0 corresponds to a predicted probability >= 0.5.
        z_cv = model.predict(X_cv_scaled, verbose=0)
        yhat_cv = (z_cv >= 0).astype(int).flatten()
        accuracies.append(np.mean(yhat_cv == y_cv_raw))

    avg_acc = np.mean(accuracies)
    min_acc = np.min(accuracies)
    max_acc = np.max(accuracies)
    diff = max_acc - min_acc

    # If the difference between Best and Worst run is small, it's Stable.
    stability = "Stable" if diff < 0.05 else "Risky"

    print(f"x{multiplier:<4} | {avg_acc:.4f}       | {min_acc:.4f}       | {max_acc:.4f}       | {stability}")

Mult  | Avg Accuracy | Min (Worst)  | Max (Best)   | Stability
-----------------------------------------------------------------
x1    | 0.8633       | 0.8225       | 0.8925       | Risky
x2    | 0.8640       | 0.8525       | 0.8900       | Stable
x3    | 0.8688       | 0.8450       | 0.8850       | Stable
x4    | 0.8500       | 0.6475       | 0.8900       | Risky
x5    | 0.8662       | 0.8200       | 0.8925       | Risky
x6    | 0.8707       | 0.8575       | 0.8825       | Stable
x7    | 0.8678       | 0.8300       | 0.8875       | Risky
x8    | 0.8650       | 0.8350       | 0.8900       | Risky
x9    | 0.8533       | 0.8100       | 0.8875       | Risky


In [11]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------
# 1. SETUP DATA
# ---------------------------------------------------------
# Features are every column of train.csv except 'id' and 'target'.
train = pd.read_csv('train.csv')
y_raw = train['target'].values
X_raw = train.drop(columns=['id', 'target']).values

# One fixed 80/20 hold-out split, so every lambda is scored on the same rows.
split_parts = train_test_split(X_raw, y_raw, test_size=0.2, random_state=42)
X_train_raw, X_cv_raw, y_train_raw, y_cv_raw = split_parts

# Scaling statistics come from the raw training part only; the hold-out
# part is transformed with them, never fitted on.
scaler = StandardScaler().fit(X_train_raw)
X_cv_scaled = scaler.transform(X_cv_raw)

# ---------------------------------------------------------
# 2. PREPARE THE WINNER (Multiplier x6)
# ---------------------------------------------------------
BEST_MULTIPLIER = 6

# Pair the raw training features with their labels so rows can be picked by class.
train_df = pd.DataFrame(X_train_raw).assign(target=y_train_raw)
class_labels = train_df['target']
red_pills = train_df[class_labels == 0]
blue_pills = train_df[class_labels == 1]

# Class-0 rows once, followed by BEST_MULTIPLIER copies of the class-1 rows.
train_balanced = pd.concat([red_pills, *([blue_pills] * BEST_MULTIPLIER)])
feature_part = train_balanced.drop(columns=['target'])
X_train_balanced = feature_part.values
y_train_balanced = train_balanced['target'].values

# Apply the already-fitted scaler to the oversampled features.
X_train_scaled = scaler.transform(X_train_balanced)

# ---------------------------------------------------------
# 3. ROBUST LAMBDA TUNING (Averaging over 10 seeds)
# ---------------------------------------------------------
# Candidate L2 strengths, spaced roughly logarithmically (plus 0 = no penalty).
lambda_values = [0.0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1]

# Each lambda is trained once per seed and the hold-out accuracies are summarised.
seeds = [1, 2, 3, 4, 5, 42, 100, 2023, 7, 99]

print(f"Starting Lambda Tuning with Blue Pills x{BEST_MULTIPLIER}...")
print("-" * 65)
print(f"{'Lambda':<10} | {'Avg Accuracy':<12} | {'Min (Worst)':<12} | {'Stability'}")
print("-" * 65)

for lambda_val in lambda_values:
    accuracies = []

    # Inner Loop: one full training run per seed for this lambda.
    # A row is printed only after all seeds for the lambda have finished.
    for seed in seeds:
        # Release the global state Keras keeps for previously built models;
        # building many models in one kernel otherwise keeps accumulating it.
        tf.keras.backend.clear_session()
        # One call that seeds Python's `random`, NumPy and TensorFlow together.
        # Seeding only tf and np left Python's `random` untouched.
        tf.keras.utils.set_random_seed(seed)

        model = Sequential([
            Dense(128, activation='relu', kernel_regularizer=l2(lambda_val)),
            Dense(64, activation='relu', kernel_regularizer=l2(lambda_val)),
            Dense(1, activation='linear')  # raw logit; the loss applies the sigmoid
        ])

        model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss=tf.keras.losses.BinaryCrossentropy(from_logits=True)
        )

        model.fit(
            X_train_scaled,
            y_train_balanced,
            epochs=50,
            verbose=0
        )

        # A logit >= 0 corresponds to a predicted probability >= 0.5.
        z_cv = model.predict(X_cv_scaled, verbose=0)
        yhat_cv = (z_cv >= 0).astype(int).flatten()
        accuracies.append(np.mean(yhat_cv == y_cv_raw))

    avg_acc = np.mean(accuracies)
    min_acc = np.min(accuracies)
    diff = np.max(accuracies) - min_acc
    # A best-to-worst spread under 0.05 is labelled "Stable".
    stability = "Stable" if diff < 0.05 else "Risky"

    print(f"{lambda_val:<10} | {avg_acc:.4f}       | {min_acc:.4f}       | {stability}")

Starting Lambda Tuning with Blue Pills x6...
-----------------------------------------------------------------
Lambda     | Avg Accuracy | Min (Worst)  | Stability
-----------------------------------------------------------------



KeyboardInterrupt



In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------
# 1. LOAD ALL DATA
# ---------------------------------------------------------
print("Loading full dataset...")
train, test = pd.read_csv('train.csv'), pd.read_csv('test.csv')

# The ids are kept aside so they can be written next to the predictions later.
test_ids = test['id']

# Whole training file as arrays — no hold-out split in this cell.
y_raw = train['target'].values
X_raw = train.drop(columns=['id', 'target']).values
X_test_values = test.drop(columns=['id']).values

# ---------------------------------------------------------
# 2. SCALE ON 100% OF DATA
# ---------------------------------------------------------
# The scaler is fitted on every training row, then applied to the same
# training rows and to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_raw)
X_test_scaled = scaler.transform(X_test_values)

# ---------------------------------------------------------
# 3. APPLY WINNING STRATEGY (x6) TO FULL DATA
# ---------------------------------------------------------
print("Applying Blue Pills x6 to the full dataset...")

# The features are already scaled here; attach the labels so rows can be
# selected by class.
train_df = pd.DataFrame(X_train_scaled).assign(target=y_raw)
class_labels = train_df['target']
red_pills = train_df[class_labels == 0]
blue_pills = train_df[class_labels == 1]

# Class-0 rows once, followed by six copies of the class-1 rows.
train_balanced = pd.concat([red_pills, *([blue_pills] * 6)])

# Split back into the feature matrix and label vector used for training.
feature_part = train_balanced.drop(columns=['target'])
X_train_final = feature_part.values
y_train_final = train_balanced['target'].values

# ---------------------------------------------------------
# 4. TRAIN CHAMPION MODEL (Lambda = 0.0)
# ---------------------------------------------------------
print("Training on 100% of data...")

# One call that seeds Python's `random`, NumPy and TensorFlow together.
# Seeding only tf and np left Python's `random` untouched.
tf.keras.utils.set_random_seed(1)

model = Sequential([
    # L2 strength 0.0, i.e. the regularizer adds no penalty.
    Dense(128, activation='relu', kernel_regularizer=l2(0.0)),
    Dense(64, activation='relu', kernel_regularizer=l2(0.0)),
    Dense(1, activation='linear')  # raw logit; the loss applies the sigmoid
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True)
)

# No validation data is passed: this fit uses every oversampled training row.
model.fit(
    X_train_final,
    y_train_final,
    epochs=50,
    verbose=0
)

# ---------------------------------------------------------
# 5. GENERATE SUBMISSION
# ---------------------------------------------------------
print("Generating submission.csv...")

# The network outputs logits; a logit >= 0 is labelled 1, anything else 0.
z_test = model.predict(X_test_scaled, verbose=0)
predictions = (z_test.flatten() >= 0).astype(int)

# One row per test id with its predicted label, written without the index.
submission = pd.DataFrame({'id': test_ids, 'target': predictions})
submission.to_csv('submission.csv', index=False)

# Report how many test rows were labelled 1.
num_ones = predictions.sum()
print(f"Done! Predicted {num_ones} Blue Pills for the Test Set.")

Loading full dataset...
Applying Blue Pills x6 to the full dataset...
Training on 100% of data...
Generating submission.csv...
Done! Predicted 190 Blue Pills for the Test Set.


In [14]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------
# 1. LOAD DATA
# ---------------------------------------------------------
print("Loading data...")
train, test = pd.read_csv('train.csv'), pd.read_csv('test.csv')

# The ids are kept aside so they can be written next to the predictions later.
test_ids = test['id']

# Features are every column except 'id' (and 'target' for the training file).
y_raw = train['target'].values
X_raw = train.drop(columns=['id', 'target']).values
X_test_values = test.drop(columns=['id']).values

# ---------------------------------------------------------
# 2. THE FINAL SPLIT & SCALE
# ---------------------------------------------------------
# A 20% hold-out part is kept so the trained model can be checked once
# before any submission file is written.
split_parts = train_test_split(X_raw, y_raw, test_size=0.2, random_state=42)
X_train_raw, X_cv_raw, y_train_raw, y_cv_raw = split_parts

# Fit on the training part only, then apply the same transform everywhere.
scaler = StandardScaler().fit(X_train_raw)

# NOTE(review): X_train_scaled_raw is not read again in the visible part of
# this cell — confirm whether anything else still needs it.
X_train_scaled_raw = scaler.transform(X_train_raw)
X_cv_scaled = scaler.transform(X_cv_raw)
X_test_scaled = scaler.transform(X_test_values)

# ---------------------------------------------------------
# 3. APPLY WINNING STRATEGY (Multiplier x6)
# ---------------------------------------------------------
print("Applying winning strategy: Blue Pills x6...")

# Pair the unscaled training features with their labels so rows can be
# picked by class.
train_df = pd.DataFrame(X_train_raw).assign(target=y_train_raw)
class_labels = train_df['target']
red_pills = train_df[class_labels == 0]
blue_pills = train_df[class_labels == 1]

# Class-0 rows once, followed by six copies of the class-1 rows.
train_balanced = pd.concat([red_pills, *([blue_pills] * 6)])

# Back to plain arrays for Keras.
feature_part = train_balanced.drop(columns=['target'])
X_train_balanced = feature_part.values
y_train_balanced = train_balanced['target'].values

# Apply the already-fitted scaler to the oversampled features.
X_train_balanced_scaled = scaler.transform(X_train_balanced)

# ---------------------------------------------------------
# 4. TRAIN CHAMPION MODEL (Lambda = 0.0)
# ---------------------------------------------------------
print("Training the Champion Model...")

# One call that seeds Python's `random`, NumPy and TensorFlow together.
# Seeding only tf and np left Python's `random` untouched.
tf.keras.utils.set_random_seed(1)

model = Sequential([
    # L2 strength 0.0, i.e. the regularizer adds no penalty.
    Dense(128, activation='relu', kernel_regularizer=l2(0.0)),
    Dense(64, activation='relu', kernel_regularizer=l2(0.0)),
    Dense(1, activation='linear')  # raw logit; the loss applies the sigmoid
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True)
)

# Trains on the oversampled, scaled training split only; the hold-out rows
# are not passed to fit.
model.fit(
    X_train_balanced_scaled,
    y_train_balanced,
    epochs=50,
    verbose=0
)

# ---------------------------------------------------------
# 5. SANITY CHECK (Crucial Step)
# ---------------------------------------------------------
# Score the model on the hold-out rows before writing anything to disk.
# The network outputs logits; a logit >= 0 is labelled 1, anything else 0.
z_cv = model.predict(X_cv_scaled, verbose=0)
yhat_cv = (z_cv >= 0).astype(int).flatten()
acc = np.mean(yhat_cv == y_cv_raw)
ones_predicted = np.sum(yhat_cv)

# Single source of truth for the output path, so the messages below always
# name the file that is actually written.
SUBMISSION_PATH = 'PredictionsJustOn1600Examples.csv'

print("-" * 30)
print(f"Final Sanity Check Accuracy: {acc:.4f}")
print(f"Blue Pills found in CV Set:  {ones_predicted}")

if ones_predicted == 0:
    # Nothing is written and no test-set summary is printed in this branch.
    print("⚠️ STOP! The model is predicting all zeros. Do not submit.")
else:
    print("✅ SUCCESS! The model is active and learning.")

    # ---------------------------------------------------------
    # 6. GENERATE SUBMISSION
    # ---------------------------------------------------------
    print(f"Generating {SUBMISSION_PATH}...")
    z_test = model.predict(X_test_scaled, verbose=0)
    predictions = (z_test >= 0).astype(int).flatten()

    submission = pd.DataFrame({
        'id': test_ids,
        'target': predictions
    })

    submission.to_csv(SUBMISSION_PATH, index=False)
    print(f"Done! '{SUBMISSION_PATH}' is ready for upload.")

    # Kept inside this branch: `predictions` only exists when the sanity
    # check passed. Outside it, this would raise NameError or silently
    # report values left in the kernel by an earlier cell.
    num_ones = np.sum(predictions)
    print(f"Done! Predicted {num_ones} Blue Pills for the Test Set.")

Loading data...
Applying winning strategy: Blue Pills x6...
Training the Champion Model...
------------------------------
Final Sanity Check Accuracy: 0.8825
Blue Pills found in CV Set:  11
✅ SUCCESS! The model is active and learning.
Generating submission.csv...
Done! 'submission.csv' is ready for upload.
Done! Predicted 175 Blue Pills for the Test Set.
