In [11]:
# QUANTUM MACHINE LEARNING - PATTERN RECOGNITION & OPTIMIZATION
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
import pennylane as qml

In [12]:
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error, r2_score
from scipy.optimize import minimize

In [13]:
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)


In [14]:
# PART 1: LOAD SAVED DATA & MODELS
# Loading saved data
df = pd.read_csv('data/wage_gap_data_from_government_sources.csv')
X_test = pd.read_csv('data/X_test.csv')
y_test = pd.read_csv('data/y_test.csv').values.ravel()


In [23]:
# Loading saved models 
model_lr = pickle.load(open('models/logistic_model.pkl', 'rb'))
model_rf = pickle.load(open('models/random_forest_model.pkl', 'rb'))
model_gb = pickle.load(open('models/gradient_boosting_model.pkl', 'rb'))
ensemble = pickle.load(open('models/ensemble_model.pkl', 'rb'))
scaler = pickle.load(open('models/scaler.pkl', 'rb'))
le_gender = pickle.load(open('models/le_gender.pkl', 'rb'))
le_education = pickle.load(open('models/le_education.pkl', 'rb'))

In [34]:
# PART 2: QUANTUM FAIR WAGE PREDICTION

# Encoding data
if 'gender_encoded' not in df.columns:
    df['gender_encoded'] = le_gender.fit_transform(df['gender'])
    df['education_encoded'] = le_education.fit_transform(df['education_level'])

X_normalized = scaler.fit_transform(df[['gender_encoded', 'education_encoded', 'experience_years']])
y_fair_wages = df['fair_wage_estimate'].values



In [35]:
# Prepare data for quantum model
X_normalized = scaler.fit_transform(
    df[[
        'gender_encoded' if 'gender_encoded' not in df.columns else 'gender',
        'education_encoded' if 'education_encoded' not in df.columns else 'education_level',
        'experience_years'
    ]]
)

In [36]:
# Create quantum device
n_qubits = 3
dev = qml.device('default.qubit', wires=n_qubits)


In [37]:
@qml.qnode(dev)
def quantum_fair_wage_predictor(features, weights):
    """Quantum circuit for fair wage prediction"""
    
    # Encode features
    for i in range(n_qubits):
        qml.RY(features[i] * np.pi, wires=i)
    
    # Parameterized gates
    for i in range(n_qubits):
        qml.RY(weights[i], wires=i)
        qml.CNOT(wires=[i, (i+1) % n_qubits])
    
    return qml.expval(qml.PauliZ(0))


In [38]:
# Train quantum model
initial_weights = np.random.rand(n_qubits) * 0.1

In [39]:
def quantum_loss_function(weights):
    total_loss = 0
    n_samples = min(100, len(X_normalized))
    
    for i in range(n_samples):
        quantum_pred = quantum_fair_wage_predictor(X_normalized[i], weights)
        predicted_wage = (quantum_pred + 1) / 2 * 70000
        error = (predicted_wage - y_fair_wages[i]) ** 2
        total_loss += error
    
    return total_loss / n_samples


In [40]:
result = minimize(quantum_loss_function, initial_weights, method='COBYLA', 
                  options={'maxiter': 100, 'rhobeg': 0.5})


In [42]:
optimal_weights = result.x
final_loss = result.fun


In [43]:

print(f"  Final Loss: {final_loss:.4f}")
print(f"  Optimization Success: {result.success}")

  Final Loss: 360639011.9606
  Optimization Success: True


In [44]:
# Evaluate
quantum_predictions = []
n_test_samples = min(50, len(X_normalized))


In [45]:
for i in range(n_test_samples):
    quantum_output = quantum_fair_wage_predictor(X_normalized[i], optimal_weights)
    predicted_wage = (quantum_output + 1) / 2 * 70000
    quantum_predictions.append(predicted_wage)


In [46]:
quantum_predictions = np.array(quantum_predictions)
actual_wages = y_fair_wages[:n_test_samples]


In [47]:
quantum_mse = mean_squared_error(actual_wages, quantum_predictions)
quantum_rmse = np.sqrt(quantum_mse)
quantum_r2 = r2_score(actual_wages, quantum_predictions)


In [80]:
print(f"  MSE: {quantum_mse:.2f}")
print(f"  RMSE: ₹{quantum_rmse:.2f}")
print(f"  R² Score: {quantum_r2:.4f}")
print(f"  Average Prediction Error: ₹{np.mean(np.abs(actual_wages - quantum_predictions)):.2f}")

  MSE: 346543119.87
  RMSE: ₹18615.67
  R² Score: -4.7003
  Average Prediction Error: ₹15603.81


In [81]:
# Show sample predictions
print(f"\nSample Predictions (First 5 workers):")
for i in range(min(5, n_test_samples)):
    actual = actual_wages[i]
    predicted = quantum_predictions[i]
    error = abs(actual - predicted)
    print(f"  Worker {i+1}: Actual: ₹{actual:.0f}, Predicted: ₹{predicted:.0f}, Error: ₹{error:.0f}")


Sample Predictions (First 5 workers):
  Worker 1: Actual: ₹14000, Predicted: ₹30569, Error: ₹16569
  Worker 2: Actual: ₹18000, Predicted: ₹24494, Error: ₹6494
  Worker 3: Actual: ₹18000, Predicted: ₹24116, Error: ₹6116
  Worker 4: Actual: ₹9000, Predicted: ₹23645, Error: ₹14645
  Worker 5: Actual: ₹9000, Predicted: ₹25528, Error: ₹16528


In [75]:
# PART 3: QUANTUM PATTERN RECOGNITION
@qml.qnode(dev)
def quantum_feature_map(features):
    """Quantum feature mapping"""
    
    for i in range(n_qubits):
        qml.RY(features[i] * np.pi, wires=i)
    
    for i in range(n_qubits):
        qml.CNOT(wires=[i, (i+1) % n_qubits])
    
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]



In [76]:
df['gap_importance'] = df['fair_wage_gap'].abs()
high_gap_workers = df.nlargest(10, 'gap_importance')

In [77]:
quantum_patterns = []
gap_values = []
industries = []

for idx, (_, worker) in enumerate(high_gap_workers.iterrows()):
    gender_enc = 1 if worker['gender'] == 'Male' else 0
    edu_mapping = {'Illiterate': 0, '5th Pass': 0.2, '10th Pass': 0.4, 
                   '12th Pass': 0.6, 'Diploma': 0.75, 'Graduate': 0.9, 'Post-Graduate': 1.0}
    edu_enc = edu_mapping.get(worker['education_level'], 0.5)
    exp_norm = min(worker['experience_years'] / 35, 1.0)
    
    features = np.array([gender_enc, edu_enc, exp_norm])
    pattern = quantum_feature_map(features)
    quantum_patterns.append(pattern)
    gap_values.append(worker['fair_wage_gap'])
    industries.append(worker['industry'])


In [82]:
# Cluster workers

quantum_patterns_array = np.array(quantum_patterns)
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(quantum_patterns_array)

for cluster_id in range(3):
    cluster_workers = [industries[i] for i in range(len(clusters)) if clusters[i] == cluster_id]
    cluster_gaps = [gap_values[i] for i in range(len(clusters)) if clusters[i] == cluster_id]
    
    print(f"Cluster {cluster_id + 1}:")
    print(f"  Workers: {cluster_workers}")
    print(f"  Avg Gap: ₹{np.mean(cluster_gaps):.0f}")
    print(f"  Industries: {list(set(cluster_workers))}")
    print()



Cluster 1:
  Workers: ['Agriculture', 'Food Processing', 'Agriculture', 'Textiles', 'Mining', 'Manufacturing', 'Food Processing', 'Construction', 'Agriculture', 'Retail']
  Avg Gap: ₹-17135
  Industries: ['Agriculture', 'Mining', 'Textiles', 'Manufacturing', 'Construction', 'Retail', 'Food Processing']

Cluster 2:
  Workers: []
  Avg Gap: ₹nan
  Industries: []

Cluster 3:
  Workers: []
  Avg Gap: ₹nan
  Industries: []



  return fit_method(estimator, *args, **kwargs)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [72]:
# PART 4: QUANTUM-INSPIRED POLICY OPTIMIZATION
policies = {
    'minimum_wage_enforcement': 0.15,
    'contract_transparency': 0.10,
    'discrimination_ban': 0.12,
    'enforcement_improvement': 0.18,
    'skill_training': 0.08
}

policy_costs = {
    'minimum_wage_enforcement': 5,
    'contract_transparency': 2,
    'discrimination_ban': 3,
    'enforcement_improvement': 7,
    'skill_training': 6
}


In [84]:
# Create a mapping of workers to clusters
cluster_mapping = {}
for i, cluster_id in enumerate(clusters):
    if cluster_id not in cluster_mapping:
        cluster_mapping[cluster_id] = []
    cluster_mapping[cluster_id].append(i)

In [85]:
print(f"Cluster Distribution:")
for cluster_id in range(3):
    num_workers = len(cluster_mapping.get(cluster_id, []))
    print(f"  Cluster {cluster_id + 1}: {num_workers} workers")

Cluster Distribution:
  Cluster 1: 10 workers
  Cluster 2: 0 workers
  Cluster 3: 0 workers


In [86]:
def evaluate_policy_for_cluster(policy_combination, cluster_worker_indices):
    policy_names = list(policies.keys())
    selected_policies = [policy_names[i] for i in range(len(policy_names)) if policy_combination[i] > 0.5]
    
    if not selected_policies:
        return -999
    
    total_impact = sum([policies[p] for p in selected_policies])
    total_cost = sum([policy_costs[p] for p in selected_policies])
    
    return total_impact - (total_cost / 100)


In [87]:
# Optimize policies for each cluster
targeted_policies = {}
cluster_names = ["Construction & Manufacturing", "Textiles & Leather", "IT & Services"]

for cluster_id in range(3):
    cluster_worker_indices = cluster_mapping.get(cluster_id, [])
    
    if not cluster_worker_indices:
        continue
    
    # Find best policy combination
    best_benefit = -999
    best_combo = None
    
    np.random.seed(cluster_id)
    for iteration in range(500):
        combo = np.random.rand(5) > 0.5
        benefit = evaluate_policy_for_cluster(combo, cluster_worker_indices)
        
        if benefit > best_benefit:
            best_benefit = benefit
            best_combo = combo
    
    targeted_policies[cluster_id] = {
        'combo': best_combo,
        'benefit': best_benefit,
        'industries': list(set([industries[i] for i in cluster_worker_indices]))
    }
    
    policy_names = list(policies.keys())
    recommended = [policy_names[i] for i in range(len(policy_names)) if best_combo[i]]
    total_impact = sum([policies[p] for p in recommended])
    
    print(f"\n  Cluster {cluster_id + 1}: {total_impact*100:.0f}% gap reduction")



  Cluster 1: 63% gap reduction


In [88]:
# PART 5: SAVE QUANTUM RESULTS
quantum_results = {
    'Metric': ['Fair Wage Prediction R²', 'Average Prediction Error', 'Pattern Clusters', 'Policies Optimized'],
    'Value': [f'{quantum_r2:.4f}', f'₹{quantum_rmse:.2f}', '3', 'Yes']
}

quantum_results_df = pd.DataFrame(quantum_results)
quantum_results_df.to_csv('outputs/quantum_ml_results.csv', index=False)
