In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [9]:
import numpy as np
import pandas as pd
from scipy import sparse
from scipy.optimize import linprog


In [12]:
# Read the cleaned leads table from the Kaggle input directory.
leads_path = '/kaggle/input/lead-scoring-logistic-regression/Leads_cleaned'
df = pd.read_csv(leads_path)

# When the table carries no 'Score' column, fall back to simulated scores
# drawn uniformly from [0.2, 0.9). Replace this with real model
# probabilities when they are available.
has_score = 'Score' in df.columns
if not has_score:
    np.random.seed(0)  # fixed seed keeps the simulated scores repeatable
    df['Score'] = np.random.uniform(low=0.2, high=0.9, size=len(df))


In [13]:
# Problem setup: one score per lead, taken from the 'Score' column.
score = df['Score'].values
n = len(df)

# The 2n decision variables are laid out as
# [call_0 .. call_{n-1}, email_0 .. email_{n-1}], so the score vector is
# repeated once per contact method. linprog minimises, hence the negation.
c = np.negative(np.tile(score, 2))


In [14]:
# Constraints, expressed as A @ x <= b with
# x = [call_0 .. call_{n-1}, email_0 .. email_{n-1}].
#
# The matrix is assembled as a sparse block matrix: the per-lead rows only
# hold two non-zeros each, so a dense (n + 2) x 2n array would grow
# quadratically with the number of leads.
call_cost = 20
email_cost = 5
call_budget = 1000
email_budget = 500

unit_row = sparse.csr_matrix(np.ones((1, n)))  # 1 x n row of ones
eye_n = sparse.identity(n, format='csr')       # n x n identity

A = sparse.bmat(
    [
        [call_cost * unit_row, None],    # call budget:  sum(call_cost * call_i)   <= call_budget
        [None, email_cost * unit_row],   # email budget: sum(email_cost * email_i) <= email_budget
        [eye_n, eye_n],                  # one contact method per lead: call_i + email_i <= 1
    ],
    format='csr',
)

# Right-hand sides, in the same row order as A.
b = np.concatenate([[call_budget, email_budget], np.ones(n)])


In [15]:
# Bounds: every one of the 2n decision variables is confined to [0, 1].
unit_interval = (0, 1)
bounds = [unit_interval] * (2 * n)


In [16]:
# Solve the linear program: minimise c @ x subject to A @ x <= b and the
# per-variable bounds, using linprog's 'highs' method.
res = linprog(
    c,
    A_ub=A,
    b_ub=b,
    bounds=bounds,
    method='highs',
)


In [17]:
# Extract solution
# Stop with the solver's own message if no optimum was found; otherwise the
# slicing below would fail on an unusable `res.x` with an unrelated error.
if not res.success:
    raise RuntimeError(f"linprog did not find an optimal solution: {res.message}")

# The first n entries are the call variables, the last n the email variables.
# The LP is continuous, so each variable is thresholded at 0.5 to obtain a
# 0/1 decision per lead.
x = res.x
call_selected = x[:n] > 0.5
email_selected = x[n:] > 0.5

df['Selected_Call'] = call_selected.astype(int)
df['Selected_Email'] = email_selected.astype(int)

# Report the value of the thresholded plan that was just written to `df`.
# It equals -res.fun whenever the LP solution is already 0/1, and stays
# consistent with the selected leads if any variable came back fractional.
expected_conversions = score[call_selected].sum() + score[email_selected].sum()

print(f"Total Calls: {df['Selected_Call'].sum()}")
print(f"Total Emails: {df['Selected_Email'].sum()}")
print(f"Expected Lead Conversions: {expected_conversions:.2f}")


Total Calls: 50
Total Emails: 100
Expected Lead Conversions: 134.05


In [1]:
import matplotlib.pyplot as plt

# Bar chart of how many leads were assigned to each contact method.
# Reads the 'Selected_Call' / 'Selected_Email' columns of `df`, so the
# cells that create them must have run first in the same kernel.
allocation = {
    'Calls': df['Selected_Call'].sum(),
    'Emails': df['Selected_Email'].sum(),
}
labels = list(allocation.keys())
values = list(allocation.values())

fig, ax = plt.subplots()
ax.bar(labels, values, color=['skyblue', 'orange'])
ax.set_title("Contact Method Allocation")
ax.set_ylabel("Number of Leads")
plt.show()


NameError: name 'df' is not defined