# In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Seed NumPy's global RNG so each run yields the same synthetic invoices.
# NOTE: the random draws below consume one shared stream; keep their order.
np.random.seed(42)

# How many synthetic invoices to produce
n_new = 200

# Value pools for the categorical fields, and the calendar dates are drawn from
_product_pool = ['A001', 'B002', 'C003', 'D004']
_terms_pool = ['Net 30', 'Net 60', 'Early 10% Discount']
_calendar_2023 = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')

# Identifiers and order lines (IDs run INV2000, INV2001, ...)
invoice_ids_new = [f"INV{number}" for number in range(2000, 2000 + n_new)]
vendor_ids_new = np.random.randint(low=1, high=50, size=n_new)
product_codes_new = np.random.choice(_product_pool, size=n_new)
quantities_new = np.random.randint(low=1, high=20, size=n_new)

# Prices and rates, each rounded to two decimals; rates are stored as fractions
unit_prices_new = np.round(np.random.uniform(50, 500, size=n_new), 2)
discounts_new = np.round(np.random.uniform(0.05, 0.25, size=n_new), 2)
tax_rates_new = np.round(np.random.uniform(0.05, 0.1, size=n_new), 2)
rebates_new = np.round(np.random.uniform(0.05, 0.15, size=n_new), 2)

# Contract terms and claim history
payment_terms_new = np.random.choice(_terms_pool, size=n_new)
previous_claims_new = np.random.randint(low=0, high=5, size=n_new)

# Payment and due dates are sampled independently from the same 2023 calendar
payment_dates_new = pd.to_datetime(np.random.choice(_calendar_2023, size=n_new))
invoice_due_dates_new = pd.to_datetime(np.random.choice(_calendar_2023, size=n_new))

# Gross line value: quantity times unit price, before any adjustment
invoice_amount_before_new = np.multiply(quantities_new, unit_prices_new)

# Net price once the negotiated discount fraction has been taken off
agreed_price_new = np.multiply(invoice_amount_before_new, 1 - discounts_new)

# Tax and rebate are both derived from the discounted (agreed) price
tax_amount_new = np.multiply(agreed_price_new, tax_rates_new)
rebate_amount_new = np.multiply(agreed_price_new, rebates_new)

# Amount due with tax added, and the same amount after the rebate is subtracted
total_amount_with_tax_new = np.add(agreed_price_new, tax_amount_new)
total_amount_with_rebate_new = np.subtract(total_amount_with_tax_new, rebate_amount_new)

# Classify each payment against its due date:
# paid after -> 'Late', paid on the day -> 'On Time', paid before -> 'Early'.
payment_behavior_new = np.where(payment_dates_new > invoice_due_dates_new, 'Late',
                                np.where(payment_dates_new == invoice_due_dates_new, 'On Time', 'Early'))

# Early payment discount: 5% if paid early, otherwise none.
_paid_early = payment_behavior_new == 'Early'
early_payment_discount_new = np.zeros(n_new)
early_payment_discount_new[_paid_early] = 0.05

# The "Early 10% Discount" term raises the early discount to 10%.
# It is restricted to invoices actually paid early; previously the 10% was
# granted to late and on-time payers with this term as well.
early_payment_discount_new[_paid_early & (payment_terms_new == 'Early 10% Discount')] = 0.10

# Signed day difference: positive when paid after the due date, negative when early
late_days_new = (payment_dates_new - invoice_due_dates_new).days

# Late Payment Penalty: 1% of the total amount for every full 10 days late
late_payment_penalty_new = np.where(late_days_new > 0, (late_days_new // 10) * 0.01 * total_amount_with_rebate_new, 0)

# Interest Charged: 1% of the total amount for every full 30 days late.
# Previously this was a flat 1% once past 30 days, contradicting the stated rule.
interest_charged_new = np.where(late_days_new > 0, (late_days_new // 30) * 0.01 * total_amount_with_rebate_new, 0)

# Final Total Due (Including Interest) = Total Amount Due (With Tax) + Interest Charged
# NOTE(review): interest is computed on the post-rebate amount but added to the
# pre-rebate (with tax) amount — confirm this mix is intended.
final_total_due_including_interest_new = total_amount_with_tax_new + interest_charged_new

# Amount actually paid: post-rebate total, less any early discount, plus any late penalty
amount_paid_new = total_amount_with_rebate_new * (1 - early_payment_discount_new) + late_payment_penalty_new

# Assemble the invoice table. Each pair is (column label, values); the list
# order is the column order of the resulting DataFrame.
_column_specs = [
    ("Invoice ID", invoice_ids_new),
    ("Vendor ID", vendor_ids_new),
    ("Product Code", product_codes_new),
    ("Quantity Ordered", quantities_new),
    ("Unit Price", unit_prices_new),
    # The three "(%)" columns hold fractions (e.g. 0.05), not percentages
    ("Discount Applied (%)", discounts_new),
    ("Tax Rate (%)", tax_rates_new),
    ("Rebate Applied (%)", rebates_new),
    ("Payment Terms", payment_terms_new),
    ("Previous Claims", previous_claims_new),
    ("Amount Paid", amount_paid_new),
    ("Invoice Amount (Before Adjustments)", invoice_amount_before_new),
    ("Total Amount Due (With Rebate)", total_amount_with_rebate_new),
    ("Total Amount Due (With Tax)", total_amount_with_tax_new),
    ("Payment Date", payment_dates_new),
    ("Invoice Due Date", invoice_due_dates_new),
    ("Payment Behavior", payment_behavior_new),
    ("Interest Charged", interest_charged_new),
    ("Final Total Due (Including Interest)", final_total_due_including_interest_new),
]
df_new = pd.DataFrame(dict(_column_specs))

# Fail fast with an actionable message: the classifier is created in another
# cell, and without it nothing below can run.
if "svm_model" not in globals():
    raise NameError(
        "name 'svm_model' is not defined: run the cell that trains the SVM "
        "model before scoring the new invoices"
    )

# Numeric columns fed to the model
features = ['Invoice Amount (Before Adjustments)', 'Discount Applied (%)', 'Tax Rate (%)', 'Rebate Applied (%)',
            'Previous Claims', 'Amount Paid', 'Total Amount Due (With Rebate)', 'Total Amount Due (With Tax)',
            'Interest Charged', 'Final Total Due (Including Interest)']

# Standardize into a separate frame so df_new keeps the real amounts; writing
# the z-scores back into df_new would make the printed and saved dataset
# unreadable as invoice data.
# NOTE(review): this fits a fresh scaler on the new data. If the model was
# trained on features standardized by another scaler, that fitted scaler
# should be reused here with .transform() instead — confirm against the
# training cell.
scaler = StandardScaler()
scaled_features = pd.DataFrame(
    scaler.fit_transform(df_new[features]),
    columns=features,
    index=df_new.index,
)

# Predict the 'Payment Status' using the trained SVM model
predictions = svm_model.predict(scaled_features)

# Map predictions back to labels: class 1 -> 'Overpayment', anything else -> 'Underpayment'
df_new['Payment Status'] = np.where(predictions == 1, 'Overpayment', 'Underpayment')

# Show the new dataset
print(df_new.head())

# Save the new dataset (raw values plus the predicted status)
df_new.to_excel("New_Recovery_Audit_Dataset.xlsx", index=False)


# Output of the cell above when run without a prior definition of `svm_model`:
# NameError: name 'svm_model' is not defined