<a href="https://colab.research.google.com/github/samer-glitch/samerelhajjhassan/blob/main/Part1%20LightGBM%20and%20Part2%20Optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import files
uploaded = files.upload()

Saving PSP_Jan_Feb_2019.csv to PSP_Jan_Feb_2019.csv


In [3]:
from google.colab import files
uploaded = files.upload()

Saving PSP_Jan_Feb_2019modfied1_with_tries.csv to PSP_Jan_Feb_2019modfied1_with_tries.csv


In [5]:
from google.colab import files
uploaded = files.upload()

Saving psp_fees.csv to psp_fees.csv


In [6]:
# part 1 - apply •	LightGBM ML Model to predict success probability
import pandas as pd
import time
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from imblearn.over_sampling import ADASYN
from lightgbm import LGBMClassifier

# Start timer
start_time = time.time()

# Load the datasets


# Load the dataset
transactions = pd.read_csv('PSP_Jan_Feb_2019.csv')
print(f"Dataset loaded. Time elapsed: {time.time() - start_time:.2f} seconds")

# Preprocessing
X = transactions.drop(['success', 'tmsp'], axis=1)
y = transactions['success']
categorical_features = X.select_dtypes(include=['object', 'category']).columns
numerical_features = X.select_dtypes(exclude=['object', 'category']).columns

preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(), categorical_features)
])

X_processed = preprocessor.fit_transform(X)
print(f"Preprocessing completed. Time elapsed: {time.time() - start_time:.2f} seconds")

# Handling class imbalance with ADASYN
adasyn = ADASYN(random_state=42)
X_resampled, y_resampled = adasyn.fit_resample(X_processed, y)
print(f"Class imbalance handled. Time elapsed: {time.time() - start_time:.2f} seconds")

# StratifiedKFold for cross-validation
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Updated LightGBM hyperparameters (include num_leaves)
lgbm_param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5],
    'num_leaves': [20, 40, 60]  # Adjust this range as needed
}


# Hyperparameter tuning using GridSearchCV
lgbm_model = LGBMClassifier(random_state=42)
grid_search = GridSearchCV(lgbm_model, lgbm_param_grid, cv=stratified_kfold, scoring='roc_auc', verbose=2, n_jobs=-1)
grid_search.fit(X_resampled, y_resampled)
best_lgbm_model = grid_search.best_estimator_

# Print best hyperparameters
print("Best Hyperparameters:", grid_search.best_params_)
print(f"Model tuning completed. Time elapsed: {time.time() - start_time:.2f} seconds")

# Fit the best model
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
best_lgbm_model.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = best_lgbm_model.predict(X_test)
y_pred_proba = best_lgbm_model.predict_proba(X_test)[:, 1]
print("\033[1m" + "Accuracy Score:" + "\033[0m", accuracy_score(y_test, y_pred))
print("\033[1m" + "ROC AUC Score:" + "\033[0m", roc_auc_score(y_test, y_pred_proba))
print("\033[1m" + "Classification Report:\n" + "\033[0m", classification_report(y_test, y_pred))
print(f"Model evaluation completed. Total time elapsed: {time.time() - start_time:.2f} seconds")

# Function to determine the best PSP based on success probability and fees
def determine_best_psp(transaction_probability, psp_fees):
    best_psp = None
    best_score = float('-inf')

    for _, psp_row in psp_fees.iterrows():
        psp_name = psp_row['PSP']
        success_fee = psp_row['PSP_Fee_Successful']
        failure_fee = psp_row['PSP_Fee_Failed']
        score = transaction_probability / (success_fee + failure_fee)

        if score > best_score:
            best_score = score
            best_psp = psp_name

    return best_psp

Dataset loaded. Time elapsed: 0.21 seconds
Preprocessing completed. Time elapsed: 0.47 seconds
Class imbalance handled. Time elapsed: 2.56 seconds
Fitting 5 folds for each of 81 candidates, totalling 405 fits
[LightGBM] [Info] Number of positive: 40007, number of negative: 40182
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008949 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 605
[LightGBM] [Info] Number of data points in the train set: 80189, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498909 -> initscore=-0.004365
[LightGBM] [Info] Start training from score -0.004365
Best Hyperparameters: {'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 300, 'num_leaves': 40}
Model tuning completed. Time elapsed: 412.81 seconds
[LightGBM] [Info] Number of positive: 31975, number of negative: 32176
[LightGBM]

In [10]:
# part 2 - optimization problem
import pandas as pd
import random
from datetime import datetime
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Assuming your preprocessor is a ColumnTransformer with OneHotEncoder for categorical features
# Update the OneHotEncoder to handle unknown categories
for transformer in preprocessor.transformers:
    if isinstance(transformer[1], OneHotEncoder):
        transformer[1].set_params(handle_unknown='ignore')

# Load the psp_fees data
psp_fees_df = pd.read_csv('psp_fees.csv')
print ("PSP Fees:, psp_fees_df")

def predict_success_probability(transaction, preprocessor, model):
    transaction_df = pd.DataFrame([transaction])
    processed_features = preprocessor.transform(transaction_df)
    probability = model.predict_proba(processed_features)[:, 1][0]
    return probability

def assign_weights(transaction):
    # You can define your logic for assigning weights here
    time_weight = 0.9 if transaction['hour'] in range(9, 17) else 1.0
    country_weight = {'Germany': 0.9, 'Switzerland': 1.0, 'Austria': 1.1}
    card_weight = {'Diners': 0.9, 'Visa': 1.0, 'Master': 1.0}
    psp_weight = 1.0  # You can define the psp_weight logic as needed
    return time_weight, country_weight, psp_weight, card_weight

# Function to generate a random date
def generate_random_date():
    year = random.randint(2024, 2025)
    month = random.randint(1, 12)
    day = random.randint(1, 28)
    hour = random.randint(0, 23)
    minute = random.randint(0, 59)
    second = random.randint(0, 59)
    return f'{year}-{month:02d}-{day:02d} {hour:02d}:{minute:02d}:{second:02d}'

def generate_new_transaction():
    transaction = {
        'tmsp': datetime.strptime(generate_random_date(), '%Y-%m-%d %H:%M:%S'),
        'country': random.choice(['Germany', 'Switzerland', 'Austria']),
        'amount': random.randint(5, 10000),
        '3D_secured': random.randint(0, 1),
        'card': random.choice(['Visa', 'Master', 'Diners']),
        'number_of_tries': random.randint(1, 3),
        'Unnamed: 0': 0,
        'second': random.randint(0, 59),
        'minute': random.randint(0, 59),
        'hour': random.randint(0, 23),
        'day': random.randint(1, 28),
        'weekday': random.randint(0, 6)
    }
    return transaction


def assign_weights(transaction):
    # You can define your logic for assigning weights here
    weights = {
        'time_weight': 0.9 if transaction['hour'] in range(9, 17) else 1.0,
        'country_weight': {'Germany': 0.9, 'Switzerland': 1.0, 'Austria': 1.1},
        'card_weight': {'Diners': 0.9, 'Visa': 1.0, 'Master': 1.0},
        'psp_weight': 1.0,  # You can define the psp_weight logic as needed,
        'amount_weight': 0.0,
        'tries_weight': 0.0,
        'weekend_weight': 0.0
    }

    # Add constraint for transaction amount
    if transaction['amount'] < 100:
        weights['amount_weight'] = 0.8
    elif 100 <= transaction['amount'] <= 500:
        weights['amount_weight'] = 1.0
    else:
        weights['amount_weight'] = 1.2

    # Add constraint for number of tries
    if transaction['number_of_tries'] == 1:
        weights['tries_weight'] = 0.9
    else:
        weights['tries_weight'] = 1.0

    # Add weekend weight
    if transaction['weekday'] in [5, 6]:
        weights['weekend_weight'] = 0.8

    return weights

# Rest of your code remains the same

# Function to calculate expected cost for a given PSP
def calculate_expected_cost(transaction, probability, psp_name, psp_fees_df):
    # Check if the amount is high and select Goldcard if it is
    high_amount_threshold = 500
    if transaction['amount'] > high_amount_threshold:
        psp_name = 'Goldcard'

    # Fetch fees for the selected PSP
    fees = psp_fees_df[psp_fees_df['PSP'] == psp_name]
    success_fee = fees['PSP_Fee_Successful'].values[0]
    failure_fee = fees['PSP_Fee_Failed'].values[0]

    # Assign weights
    weights = assign_weights(transaction)

    # Extract country and card weights based on the transaction data
    country_weight = weights['country_weight'].get(transaction['country'], 1.0)
    card_weight = weights['card_weight'].get(transaction['card'], 1.0)

    # Adjust cost for 3D Secure
    secure_cost_factor = 1.1 if transaction['3D_secured'] == 1 else 0.9

    expected_cost = ((probability * success_fee * weights['psp_weight'] * weights['time_weight'] *
                      weights['amount_weight'] * weights['tries_weight'] * weights['weekend_weight']) + \
                     ((1 - probability) * failure_fee * country_weight * card_weight)) * secure_cost_factor
    return expected_cost

# Rest of your code remains the same
def find_best_psp(transaction, psp_fees_df, preprocessor, model):
    min_cost = float('inf')
    best_psp = None
    debug_info = {}  # Debugging information

    for psp in psp_fees_df['PSP']:
        transaction['PSP'] = psp
        current_probability = predict_success_probability(transaction, preprocessor, model)
        cost = calculate_expected_cost(transaction, current_probability, psp, psp_fees_df)
        debug_info[psp] = {'Probability': current_probability, 'Cost': cost}
        if cost < min_cost:
            min_cost = cost
            best_psp = psp

    print("Debug Info:", debug_info)  # Print debugging information
    return best_psp, min_cost
# Generate a specific number of transactions
num_transactions = 5  # Specify the number of transactions you want to generate
new_transactions = [generate_new_transaction() for _ in range(num_transactions)]

from prettytable import PrettyTable


# Function to format transaction data into a readable string
def format_transaction_data(transaction):
    formatted_data = []
    formatted_data.append(f"Date & Time: {transaction['tmsp'].strftime('%Y-%m-%d %H:%M:%S')}")
    formatted_data.append(f"Country: {transaction['country']}")
    formatted_data.append(f"Amount: {transaction['amount']}")
    formatted_data.append(f"3D Secured: {'Yes' if transaction['3D_secured'] == 1 else 'No'}")
    formatted_data.append(f"Card Type: {transaction['card']}")
    formatted_data.append(f"Number of Tries: {transaction['number_of_tries']}")
    return '\n'.join(formatted_data)

# Process and evaluate each new transaction
for transaction in new_transactions:
    best_psp, expected_cost = find_best_psp(transaction, psp_fees_df, preprocessor, best_lgbm_model)
    transaction['PSP'] = best_psp  # Assign the best PSP

    # Format transaction details
    formatted_transaction_data = format_transaction_data(transaction)

    # Create a table for each transaction's output
    table = PrettyTable()
    table.field_names = ["Transaction Details", "Selected PSP and Cost"]
    table.align = "l"
    table.add_row([formatted_transaction_data, f"PSP: {best_psp}\nExpected Cost: {expected_cost:.2f}"])

    print(table)
    print("\n")

PSP Fees:, psp_fees_df
Debug Info: {'Moneycard': {'Probability': 0.07367462013064512, 'Cost': 5.0438416933886385}, 'Goldcard': {'Probability': 0.48847360731541317, 'Cost': 2.7852612081675754}, 'UK_Card': {'Probability': 0.1653966911589443, 'Cost': 4.544415016639549}, 'Simplecard': {'Probability': 0.3274557966042552, 'Cost': 3.6620031874898307}}
+----------------------------------+-----------------------+
| Transaction Details              | Selected PSP and Cost |
+----------------------------------+-----------------------+
| Date & Time: 2024-05-28 22:19:14 | PSP: Goldcard         |
| Country: Austria                 | Expected Cost: 2.79   |
| Amount: 2526                     |                       |
| 3D Secured: Yes                  |                       |
| Card Type: Diners                |                       |
| Number of Tries: 2               |                       |
+----------------------------------+-----------------------+


Debug Info: {'Moneycard': {'Probability':