In [1]:
import sys
import subprocess

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

required_packages = [
    "huggingface_hub",
    "transformers",
    "datasets",
    "evaluate",
    "pandas",
    "torch",
    "torchaudio",
    "torchvision",
    "accelerate>=0.26.0",
    "peft"
]

for package in required_packages:
    try:
        __import__(package.split("==")[0].split(">=")[0].split("<=")[0])
    except ImportError:
        print(f"Installing {package}...")
        install(package)


Installing evaluate...


In [2]:
import re
import numpy as np
import pandas as pd
from datasets import load_dataset, concatenate_datasets, Dataset
from sklearn.model_selection import train_test_split

# Loading the dataset

In [9]:
from huggingface_hub import hf_hub_download
import pandas as pd

huggingface_repo_id = "lleratodev/ai-powered-phishing-email-detection-system"
target_files = ["SpamAssasin.csv", "Nigerian_Fraud.csv", "CEAS_08.csv", "Nazario.csv"]

dfs = []
load_errors = False

try:
    for filename in target_files:
        print(f"  - Downloading {filename} from HF Hub...")
        local_path = hf_hub_download(repo_id=huggingface_repo_id, filename=filename, repo_type="dataset")

        print(f"  - Loading {filename} with pandas...")
        df = pd.read_csv(local_path)
        df["source"] = filename  # optional: label source
        dfs.append(df)

    print("\nAll files loaded successfully.")

    # Combine all into one DataFrame
    print("\nCombining into a single DataFrame...")
    main_df = pd.concat(dfs, ignore_index=True)
    print(f"Combined DataFrame has {len(main_df)} rows.")

except Exception as e:
    print(f"\n--- ❌ ERROR ---")
    print(f"Failed to load and combine datasets.")
    print(f"Error details: {e}")
    load_errors = True

  - Downloading SpamAssasin.csv from HF Hub...
  - Loading SpamAssasin.csv with pandas...
  - Downloading Nigerian_Fraud.csv from HF Hub...


Nigerian_Fraud.csv:   0%|          | 0.00/9.19M [00:00<?, ?B/s]

  - Loading Nigerian_Fraud.csv with pandas...
  - Downloading CEAS_08.csv from HF Hub...


CEAS_08.csv:   0%|          | 0.00/67.9M [00:00<?, ?B/s]

  - Loading CEAS_08.csv with pandas...
  - Downloading Nazario.csv from HF Hub...


Nazario.csv:   0%|          | 0.00/7.81M [00:00<?, ?B/s]

  - Loading Nazario.csv with pandas...

✅ All files loaded successfully.

Combining into a single DataFrame...
✅ Combined DataFrame has 49860 rows.


# Cleaning the dataset

In [10]:
# Handling data here, dropping rows with no body & filling null/NaNs subjects and sender with empty strings

if main_df is not None:
    desired_columns = ['subject', 'sender', 'body', 'label']

    print(f"\nSelecting desired columns: {desired_columns}")

    actual_cols_in_df = main_df.columns.tolist()
    cols_to_keep = [col for col in desired_columns if col in actual_cols_in_df]
    missing_cols = [col for col in desired_columns if col not in cols_to_keep]

    if missing_cols:
        print(f"  WARNING: Desired columns expected but NOT found: {missing_cols}")
        print(f"  Available columns: {actual_cols_in_df}")

    if cols_to_keep:
        main_df = main_df[cols_to_keep].copy() # Use .copy() to be safe
        print("Columns selected successfully.")

        print("\n--- Final Prepared DataFrame ---")
        print(f"Shape: {main_df.shape[0]} rows, {main_df.shape[1]} columns")

        print(main_df.head())
        main_df.info()
    else:
        print("\n--- ERROR ---")
        print("None of the desired columns were found in the combined DataFrame. Cannot proceed.")
        final_df = None

else:
    print("\nSkipping column selection because DataFrame creation failed or was skipped.")


Selecting desired columns: ['subject', 'sender', 'body', 'label']
Columns selected successfully.

--- Final Prepared DataFrame ---
Shape: 49860 rows, 4 columns
                                          subject  \
0                        Re: New Sequences Window   
1                       [zzzzteana] RE: Alexander   
2                       [zzzzteana] Moscow bomber   
3           [IRR] Klez: The Virus That  Won't Die   
4  Re: [zzzzteana] Nothing like mama used to make   

                                      sender  \
0             Robert Elz <kre@munnari.OZ.AU>   
1  Steve Burt <Steve_Burt@cursor-system.com>   
2              "Tim Chapman" <timc@2ubh.com>   
3           Monty Solomon <monty@roscom.com>   
4  Stewart Smith <Stewart.Smith@ee.ed.ac.uk>   

                                                body  label  
0  Date:        Wed, 21 Aug 2002 10:54:46 -0500  ...      0  
1  Martin A posted:\nTassos Papadopoulos, the Gre...      0  
2  Man Threatens Explosion In Moscow \n\nThur

In [11]:
if 'main_df' in locals() and isinstance(main_df, pd.DataFrame):
    print("Missing values before handling:")
    print(main_df.isnull().sum())
    initial_rows = len(main_df)
    print(f"Initial number of rows: {initial_rows}")

    # Body strategy: We'll drop the row(s) with missing 'body'
    main_df.dropna(subset=['body'], inplace=True)
    print("Dropped rows where 'body' was missing.")
    rows_after_body = len(main_df)
    print(f"Rows removed: {initial_rows - rows_after_body}")
    print(f"Current number of rows: {rows_after_body}")

    # Subject strategy: Fill missing 'subject' with an empty string ''
    initial_subject_nan = main_df['subject'].isnull().sum()
    if initial_subject_nan > 0:
        main_df['subject'] = main_df['subject'].fillna('') # Assign back
        print(f"Filled {initial_subject_nan} missing 'subject' values with empty strings.")
    else:
        print("No missing 'subject' values found to fill.")

    # Sender strategy: Fill missing 'sender' with an empty string ''
    initial_sender_nan = main_df['sender'].isnull().sum()
    if initial_sender_nan > 0:
        main_df['sender'] = main_df['sender'].fillna('') # Assign back
        print(f"Filled {initial_sender_nan} missing 'sender' values with empty strings.")
    else:
        print("No missing 'sender' values found to fill.")

    print("Missing values now:")
    print(main_df.isnull().sum())
    final_rows = len(main_df)
    print(f"Final number of rows: {final_rows}")

    print(main_df.info())
else:
    print("--- ERROR ---")
    print("DataFrame 'main_df' not found or is not a DataFrame.")
    print("Please ensure the previous data loading steps were successful.")

Missing values before handling:
subject     87
sender     331
body         1
label        0
dtype: int64
Initial number of rows: 49860
Dropped rows where 'body' was missing.
Rows removed: 1
Current number of rows: 49859
Filled 87 missing 'subject' values with empty strings.
Filled 331 missing 'sender' values with empty strings.
Missing values now:
subject    0
sender     0
body       0
label      0
dtype: int64
Final number of rows: 49859
<class 'pandas.core.frame.DataFrame'>
Index: 49859 entries, 0 to 49859
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   subject  49859 non-null  object
 1   sender   49859 non-null  object
 2   body     49859 non-null  object
 3   label    49859 non-null  int64 
dtypes: int64(1), object(3)
memory usage: 1.9+ MB
None


In [12]:
# Text clean-up function first
def simple_text_clean(text):
        if isinstance(text, str):
            text = text.lower()
            text = re.sub(r'[^a-z0-9\s]', '', text)  # Remove non-alphanumeric but keep spaces
            text = re.sub(r'\s+', ' ', text).strip()  # Normalize whitespace
        else:
            text = ''
        return text

print("\n--- Applying simple text clean-up ---")
for col in ['subject', 'sender', 'body']:
    if col in main_df.columns:
        print(f"Cleaning column: {col}...")
        main_df[col] = main_df[col].apply(simple_text_clean)
    else:
        print(f"Warning: Column '{col}' not found for cleaning.")
print("Text cleaning complete.")


--- Applying simple text clean-up ---
Cleaning column: subject...
Cleaning column: sender...
Cleaning column: body...
Text cleaning complete.


In [13]:
# Joining features to create X and y (feature and label) and variable configurations for the model
# --- Defining our model features (X - sender, subject & body) and Target (y = label (Spam or legitimate)) ---
feature_columns = []
if 'subject' in main_df.columns: feature_columns.append('subject')
if 'sender' in main_df.columns: feature_columns.append('sender')
if 'body' in main_df.columns: feature_columns.append('body')

if not feature_columns or 'label' not in main_df.columns:
    print("\n--- ERROR ---")
    print("Required email feature columns ('subject', 'sender', 'body') or 'label' column are missing.")
    print(f"Available columns: {main_df.columns.tolist()}")
else:
    main_df['email'] = main_df['sender'] + ' ' + main_df['subject'] + ' ' + main_df['body']
    df = main_df[['email', 'label']].copy()

    print(df.head())

                                               email  label
0  robert elz kremunnariozau re new sequences win...      0
1  steve burt steveburtcursorsystemcom zzzzteana ...      0
2  tim chapman timc2ubhcom zzzzteana moscow bombe...      0
3  monty solomon montyroscomcom irr klez the viru...      0
4  stewart smith stewartsmitheeedacuk re zzzztean...      0


In [14]:
# --- Train and Test split ---
test_split = 0.2
df_train, df_test = train_test_split(df, test_size=test_split, random_state=42)

print(f"df_train shape: {df_train.shape}")
print(f"df_test shape: {df_test.shape}")

# Convert to hugging face dataset
train_dataset = Dataset.from_pandas(df_train)
test_dataset = Dataset.from_pandas(df_test)

print(f"train_dataset shape: {train_dataset.shape}"); print(f"test_dataset shape: {test_dataset.shape}")

# Update model_name to BERT-Mini
model_name = "prajjwal1/bert-mini"
print(f"\nUsing model: {model_name}")

df_train shape: (39887, 2)
df_test shape: (9972, 2)
train_dataset shape: (39887, 3)
test_dataset shape: (9972, 3)

Using model: prajjwal1/bert-mini


# Preprocessing

In [15]:
from transformers import AutoTokenizer

In [16]:
# Initialize tokenizer for BERT-Mini
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [17]:
def preprocess(examples):
  return tokenizer(examples["email"], truncation=True, padding="max_length", max_length=512)

In [18]:
print("\n--- Tokenizing Data ---")
tokenized_train_data = train_dataset.map(preprocess, batched=True)
tokenized_test_data = test_dataset.map(preprocess, batched=True)
print("Tokenization complete.")
print(f"Example of tokenized input: {tokenized_train_data[0]['input_ids'][:20]}...") # Show first 20 token IDs


--- Tokenizing Data ---


Map:   0%|          | 0/39887 [00:00<?, ? examples/s]

Map:   0%|          | 0/9972 [00:00<?, ? examples/s]

Tokenization complete.
Example of tokenized input: [101, 2720, 7875, 24206, 26099, 11113, 16052, 3022, 2480, 2575, 9006, 14806, 2015, 2013, 11558, 7592, 2026, 6203, 2077, 1045]...


In [20]:
from transformers import AutoModelForSequenceClassification

In [21]:
# Load BERT-Mini for sequence classification
# label_numbers should be defined (e.g., 2 for binary classification)
# Ensure label_numbers is defined, e.g.:
# label_numbers = 2 # Or based on your dataset
label_numbers = 2
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=label_numbers)
print(f"\nModel {model_name} loaded for sequence classification with {label_numbers} labels.")

pytorch_model.bin:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-mini and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Model prajjwal1/bert-mini loaded for sequence classification with 2 labels.


# Training the model

In [22]:
from transformers import DataCollatorWithPadding
import evaluate
import numpy as np

In [23]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
metric = evaluate.load("accuracy")

def compute_metrics(evaluate_prediction):
  logits, labels = evaluate_prediction
  predictions = np.argmax(logits, axis=1)
  return metric.compute(predictions=predictions, references=labels)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [25]:
from transformers import TrainingArguments

In [26]:
training_args = TrainingArguments(
    output_dir="./results_bert_mini", # Changed output directory
    learning_rate=2e-5, # Common learning rate for BERT fine-tuning
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01, # Added weight decay
    eval_strategy = "epoch",
    logging_strategy = "epoch",
    save_strategy = "epoch",
    load_best_model_at_end=True, # Load the best model at the end of training
    metric_for_best_model="accuracy", # Use accuracy to determine the best model
)

In [27]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_data,
    eval_dataset=tokenized_test_data,
    processing_class=tokenizer, # Corrected: processing_class -> tokenizer
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [28]:
print("\n--- Starting Training ---")
trainer.train()
print("Training complete.")


--- Starting Training ---




<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mu25468023[0m ([33mu25468023-university-of-pretoria[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0987,0.038881,0.991276
2,0.032,0.031891,0.993682


Training complete.


In [30]:
# Explicit model saving - important for using later in the prototype
saved_model_path = 'cos270_u25468023_bertmini_trained_model'
trainer.save_model(saved_model_path)
print(f"Model saved to {saved_model_path}")

saved_tokenizer_path = 'cos270_u25468023_bertmini_trained_tokenizer'
tokenizer.save_pretrained(saved_tokenizer_path)
print(f"Tokenizer saved to {saved_tokenizer_path}")

Model saved to cos270_u25468023_bertmini_trained_model
Tokenizer saved to cos270_u25468023_bertmini_trained_tokenizer


In [31]:
from sklearn.metrics import classification_report

In [32]:
# Evaluation on the training data
print("\n--- Evaluating on Training Data ---")
train_predictions = trainer.predict(tokenized_train_data)
train_preds = np.argmax(train_predictions.predictions, axis=1)
train_gt = df_train['label'].tolist() # Ensure this aligns with tokenized_train_data
print(classification_report(train_gt, train_preds, target_names=['Legitimate', 'Spam'])) # 0 for Legitimate, 1 for Phishing


--- Evaluating on Training Data ---


              precision    recall  f1-score   support

  Legitimate       1.00      0.99      1.00     17162
        Spam       1.00      1.00      1.00     22725

    accuracy                           1.00     39887
   macro avg       1.00      1.00      1.00     39887
weighted avg       1.00      1.00      1.00     39887



In [None]:
# Evaluation on the test data
print("\n--- Evaluating on Test Data ---")
test_predictions_output = trainer.predict(tokenized_test_data)
test_preds = np.argmax(test_predictions_output.predictions, axis=1)

# Ensure that df_test['label'] aligns with the order in tokenized_test_data
# If you used shuffle=False in Dataset.from_pandas or ensured order is maintained, this should be fine.
test_gt = df_test['label'].tolist()

print(classification_report(test_gt, test_preds, target_names=['Legitimate', 'Phishing']))

# Display final evaluation metrics from the trainer
eval_results = trainer.evaluate()
print(f"\nFinal evaluation metrics on the test set: {eval_results}")


--- Evaluating on Test Data ---


              precision    recall  f1-score   support

  Legitimate       0.99      0.99      0.99      4241
        Spam       0.99      1.00      0.99      5731

    accuracy                           0.99      9972
   macro avg       0.99      0.99      0.99      9972
weighted avg       0.99      0.99      0.99      9972




Final evaluation metrics on the test set: {'eval_loss': 0.03571997955441475, 'eval_accuracy': 0.992579221821099, 'eval_runtime': 23.7758, 'eval_samples_per_second': 419.417, 'eval_steps_per_second': 26.245, 'epoch': 2.0}


In [35]:
import torch

In [36]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

Using device: cuda


In [37]:
# Testing examples
# BERT-Mini
import time

test_subset = test_dataset.select(range(10))  # Using test_dataset
emails = test_subset['email']  # Using 'email' column
labels = test_subset['label']
start_time = time.time()
inputs = tokenizer(emails, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)  # Using your tokenizer, max_length=512
model.eval()  # Using your original model
with torch.no_grad():
    outputs = model(**inputs)  # Using your original model
    preds = outputs.logits.argmax(dim=-1).cpu().numpy()
inference_time = time.time() - start_time
label_names = ['Legitimate', 'Spam']  # Assuming these are your label names
for i, (email, true, pred) in enumerate(zip(emails, labels, preds)):  # Using 'email'
    print(f"Example {i+1}")
    print("Email:", email)  # Using 'email'
    print("True label:", label_names[true])
    print("Predicted:", label_names[pred])
    print("---")
print(f"Inference time for 1 example: {inference_time/len(emails):.4f} seconds") # calculate the average inference time

Example 1
Email: trent nelson referredqg6372rentokilinitialcom softtabsfdaapprovednewproducts infosafesecuretoallcountries httpg5lgowblulivefilestorecomy1p0kbqx0qevwcu39mwthnfafxwp24rzzxobzrdbklpcsddu12msobxfxzwsvzyljmaydk1jnc3irz7sjzluyqindexhtml
True label: Spam
Predicted: Spam
---
Example 2
Email: christy samuels christycoralwavecom create a furore in her bedroom your new bigger and stronger love wand will make your mate culminate faster and brighter see your tool swell in length and width immensely httpwhitedonecom helpless victim is filmed by the attackers friends forsells your next book mickey spillaneacross british society
True label: Spam
Predicted: Spam
---
Example 3
Email: susan martins smartins1webmailcoza urgentplease help me from mrssusan martins 101 jan smuts avenue craighall johannesburg south africa tel27732250444direct with due respect trust and humility i write you this proposal which i believe will be a great interest to you i found your contact while i was doing a p

Trying out LoRa

In [None]:
# from peft import LoraConfig, get_peft_model

In [None]:
# from peft import TaskType

# lora_config = LoraConfig(
#     r=8,
#     lora_alpha=16,
#     lora_dropout=0.1,
#     bias="none",
#     task_type=TaskType.SEQ_CLS,
#     target_modules=["query", "value"]
# )

# # Example for BERT-Mini
# lora_model = AutoModelForSequenceClassification.from_pretrained("prajjwal1/bert-mini", num_labels=2).to(device)
# peft_model = get_peft_model(lora_model, lora_config)
# peft_model.print_trainable_parameters()

# args = TrainingArguments(
#     output_dir="lora-minibert-phishing-cos720",
#     per_device_train_batch_size=16,
#     per_device_eval_batch_size=16,
#     num_train_epochs=2,
#     save_strategy="no",
#     report_to="none"
# )
# peft_trainer = Trainer(
#     model=peft_model,
#     args=args,
#     train_dataset=tokenized_train_data.shuffle(seed=42).select(range(4000)),
#     eval_dataset=tokenized_test_data.shuffle(seed=42).select(range(1000)),
#     compute_metrics=lambda eval_pred: {"accuracy": (np.argmax(eval_pred[0], axis=-1) == eval_pred[1]).mean()}
# )
# peft_trainer.train()
# results_lora_mini = peft_trainer.evaluate()
# print("LoRA test accuracy:", results_lora_mini["eval_accuracy"])

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-mini and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 33,282 || all params: 11,204,356 || trainable%: 0.2970


Step,Training Loss
500,0.5687


LoRA test accuracy: 0.812


In [None]:
# # Evaluation on the test data
# print("\n--- Evaluating on Test Data ---")
# test_predictions_output_lora = peft_trainer.predict(tokenized_test_data)
# test_preds_lora = np.argmax(test_predictions_output_lora.predictions, axis=1)

# # Ensure that df_test['label'] aligns with the order in tokenized_test_data
# # If you used shuffle=False in Dataset.from_pandas or ensured order is maintained, this should be fine.
# test_gt_lora = df_test['label'].tolist()

# print(classification_report(test_gt_lora, test_preds_lora, target_names=['Legitimate', 'Spam'])) # Assuming 0 for Legitimate, 1 for Spam

# # Display final evaluation metrics from the trainer
# eval_results_lora = peft_trainer.evaluate()
# print(f"\nBERT with LoRa inal evaluation metrics on the test set: {eval_results_lora}")


--- Evaluating on Test Data ---


              precision    recall  f1-score   support

  Legitimate       0.76      0.81      0.79      4241
        Spam       0.86      0.81      0.83      5731

    accuracy                           0.81      9972
   macro avg       0.81      0.81      0.81      9972
weighted avg       0.82      0.81      0.81      9972




BERT with LoRa inal evaluation metrics on the test set: {'eval_loss': 0.4920993149280548, 'eval_accuracy': 0.812, 'eval_runtime': 2.8436, 'eval_samples_per_second': 351.661, 'eval_steps_per_second': 22.155, 'epoch': 2.0}


In [None]:
# # Show predictions for 10 examples with your LoRA model
# test_subset = test_dataset.select(range(10))  # Using test_dataset instead of 'dataset'
# emails = test_subset['email']  # Using 'email' column instead of 'text'
# labels = test_subset['label']
# start_time = time.time()
# inputs = tokenizer(emails, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)  # Adjust max_length if needed
# peft_model.eval()
# with torch.no_grad():
#     outputs = peft_model(**inputs)
#     preds = outputs.logits.argmax(dim=-1).cpu().numpy()

# inference_time = time.time() - start_time

# label_names = ['Legitimate', 'Spam']

# for i, (email, true, pred) in enumerate(zip(emails, labels, preds)):
#     print(f"Example {i+1}")
#     print("Email:", email)
#     print("True label:", label_names[true])
#     print("Predicted (LoRA):", label_names[pred])
#     print("---")
# print(f"Inference time for 1 example: {inference_time/len(emails):.4f} seconds") # calculate the average inference time

Example 1
Email: trent nelson referredqg6372rentokilinitialcom softtabsfdaapprovednewproducts infosafesecuretoallcountries httpg5lgowblulivefilestorecomy1p0kbqx0qevwcu39mwthnfafxwp24rzzxobzrdbklpcsddu12msobxfxzwsvzyljmaydk1jnc3irz7sjzluyqindexhtml
True label: Spam
Predicted (LoRA): Spam
---
Example 2
Email: christy samuels christycoralwavecom create a furore in her bedroom your new bigger and stronger love wand will make your mate culminate faster and brighter see your tool swell in length and width immensely httpwhitedonecom helpless victim is filmed by the attackers friends forsells your next book mickey spillaneacross british society
True label: Spam
Predicted (LoRA): Spam
---
Example 3
Email: susan martins smartins1webmailcoza urgentplease help me from mrssusan martins 101 jan smuts avenue craighall johannesburg south africa tel27732250444direct with due respect trust and humility i write you this proposal which i believe will be a great interest to you i found your contact while i