In [None]:
!pip install joblib
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle
import regex as re
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, roc_curve, auc, recall_score

# Read the labelled training spreadsheet. Later cells use its 'address', 'city',
# 'province', 'API' and 'Result' columns (plus 'order_id' and 'status', which are dropped).
training_data = pd.read_excel('/content/Training_data .xlsx')

# Build one comma-separated address string per row from the three location columns
address_part = training_data['address'].astype(str)
city_part = training_data['city'].astype(str)
province_part = training_data['province'].astype(str)
training_data['full_address'] = address_part + ', ' + city_part + ', ' + province_part

# Keep a handle on the per-row API flag column
API_Match = training_data['API']

# Abbreviation -> full word, applied only to standalone tokens (see replace_words).
# The table previously also listed look-ahead variants such as r'\bH(?![oO])\b'.
# A trailing \b already forbids a following letter, so those variants matched
# exactly what the plain forms below match and never changed the output.
replacements = {
    r'\bH\b': 'House',
    r'\bh\b': 'house',
    r'\bst\b': 'Street',
    r'\bSt\b': 'Street',
    r'\bST\b': 'Street',
}


def replace_words(text):
    """Expand standalone 'H'/'h' and 'st'/'St'/'ST' tokens in an address string.

    Args:
        text: The address to normalise. Non-string values are returned untouched.

    Returns:
        The string with every pattern in `replacements` substituted, or `text`
        itself when it is not a string.
    """
    if not isinstance(text, str):
        return text
    for pattern, replacement in replacements.items():
        text = re.sub(pattern, replacement, text)
    return text

# Normalise abbreviations row by row; the result is stored in a new 'updated_address' column
training_data['updated_address'] = training_data['full_address'].map(replace_words)

# Define feature extraction functions
def extract_feature_1(text):
    """Return True when `text` contains any address-structure keyword.

    The check is a case-sensitive substring test against the capitalised,
    lower-case and upper-case spellings of Sector, Block, House, Street and Flat.
    """
    keywords = (
        'Sector', 'sector', 'SECTOR',
        'Block', 'BLOCK', 'block',
        'House', 'HOUSE', 'house',
        'Street', 'STREET', 'street',
        'FLAT', 'flat', 'Flat',
    )
    return any(keyword in text for keyword in keywords)
#'Sector|sector|SECTOR|Block|block|BLOCK|House|house|HOUSE|Street|street|STREET|Flat|flat|FLAT'

def extract_feature_2(text):
    """Return True when the string form of `text` contains 'True'.

    The previous body ignored `text` and evaluated `'True' in API_Match`.
    `API_Match` is a pandas Series, and `in` on a Series tests the index
    labels rather than the values, so the result was the same for every row.
    The function now depends only on its argument, matching the definition
    used by the prediction and deployment cells.

    Args:
        text: Value to inspect. It is converted with `str()` first, so a
            boolean flag works as well as its string form.

    Returns:
        bool: whether 'True' occurs in `str(text)`.
    """
    return 'True' in str(text)

def extract_feature_3(text):
    """Return True when a house/number/street keyword is followed by a space and a digit.

    The keyword alternatives include single letters ('H', 'n', 'N'), so any
    occurrence of one of those letters directly before ' <digit>' also counts.
    """
    keyword_then_digit = re.search(
        r'(House|house|HOUSE|H|no|NO|No|number|Number|n|N|Street|St|st|street|STREET) \d',
        text,
    )
    return keyword_then_digit is not None

# Extract features and add them as columns
training_data['Featured_1'] = training_data['updated_address'].apply(extract_feature_1)
# Featured_2 mirrors the per-row API flag, exactly as the prediction code computes it
# ('True' in str(API_Match)). It was previously derived from the address text, which
# produced the same value for every row and did not match what the model sees at
# inference time.
# NOTE(review): assumes the 'API' column holds booleans (or 'True'/'False' text) - confirm.
training_data['Featured_2'] = training_data['API'].astype(str).str.contains('True', regex=False)
training_data['Featured_3'] = training_data['updated_address'].apply(extract_feature_3)

# Encode the textual 'Result' labels as integers; the column is overwritten in place
label_encoder = LabelEncoder()
encoded_result = label_encoder.fit_transform(training_data['Result'])
training_data['Result'] = encoded_result

# Remove identifier and raw-text columns so only model inputs and the target remain
unwanted_columns = ['order_id', 'province', 'city', 'address', 'status', 'updated_address', 'full_address']
training_data.drop(columns=unwanted_columns, inplace=True)

# Feature matrix (everything except the target) and target vector
X = training_data.drop(columns='Result')
y = training_data['Result']

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base estimator. Only random_state is fixed here: learning_rate, n_estimators,
# max_depth, subsample and colsample_bytree are all supplied by param_grid below,
# which replaces any constructor value for every candidate that is fitted.
xgb_model = xgb.XGBClassifier(random_state=42)

# Define the hyperparameters to search
param_grid = {
    'max_depth': [3, 6],
    'n_estimators': [100, 300],
    'learning_rate': [0.01, 0.1],
    'subsample': [0.7, 0.8],
    'colsample_bytree': [0.7, 0.8]
}

# Create a GridSearchCV object (5-fold cross-validation, scored on accuracy)
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, scoring='accuracy', cv=5)

# Perform the grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and estimator.
# GridSearchCV defaults to refit=True, so best_estimator_ has already been retrained
# on the whole of X_train with best_params_; fitting it a second time is unnecessary.
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Predict using the model
y_pred_encoded = best_estimator.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_encoded)

# Calculate precision (weighted by class support)
precision = precision_score(y_test, y_pred_encoded, average='weighted')

# Calculate ROC curve and AUC from the predicted probability of class 1
y_pred_prob = best_estimator.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

# Calculate recall (weighted by class support)
recall = recall_score(y_test, y_pred_encoded, average='weighted')

# Print the metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("ROC AUC:", roc_auc)
# The value comes from recall_score; it was previously printed under the label "R1-score"
print("Recall:", recall)

# Save the trained model to a file
model_filename = 'Address_Validation_Model_Updated.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(best_estimator, model_file)

print("Best Parameters:", best_params)
# Report the file that was actually written (the message used to name a different file)
print(f"Training Completed and Model Saved as {model_filename}")


Accuracy: 0.921
Precision: 0.916548097763976
ROC AUC: 0.8687372847678191
R1-score: 0.921
Best Parameters: {'colsample_bytree': 0.7, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.7}
Training Completed and Model Saved as Address_Validator_Model.pkl


**Prediction: scoring a single address with the trained model**

In [None]:
import pandas as pd
import pickle
import regex as re
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

# Abbreviation -> full word, applied only to standalone tokens (see replace_words).
# The table previously also listed look-ahead variants such as r'\bH(?![oO])\b'.
# A trailing \b already forbids a following letter, so those variants matched
# exactly what the plain forms below match and never changed the output.
replacements = {
    r'\bH\b': 'House',
    r'\bh\b': 'house',
    r'\bst\b': 'Street',
    r'\bSt\b': 'Street',
    r'\bST\b': 'Street',
}


def replace_words(text):
    """Expand standalone 'H'/'h' and 'st'/'St'/'ST' tokens in an address string.

    Args:
        text: The address to normalise. Non-string values are returned untouched.

    Returns:
        The string with every pattern in `replacements` substituted, or `text`
        itself when it is not a string.
    """
    if not isinstance(text, str):
        return text
    for pattern, replacement in replacements.items():
        text = re.sub(pattern, replacement, text)
    return text

# Feature 2: does the given value (the stringified API flag at the call site) contain 'True'?
def extract_feature_2(text):
    """Return whether 'True' is contained in `text`."""
    contains_true = 'True' in text
    return contains_true

# Sample address to score
address = 'Priceoye technology, sector h-12, NSTP NUST'
province = 'Federal'
city = 'Islamabad'
# Placeholder only; the real value is assigned further down from the API result
API_Match = ''

# Join the parts in address, city, province order with ', ' between them
full_address = ', '.join([address, city, province])

# Expand abbreviations in the combined string
updated_address = replace_words(full_address)

# Define feature extraction functions (similar to the training code)
def extract_feature_1(text):
    """Return True when `text` contains any address-structure keyword.

    The check is a case-sensitive substring test against the capitalised,
    lower-case and upper-case spellings of Sector, Block, House, Street and Flat.
    """
    keywords = (
        'Sector', 'sector', 'SECTOR',
        'Block', 'BLOCK', 'block',
        'House', 'HOUSE', 'house',
        'Street', 'STREET', 'street',
        'FLAT', 'flat', 'Flat',
    )
    return any(keyword in text for keyword in keywords)

def extract_feature_3(text):
    """Return True when a house/number/street keyword is followed by a space and a digit.

    The keyword alternatives include single letters ('H', 'n', 'N'), so any
    occurrence of one of those letters directly before ' <digit>' also counts.
    """
    keyword_then_digit = re.search(
        r'(House|house|HOUSE|H|no|NO|No|number|Number|n|N|Street|St|st|street|STREET) \d',
        text,
    )
    return keyword_then_digit is not None


#API Code Below
#API CODE
import requests
import pandas as pd

# Replace YOUR_API_KEY with your actual API key.
API_KEY = "YOUR_API_KEY"

def validate_address(address, timeout=10):
    """Geocode `address` with the Geoapify forward-geocoding endpoint.

    Args:
        address: Free-text address to look up.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ("Y", latitude, longitude) for the first feature returned by the API, or
        ("N", None, None) when the request fails, the status is not 200, the
        body is not valid JSON, or no feature is returned.
    """
    url = "https://api.geoapify.com/v1/geocode/search"
    # Passing the query via `params` lets requests URL-encode it, so addresses
    # containing '&', '#', '+' or spaces can no longer corrupt the query string.
    params = {"text": address, "limit": 1, "apiKey": API_KEY}
    try:
        # Without a timeout a stalled connection would block indefinitely
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            return "N", None, None
        features = response.json().get("features")
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON
        return "N", None, None
    if not features:
        return "N", None, None
    # The coordinate pair is read as [longitude, latitude]
    coordinates = features[0]["geometry"]["coordinates"]
    return "Y", coordinates[1], coordinates[0]

def translate_lat_long_to_address(latitude, longitude, timeout=10):
    """Reverse-geocode a coordinate pair with the Geoapify reverse endpoint.

    Args:
        latitude: Latitude of the point.
        longitude: Longitude of the point.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The "formatted" property of the first feature returned, or None when the
        request fails, the status is not 200, the body is not valid JSON, or no
        feature is returned.
    """
    url = "https://api.geoapify.com/v1/geocode/reverse"
    # `params` makes requests build and encode the query string
    params = {"lat": latitude, "lon": longitude, "apiKey": API_KEY}
    try:
        # Without a timeout a stalled connection would block indefinitely
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            return None
        features = response.json().get("features")
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON
        return None
    if not features:
        return None
    return features[0]["properties"]["formatted"]
def process_api(address, province, city):
  """Validate one address through validate_address and return only its status flag.

  The lookup text is built as "<address>, <province>, <city>"; the latitude and
  longitude that validate_address also returns are discarded.
  """
  formatted_address = f'{address}, {province}, {city}'
  status_flag, _latitude, _longitude = validate_address(formatted_address)
  return status_flag

# Read the Geoapify key from the GEOAPIFY_API_KEY environment variable so the
# secret is not stored in the notebook. If the variable is unset, API_KEY keeps
# the value it already has.
import os
API_KEY = os.environ.get('GEOAPIFY_API_KEY', API_KEY)
API_Output = process_api(address, province, city)

# A match requires a positive API result plus at least one keyword in the raw address.
# The second search that used to be OR-ed in here differed only in its last alternative
# ('STREET \d'), so anything it matched was already matched by this pattern.
# NOTE(review): the single-letter alternatives (H, n, N) match those letters anywhere
# in the address, so this check passes for most inputs - confirm that is intended.
keyword_pattern = re.compile('House|house|HOUSE|H|no|NO|No|number|Number|n|N|Street|St|st|street|STREET')
API_Match = (API_Output == "Y") and (keyword_pattern.search(address) is not None)

# Compute the three engineered features for this single address
featured_1 = extract_feature_1(updated_address)
featured_2 = extract_feature_2(str(API_Match))
featured_3 = extract_feature_3(updated_address)

# Assemble a one-row frame with the API flag and the three engineered features
feature_row = {
    'API': API_Match,  # True or False
    'Featured_1': [featured_1],
    'Featured_2': [featured_2],
    'Featured_3': [featured_3],
}
input_data = pd.DataFrame(feature_row)

# Load the trained XGBoost model.
# The training cell writes 'Address_Validation_Model_Updated.pkl' with pickle. This
# cell previously called joblib.load on a '.joblib' file, but joblib is never imported
# and no such file is written, so the load is done with pickle on the .pkl file.
# Only unpickle files you created yourself: unpickling can execute arbitrary code.
model_filename = 'Address_Validation_Model_Updated.pkl'  # Adjust the path as needed
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Make predictions using the loaded model
y_pred_encoded = loaded_model.predict(input_data)

# Rebuild a LabelEncoder from training_data['Result'] (training_data must still be in
# memory from the training cell) and map the encoded prediction back through it
label_encoder = LabelEncoder().fit(training_data['Result'])
predicted_labels = label_encoder.inverse_transform(y_pred_encoded)

def decode(predicted_labels):
  """Translate an encoded prediction into text: 0 -> 'Complete', otherwise 'Incomplete'."""
  return 'Complete' if predicted_labels == 0 else 'Incomplete'



# Show the human-readable label for the single prediction made above
print("Predicted Label:", decode(predicted_labels))

Predicted Label: Complete


**Below Is the Deployment Code**

In [None]:
from flask import Flask, request, jsonify
import pandas as pd
import pickle
import regex as re
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

app = Flask(__name__)

# Abbreviation -> full word, applied only to standalone tokens (see replace_words).
# The table previously also listed look-ahead variants such as r'\bH(?![oO])\b'.
# A trailing \b already forbids a following letter, so those variants matched
# exactly what the plain forms below match and never changed the output.
replacements = {
    r'\bH\b': 'House',
    r'\bh\b': 'house',
    r'\bst\b': 'Street',
    r'\bSt\b': 'Street',
    r'\bST\b': 'Street',
}


def replace_words(text):
    """Expand standalone 'H'/'h' and 'st'/'St'/'ST' tokens in an address string.

    Args:
        text: The address to normalise. Non-string values are returned untouched.

    Returns:
        The string with every pattern in `replacements` substituted, or `text`
        itself when it is not a string.
    """
    if not isinstance(text, str):
        return text
    for pattern, replacement in replacements.items():
        text = re.sub(pattern, replacement, text)
    return text

# Feature 2: does the given value (the stringified API flag at the call site) contain 'True'?
def extract_feature_2(text):
    """Return whether 'True' is contained in `text`."""
    contains_true = 'True' in text
    return contains_true

# Read the pickled XGBoost model written by the training cell
model_filename = 'Address_Validation_Model_Updated.pkl'
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.loads(model_file.read())

# Rebuild the label encoder from training_data['Result']; training_data must
# still be in memory from the training cell
label_encoder = LabelEncoder().fit(training_data['Result'])

# Define a route for address validation and prediction
@app.route('/validate_address', methods=['POST'])
def validate_address():
    """Predict whether a posted address is 'Complete' or 'Incomplete'.

    Expects a JSON object with 'address', 'province' and 'city' keys.

    Returns:
        200 with {'prediction': ...} on success,
        400 with {'error': ...} when the body is not a JSON object or a key is missing,
        500 with {'error': ...} when feature extraction or prediction fails.
        Failures used to be reported with status 200; the 'error' key is unchanged.
    """
    # silent=True gives None for a missing or invalid JSON body, handled just below
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    missing = [field for field in ('address', 'province', 'city') if field not in data]
    if missing:
        return jsonify({'error': 'Missing required field(s): ' + ', '.join(missing)}), 400

    try:
        address = data['address']
        province = data['province']
        city = data['city']

        # Concatenate 'address', 'city', and 'province' into a single string
        full_address = f"{address}, {city}, {province}"

        # Apply replacements to the 'full_address' string
        updated_address = replace_words(full_address)

        # TODO: the API flag is still a hard-coded placeholder; set it from the
        # geocoding API result once that lookup is wired into this route.
        API_Match = False
        featured_1 = extract_feature_1(updated_address)
        featured_2 = extract_feature_2(str(API_Match))
        featured_3 = extract_feature_3(updated_address)

        # Create a one-row DataFrame with the extracted features
        input_data = pd.DataFrame({
            'API': [API_Match],  # True or False
            'Featured_1': [featured_1],
            'Featured_2': [featured_2],
            'Featured_3': [featured_3]
        })

        # Make predictions using the loaded model
        y_pred_encoded = loaded_model.predict(input_data)

        # Map the encoded prediction back through the label encoder
        predicted_labels = label_encoder.inverse_transform(y_pred_encoded)

        # Exactly one row was scored: label 0 means 'Complete', anything else 'Incomplete'
        prediction = 'Complete' if predicted_labels[0] == 0 else 'Incomplete'

        return jsonify({'prediction': prediction})

    except Exception as e:
        # Report the failure to the caller, flagged as a server error
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone who can
    # reach the server execute code on it; keep it off for an app that is being served.
    app.run(debug=False)


In [None]:
import pandas as pd
import flask
from flask import Flask, render_template, request, jsonify
!pip install waitress
import waitress
from waitress import serve
!pip install ngrok
!pip install flask.ngrok
from flask_ngrok import run_with_ngrok
!pip install pyngrok
!pip install --upgrade pyngrok
import pyngrok
import ngrok
!pip install asyncio
import asyncio
import ngrok
#from your_module import extract_feature_1, extract_feature_2, extract_feature_3, decode

import os
import subprocess

# Read the ngrok auth token from the NGROK_AUTHTOKEN environment variable so the
# secret is not stored in the notebook. A missing variable raises KeyError here.
ngrok.set_auth_token(os.environ['NGROK_AUTHTOKEN'])

# Start an ngrok TCP tunnel to port 80.
ngrok.connect(80, "tcp")
# Print the public URL of the tunnel to the console.
#print(tunnel.public_url)

# Run 'netstat -an' to list sockets. The command is passed as an argument list
# without shell=True: with shell=True on POSIX only "netstat" would be executed
# and "-an" would be dropped.
try:
    result = subprocess.run(["netstat", "-an"], capture_output=True, text=True)
except FileNotFoundError:
    # netstat is not installed; treat the listing as empty
    result = None
netstat_output = result.stdout if result is not None else ""

# Look for an address token whose port is exactly 80 (':80' or '.80' suffix).
# The earlier substring test ("80" in output) also matched 8080, 1080, IP octets, etc.
port_80_in_use = any(
    token.endswith((":80", ".80"))
    for line in netstat_output.splitlines()
    for token in line.split()
)
if port_80_in_use:
    print("Port 80 is in use.")
else:
    print("Port 80 is not in use.")


app = Flask(__name__)
run_with_ngrok(app)

@app.route("/")
def index():
  """Serve the landing page rendered from the index.html template."""
  page = render_template("index.html")
  return page

@app.route("/predict", methods=["POST"])
def predict():
  """Predict 'Complete'/'Incomplete' for an address submitted as form data.

  Reads the 'address', 'province' and 'city' form fields, builds the same
  normalised full-address string the other cells use, and returns the decoded
  label as JSON under 'predicted_label'.
  """
  # Extract fields from the request data
  address = request.form["address"]
  province = request.form["province"]
  city = request.form["city"]

  # Build the model input the same way as the training and single-prediction code:
  # join address, city and province, then expand abbreviations. The features used to
  # be computed on the raw address alone, and province/city were read but never used.
  full_address = f"{address}, {city}, {province}"
  updated_address = replace_words(full_address)

  # Load the trained XGBoost model.
  # Only unpickle files you created yourself: unpickling can execute arbitrary code.
  model_filename = 'Address_Validation_Model_Updated.pkl'
  with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

  # TODO: the API flag is still hard-coded; set it from the geocoding API result.
  api_match = True

  # Featured_2 is derived from the API flag, as in the other prediction code. It was
  # previously computed from the address text, which is not what that feature encodes.
  input_data = pd.DataFrame({
    "API": [api_match],
    "Featured_1": [extract_feature_1(updated_address)],
    "Featured_2": [extract_feature_2(str(api_match))],
    "Featured_3": [extract_feature_3(updated_address)]
  })

  # Make predictions using the loaded model
  y_pred_encoded = loaded_model.predict(input_data)
  predicted_labels = label_encoder.inverse_transform(y_pred_encoded)

  # Decode the predicted label
  predicted_label = decode(predicted_labels)

  # Return the predicted label as a JSON response
  return jsonify({"predicted_label": predicted_label})

# Start Flask's built-in server when this cell/script is the entry point
if __name__ == "__main__":
  app.run()


Collecting ngrok
  Downloading ngrok-0.10.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m44.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ngrok
Successfully installed ngrok-0.10.1
Collecting flask.ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask.ngrok
Successfully installed flask.ngrok-0.0.25
Collecting pyngrok
  Downloading pyngrok-7.0.0.tar.gz (718 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.7/718.7 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-7.0.0-py3-none-any.whl size=21129 sha256=60a71a1f9c5d98736c753fb00fcaaa3cede48834bfd82c5d3ad4eff4b96b3821
  Stored in directory

Port 80 is not in use.
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


 * Running on http://76f7-35-236-222-255.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


ERROR:__main__:Exception on / [GET]
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 2529, in wsgi_app
    response = self.full_dispatch_request()
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1825, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1823, in full_dispatch_request
    rv = self.dispatch_request()
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1799, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "<ipython-input-32-534e38c63f3a>", line 43, in index
    return render_template("index.html")
  File "/usr/local/lib/python3.10/dist-packages/flask/templating.py", line 146, in render_template
    template = app.jinja_env.get_or_select_template(template_name_or_list)
  File "/usr/local/lib/python3.10/dist-packages/jinja2/environment.py", line 1081, 

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd /content/drive/MyDrive/Address_Validation_Updated/

/content/drive/MyDrive/Address_Validation_Updated


In [None]:
!python app.py

Traceback (most recent call last):
  File "/content/drive/MyDrive/Address_Validation_Updated/app.py", line 5, in <module>
    import waitress
ModuleNotFoundError: No module named 'waitress'


**Deployment Stuff Below**

In [None]:
#!python app.py --port

In [None]:
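# http://localhost:8080  (kept as a comment: a bare URL in a code cell is not valid Python and raised the SyntaxError below)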

SyntaxError: ignored