In [33]:
import os
import re

import numpy as np
import pandas as pd

# Data locations. The default keeps the original machine-specific directory so
# existing runs behave the same; set the OUTBOUND_DATA_DIR environment variable
# to point the notebook at another directory without editing the code.
DATA_DIR = os.environ.get("OUTBOUND_DATA_DIR", "/Users/tony/Desktop/outbound/data")
data_url = os.path.join(DATA_DIR, "cleaned_data.csv")
output_url = os.path.join(DATA_DIR, "processed_data.csv")

# Load the cleaned input that every scoring cell below adds columns to.
data = pd.read_csv(data_url)

def add_scoring_columns(data):
    """Add an ``industry_score`` column to ``data`` and return the frame.

    A row scores 20 when its ``Industry`` value is exactly one of the target
    industries listed below, and 0 otherwise (missing values also score 0).
    The column is written onto the frame that was passed in, and that same
    object is returned.
    """
    target_industries = [
        "Computer Software",
        "Marketing & Advertising",
        "Professional Services",
        "Information Technology & Services",
        "Management Consulting",
        "IT Services and IT Consulting",
        "Information Technology and Services",
        "Software Development",
        "Luxury Goods & Jewelry",
        "Real Estate",
        "Internet Software & Services",
        "Retail",
        "Hospitality",
        "Sports",
        "Airlines/Aviation",
        "Manufacturing",
        "Computer & Network Security",
    ]

    def _industry_points(industry):
        # Exact, case-sensitive membership test against the target list.
        if industry in target_industries:
            return 20
        return 0

    data["industry_score"] = data["Industry"].apply(_industry_points)
    return data

# Add the industry_score column; add_scoring_columns returns the frame it was given.
data = add_scoring_columns(data)



In [34]:
import pandas as pd

def calculate_region_score(location):
    """Return the region score for a single location value.

    Scores are 30 for a tier-1 country, 20 for tier-2, 10 for tier-3 and 0 for
    everything else, including missing values (NaN/None). Matching is an exact
    comparison against the country names listed below.
    """
    # Missing values never score.
    if pd.isna(location):
        return 0

    tier_1 = [
        "United States", "Canada", "United Kingdom", "Germany", "France", "Italy", "Spain",
        "Australia", "Netherlands", "Sweden", "Norway", "Switzerland", "Denmark", "Belgium",
        "Finland", "Japan", "Singapore", "New Zealand", "South Korea", "Israel", "Luxembourg",
        "Ireland", "Austria",
    ]
    tier_2 = ["India", "China", "UAE", "Saudi Arabia"]
    tier_3 = [
        "Malaysia", "Indonesia", "Brazil", "South Africa", "Vietnam", "Philippines", "Thailand",
        "Mexico", "Turkey", "Argentina", "Poland", "Romania", "Czech Republic", "Morocco",
        "Kenya", "Nigeria", "Egypt", "Colombia", "Peru", "Chile",
    ]

    # Check the tiers from highest to lowest; the first match decides the score.
    for points, countries in ((30, tier_1), (20, tier_2), (10, tier_3)):
        if location in countries:
            return points
    return 0


def add_location_score_column(df, location_column="location", new_column_name="location_score"):
    """Score every row's location and store the result in a new column.

    Args:
        df: DataFrame to score; the new column is written onto this frame.
        location_column: Name of the column holding the location values.
        new_column_name: Name of the score column to create or overwrite.

    Returns:
        The same DataFrame that was passed in.
    """
    location_scores = df[location_column].apply(calculate_region_score)
    df[new_column_name] = location_scores
    return df


# Add the location score column:
# Defaults apply: reads the "location" column and writes "location_score".
data = add_location_score_column(data)

In [35]:
# C-level titles must not be embedded in a longer word. A plain substring test
# scores "director", "doctor" and "contractor" as C-level because each of them
# contains "cto", which also made the 20-point director tier unreachable.
_C_LEVEL_PATTERN = re.compile(r"(?<![a-z])(?:c-level|ceo|cto|cfo)(?![a-z])")


def score_role(position):
    """Return a seniority score for a job title.

    Args:
        position: Job title for one row; may be missing (NaN/None).

    Returns:
        0 for a missing title, 30 for C-level titles ("c-level", "ceo", "cto",
        "cfo" appearing as standalone tokens), 20 for titles containing "vp",
        "vice president" or "director", 10 for titles containing "manager",
        and 5 for any other title. Matching is case-insensitive.
    """
    if pd.isnull(position):
        return 0

    # str() guards against non-string cells (e.g. numbers), which have no .lower().
    title = str(position).lower()

    if _C_LEVEL_PATTERN.search(title):
        return 30
    if any(keyword in title for keyword in ("vp", "vice president", "director")):
        return 20
    if "manager" in title:
        return 10
    return 5

# Score each row's "position" value with score_role and store it as "role_score".
data["role_score"] = data["position"].apply(score_role)


In [36]:
import pandas as pd

def calculate_visits_score(num_visits):
    """Map a visit count to its score.

    Inclusive lower bounds: 20+ -> 50, 10+ -> 40, 6+ -> 30, 4+ -> 20,
    2+ -> 10. A value that meets none of the bounds scores 2.
    """
    # (minimum visits, score) pairs, checked from the highest bound down.
    score_by_min_visits = ((20, 50), (10, 40), (6, 30), (4, 20), (2, 10))
    for min_visits, score in score_by_min_visits:
        if num_visits >= min_visits:
            return score
    return 2

def add_visits_score_column(data, visits_column="visit_count", score_column="number_of_visit_score"):
    """Score every row's visit count and store the result in a new column.

    Args:
        data: DataFrame to score; the new column is written onto this frame.
        visits_column: Name of the column holding the visit counts.
        score_column: Name of the score column to create or overwrite.

    Returns:
        The same DataFrame that was passed in.
    """
    visit_scores = data[visits_column].apply(calculate_visits_score)
    data[score_column] = visit_scores
    return data


# The keyword arguments repeat the function's defaults; they are spelled out to show which columns are read and written.
data = add_visits_score_column(data, visits_column="visit_count", score_column="number_of_visit_score")


In [37]:
import pandas as pd

def calculate_visit_duration_score(duration_seconds):
    """Map a visit duration in seconds to its score.

    Exclusive lower bounds: more than 60 -> 30, more than 30 -> 20,
    more than 10 -> 10. A value that exceeds none of the bounds scores 5.
    """
    # (seconds that must be exceeded, score) pairs, checked from the longest down.
    score_by_min_seconds = ((60, 30), (30, 20), (10, 10))
    for min_seconds, score in score_by_min_seconds:
        if duration_seconds > min_seconds:
            return score
    return 5

def add_visit_duration_score_column(data, duration_column="time_spent_on_site", score_column="visit_duration_score"):
    """Coerce the duration column to numeric and add a duration score column.

    The duration column is overwritten with its numeric form; values that
    cannot be parsed become NaN. If that conversion raises, the error is
    printed and the frame is returned without a score column.

    Args:
        data: DataFrame to score; columns are written onto this frame.
        duration_column: Name of the column holding durations in seconds.
        score_column: Name of the score column to create or overwrite.

    Returns:
        The same DataFrame that was passed in.
    """
    try:
        # errors='coerce' turns unparseable entries into NaN instead of raising.
        numeric_durations = pd.to_numeric(data[duration_column], errors='coerce')
        data[duration_column] = numeric_durations
    except Exception as e:
        # Best effort: report the problem and hand the frame back unscored.
        print(f"Error converting column to numeric: {e}")
    else:
        data[score_column] = data[duration_column].apply(calculate_visit_duration_score)
    return data


# Defaults apply: reads "time_spent_on_site" and writes "visit_duration_score".
data = add_visit_duration_score_column(data)
#


In [38]:
# Write the scored frame to output_url; index=False leaves the row index out of the file.
data.to_csv(output_url, index=False)