In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [3]:
# Load the training data
train_data = pd.read_csv('/content/Doceree-HCP_Train.csv')

In [7]:
test_data = pd.read_csv('/content/Doceree-HCP_Test.csv')

In [8]:
# Step 2: Data preprocessing
# Handle missing values
train_data.fillna(0, inplace=True)
test_data.fillna(0, inplace=True)

In [9]:
# Feature selection and engineering
selected_features = ['DEVICETYPE', 'PLATFORM_ID', 'BIDREQUESTIP', 'USERCITY', 'USERZIPCODE',
                     'USERAGENT', 'PLATFORMTYPE', 'CHANNELTYPE', 'URL', 'KEYWORDS']

In [12]:
# One-hot encoding for categorical variables
train_data = pd.get_dummies(train_data)
test_data = pd.get_dummies(test_data)

In [13]:
# Align train and test data columns
X_train, X_test = train_data.align(test_data, join='inner', axis=1)

In [14]:
# Extract target variable
y_train = train_data['IS_HCP']

In [15]:
# Step 3: Train the model
model = RandomForestClassifier()
model.fit(X_train, y_train)


In [16]:
# Step 4: Make predictions
train_predictions = model.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
print("Train Accuracy:", train_accuracy)

Train Accuracy: 1.0


In [17]:
test_predictions = model.predict(X_test)


In [18]:
# Step 5: Generate output file
output = pd.DataFrame({'ID': test_data['ID'], 'IS_HCP': test_predictions})

In [19]:
# Save output file
output.to_csv('Output.csv', index=False)

In [45]:
# Function to predict HCP and specialization given user input
def predict_hcp_specialization(user_id, ad_server_log):
    # Preprocess user input
    user_input = pd.DataFrame(ad_server_log, columns=X_train.columns)
    user_input.fillna(0, inplace=True)
    user_input = pd.get_dummies(user_input)
    user_input = user_input.reindex(columns=X_train.columns, fill_value=0)

    # Make prediction
    prediction = model.predict(user_input)
    specialization = None

    # Check if HCP is identified
    if prediction[0] == 1:
        # Extract specialization (taxonomy) if available
        specialization = test_data.loc[test_data['USERPLATFORMUID'] == user_id, 'TAXONOMY'].values[0]

    return prediction[0], specialization


In [52]:
# Example user input
user_id = 'user123'
ad_server_log = {
    'DEVICETYPE': ['Desktop'],
    'PLATFORM_ID': ['12345'],
    'BIDREQUESTIP': ['192.168.0.1'],
    'USERCITY': ['New York'],
    'USERZIPCODE': ['12345'],
    'USERAGENT': ['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'],
    'PLATFORMTYPE': ['EHR'],
    'CHANNELTYPE': ['website'],
    'URL': ['https://example.com'],
    'KEYWORDS': ['healthcare']
}

In [53]:
# Predict HCP and specialization for user input
hcp_prediction, taxonomy = predict_hcp_specialization(user_id, ad_server_log)
print("HCP Prediction:", hcp_prediction)
print("Taxonomy:", taxonomy)

HCP Prediction: 0.0
Taxonomy: None


In [50]:

# Example user input
user_id = '115536'
ad_server_log = {
    'DEVICETYPE': ['Mobile'],
    'PLATFORM_ID': ['2'],
    'BIDREQUESTIP': ['69.230.149.79'],
    'USERCITY': ['Lilburn'],
    'USERZIPCODE': ['54321'],
    'USERAGENT': ['Mozilla/5.0 (iPhone; CPU iPhone OS 15_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/101.0.4951.58 Mobile/15E148 Safari/604.1'],
    'PLATFORMTYPE': ['Online Medical Journal'],
    'CHANNELTYPE': ['Website'],
    'URL': ['https://www.renalandurologynews.com/slideshow/clinical-quiz/a-peritoneal-dialysis-patient-with-peri-catheter-pain/'],
    'KEYWORDS': ['General|Diagnosis|Nephritis|Oncology|Intravenous|Cardiovascular|Clinical|Small|Cardiology|Psychiatry|Medicine|Erythema|Abscess|False|Reproductive|Controlled|Pathology|Ophthalmology|Transplantation|Premature|Chronic|Dermatology|Gastroenterology|Neurology|Health|Rheumatology|Urology|Hematology|Surgical|Anemia']
}

In [51]:
# Predict HCP and specialization for user input
hcp_prediction, taxonomy = predict_hcp_specialization(user_id, ad_server_log)
print("HCP Prediction:", hcp_prediction)
print("Taxonomy:", taxonomy)

HCP Prediction: 0.0
Taxonomy: None
