In [1]:
# Import Necessary Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier

In [2]:
# Read the four competition CSVs from the working directory, in the same
# order as the names on the left-hand side.
(
    submission_format,
    test_set_features,
    training_set_features,
    training_set_labels,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'submission_format.csv',
        'test_set_features.csv',
        'training_set_features.csv',
        'training_set_labels.csv',
    )
)

In [3]:
#  Preprocess the Data
def preprocess_data(df, reference_frames=None):
    """Return a copy of ``df`` with its categorical columns integer-encoded.

    Every column named in ``categorical_columns`` is cast to ``str`` and run
    through a ``LabelEncoder``. The encoder for a column is fitted on the
    values found in ``df`` plus the values of that same column in each frame
    of ``reference_frames``. Frames encoded with each other as references
    therefore share one value-to-code mapping, even when one of them lacks a
    category the other contains.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to encode. It must contain every column listed in
        ``categorical_columns``. The input is copied, not modified.
    reference_frames : iterable of pandas.DataFrame, optional
        Additional frames whose categorical values extend the encoder's
        vocabulary. When omitted, each encoder is fitted on ``df`` alone.

    Returns
    -------
    pandas.DataFrame
        The encoded copy of ``df``; non-categorical columns are unchanged.

    Raises
    ------
    KeyError
        If ``df`` or a reference frame is missing a categorical column.
    """
    df = df.copy()

    # Encode categorical variables
    categorical_columns = [
        'age_group', 'education', 'race', 'sex', 'income_poverty',
        'marital_status', 'rent_or_own', 'employment_status', 'hhs_geo_region',
        'census_msa', 'employment_industry', 'employment_occupation'
    ]

    # `df` is always part of the vocabulary, so transform() below can never
    # meet a label the encoder has not seen.
    vocabulary_frames = [df]
    if reference_frames is not None:
        vocabulary_frames.extend(reference_frames)

    for col in categorical_columns:
        # Casting to str makes every entry (including whatever astype(str)
        # produces for missing values) an ordinary label for the encoder.
        vocabulary = pd.concat(
            [frame[col].astype(str) for frame in vocabulary_frames],
            ignore_index=True,
        )
        le = LabelEncoder()
        le.fit(vocabulary)
        df[col] = le.transform(df[col].astype(str))

    return df

# Hold on to the raw frames: each call needs the *other* raw frame as its
# reference, and the public names are rebound to the encoded results.
# Fitting on train and test separately could give the same category
# different integer codes in the two frames.
raw_training_features, raw_test_features = training_set_features, test_set_features
training_set_features = preprocess_data(
    raw_training_features, reference_frames=[raw_test_features]
)
test_set_features = preprocess_data(
    raw_test_features, reference_frames=[raw_training_features]
)

In [4]:
# Separate the feature matrix from the two target columns.
# The identifier column is dropped so it is not used as a feature.
X = training_set_features.drop('respondent_id', axis=1)
y_h1n1, y_seasonal = (
    training_set_labels[target_name]
    for target_name in ('xyz_vaccine', 'seasonal_vaccine')
)

In [5]:
# Fit one classifier per target on the same feature matrix.
# fit() returns the estimator, so construction and training are chained.
model_h1n1 = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
).fit(X, y_h1n1)

model_seasonal = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
).fit(X, y_seasonal)

In [6]:
# Score the test rows with both fitted models.
X_test = test_set_features.drop('respondent_id', axis=1)

# Column 1 of predict_proba is taken as the predicted probability for each target.
predictions_h1n1, predictions_seasonal = (
    fitted_model.predict_proba(X_test)[:, 1]
    for fitted_model in (model_h1n1, model_seasonal)
)

In [8]:
# Assemble the submission table: the test identifiers followed by one
# probability column per target.
submission = test_set_features[['respondent_id']].copy()
submission['xyz_vaccine'] = predictions_h1n1
submission['seasonal_vaccine'] = predictions_seasonal

In [9]:
# Write the submission table to CSV, leaving out the DataFrame index.
submission.to_csv(
    path_or_buf='POTHULA SAI PRANEETH_datahack.csv',
    index=False,
)