In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Locations of the primary dataset and the extended (cleaned) dataset.
primary_csv = '/Users/mjallow/mrc-hackathon/dev/data/meningitis_data.csv'
extended_csv = '/Users/mjallow/mrc-hackathon/dev/data/ext_meningitis_data_cleaned.csv'

df = pd.read_csv(primary_csv)
ext_df = pd.read_csv(extended_csv)

# Stack both frames row-wise under a fresh 0..n-1 index, then preview the result.
df = pd.concat(objs=[df, ext_df], ignore_index=True)
df.head()

In [None]:
# Columns kept for modelling: the candidate features plus the label column.
selected_columns = [
    'age',                      # Patient age
    'sexofthepatient',          # Gender
    'whitecellcount',           # CSF WBC count
    'csfproteinresult',         # CSF protein level
    'csfglucoseresult',         # CSF glucose level
    'csf_gram_resul',           # Gram stain result
    'csfcultureresults',        # CSF culture result
    'presenceofseizure',        # Seizure presence
    'neckstiffness',            # Neck stiffness
    'historyoffever',           # Fever history
    'alteredconsciousness',     # Consciousness level
    'bulgingfontanel',          # Fontanel bulging (infants)
    'mening_received',          # Meningitis vaccine received
    'hibvaccine',               # Hib vaccine received
    'pneumoduringcampaign',     # Pneumococcal vaccine during campaign
    'target'                    # Label: bacterial meningitis (1 = yes, 0 = no)
]

# Take an explicit copy of the column subset: later cells write into this
# frame, and writing into a bare selection of another DataFrame triggers
# SettingWithCopyWarning / ambiguous view-vs-copy behaviour in pandas.
df = df[selected_columns].copy()
df.head()

In [None]:
# Impute missing ages with the column mean. The filled Series is assigned
# back because df['age'].fillna(..., inplace=True) operates on an intermediate
# object and is not guaranteed to update df (never under Copy-on-Write).
df['age'] = df['age'].fillna(df['age'].mean())

In [None]:
# Fill missing sex values with 'M'. Assigned back rather than using
# inplace=True on df[col], which acts on an intermediate Series and is not
# guaranteed to update df (never under Copy-on-Write).
# NOTE(review): 'M' is hard-coded; presumably the most frequent value — confirm.
df['sexofthepatient'] = df['sexofthepatient'].fillna('M')

In [None]:
# Impute missing CSF white-cell counts with the column mean. Assigned back
# because fillna(inplace=True) on df[col] acts on an intermediate Series and
# is not guaranteed to update df (never under Copy-on-Write).
df['whitecellcount'] = df['whitecellcount'].fillna(df['whitecellcount'].mean())

In [None]:
# Impute missing CSF protein results with the column mean. Assigned back
# because fillna(inplace=True) on df[col] acts on an intermediate Series and
# is not guaranteed to update df (never under Copy-on-Write).
df['csfproteinresult'] = df['csfproteinresult'].fillna(df['csfproteinresult'].mean())

In [None]:
# Impute missing CSF glucose results with the column mean. Assigned back
# because fillna(inplace=True) on df[col] acts on an intermediate Series and
# is not guaranteed to update df (never under Copy-on-Write).
df['csfglucoseresult'] = df['csfglucoseresult'].fillna(df['csfglucoseresult'].mean())

In [None]:
# Impute missing Gram-stain results with the most frequent value.
# Series.mode() returns a Series (there can be ties); passing that Series to
# fillna aligns on the index, so only rows whose index label matches the mode
# Series would be filled. Use the first mode as a scalar instead, and assign
# the result back rather than relying on inplace=True on df[col].
gram_modes = df['csf_gram_resul'].mode()
if not gram_modes.empty:  # empty when the whole column is missing
    df['csf_gram_resul'] = df['csf_gram_resul'].fillna(gram_modes.iloc[0])

In [None]:
# Preview the first five rows of the working frame.
df.head(n=5)

In [None]:
# Impute missing CSF culture results with the column mean. Assigned back
# because fillna(inplace=True) on df[col] acts on an intermediate Series and
# is not guaranteed to update df (never under Copy-on-Write).
# NOTE(review): if this column is a coded category/flag, the mean yields
# fractional fill values — confirm this is intended.
df['csfcultureresults'] = df['csfcultureresults'].fillna(df['csfcultureresults'].mean())

In [None]:
# Impute missing seizure indicators with the column mean. Assigned back
# because fillna(inplace=True) on df[col] acts on an intermediate Series and
# is not guaranteed to update df (never under Copy-on-Write).
# NOTE(review): if this column is a 0/1 flag, the mean yields fractional
# fill values — confirm this is intended.
df['presenceofseizure'] = df['presenceofseizure'].fillna(df['presenceofseizure'].mean())

In [None]:
# Impute missing neck-stiffness indicators with the column mean. Assigned back
# because fillna(inplace=True) on df[col] acts on an intermediate Series and
# is not guaranteed to update df (never under Copy-on-Write).
# NOTE(review): if this column is a 0/1 flag, the mean yields fractional
# fill values — confirm this is intended.
df['neckstiffness'] = df['neckstiffness'].fillna(df['neckstiffness'].mean())

In [None]:
# Impute missing fever-history indicators with the column mean. Assigned back
# because fillna(inplace=True) on df[col] acts on an intermediate Series and
# is not guaranteed to update df (never under Copy-on-Write).
# NOTE(review): if this column is a 0/1 flag, the mean yields fractional
# fill values — confirm this is intended.
df['historyoffever'] = df['historyoffever'].fillna(df['historyoffever'].mean())

In [None]:
# Impute missing altered-consciousness indicators with the column mean.
# Assigned back because fillna(inplace=True) on df[col] acts on an
# intermediate Series and is not guaranteed to update df (never under
# Copy-on-Write).
# NOTE(review): if this column is a 0/1 flag, the mean yields fractional
# fill values — confirm this is intended.
df['alteredconsciousness'] = df['alteredconsciousness'].fillna(df['alteredconsciousness'].mean())

In [None]:
# Impute missing bulging-fontanel indicators with the column mean. Assigned
# back because fillna(inplace=True) on df[col] acts on an intermediate Series
# and is not guaranteed to update df (never under Copy-on-Write).
# NOTE(review): if this column is a 0/1 flag, the mean yields fractional
# fill values — confirm this is intended.
df['bulgingfontanel'] = df['bulgingfontanel'].fillna(df['bulgingfontanel'].mean())

In [None]:
# Impute missing meningitis-vaccine indicators with the column mean. Assigned
# back because fillna(inplace=True) on df[col] acts on an intermediate Series
# and is not guaranteed to update df (never under Copy-on-Write).
# NOTE(review): if this column is a 0/1 flag, the mean yields fractional
# fill values — confirm this is intended.
df['mening_received'] = df['mening_received'].fillna(df['mening_received'].mean())

In [None]:
# Preview the first five rows of the working frame.
df.head(n=5)

In [None]:
# Impute missing Hib-vaccine indicators with the column mean. Assigned back
# because fillna(inplace=True) on df[col] acts on an intermediate Series and
# is not guaranteed to update df (never under Copy-on-Write).
# NOTE(review): if this column is a 0/1 flag, the mean yields fractional
# fill values — confirm this is intended.
df['hibvaccine'] = df['hibvaccine'].fillna(df['hibvaccine'].mean())

In [None]:
# Impute missing pneumococcal-campaign indicators with the column mean.
# Assigned back because fillna(inplace=True) on df[col] acts on an
# intermediate Series and is not guaranteed to update df (never under
# Copy-on-Write).
# NOTE(review): if this column is a 0/1 flag, the mean yields fractional
# fill values — confirm this is intended.
df['pneumoduringcampaign'] = df['pneumoduringcampaign'].fillna(df['pneumoduringcampaign'].mean())

In [None]:
# Encode sex as an integer code; any value other than 'F'/'M' maps to NaN.
sex_codes = {'F': 0, 'M': 1}
df['sexofthepatient'] = df['sexofthepatient'].map(sex_codes)

In [None]:
# Count the rows whose label is missing.
df['target'].isnull().sum()

In [None]:
# Separate the label column from the feature matrix.
y = df['target']
X = df.drop('target', axis=1)

In [None]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Fit an XGBoost classifier with its default hyper-parameters.
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

# Score the held-out rows and report plain accuracy.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)