In [1]:
import pandas as pd

In [2]:
global_df = pd.read_csv("global_cancer_patients_2015_2024.csv")
global_df.columns

Index(['Patient_ID', 'Age', 'Gender', 'Country_Region', 'Year', 'Genetic_Risk',
       'Air_Pollution', 'Alcohol_Use', 'Smoking', 'Obesity_Level',
       'Cancer_Type', 'Cancer_Stage', 'Treatment_Cost_USD', 'Survival_Years',
       'Target_Severity_Score'],
      dtype='object')

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load dataset
df = pd.read_csv("global_cancer_patients_2015_2024.csv")

# Drop rows with missing values
df = df.dropna()

# Features and target
X = df.drop(columns=['Target_Severity_Score', 'Patient_ID'])
y = df['Target_Severity_Score']

# Identify categorical and numerical columns
categorical = ['Gender', 'Country_Region', 'Cancer_Type', 'Cancer_Stage']
numerical = [col for col in X.columns if col not in categorical]

# Preprocessing
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical)
])

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build preprocessing + model pipeline
def create_model(input_dim):
    model = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dense(32, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Preprocess features
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Create and train the model
model = create_model(X_train_processed.shape[1])
model.fit(X_train_processed, y_train, epochs=20, batch_size=32, validation_split=0.1)

# Predict
predictions = model.predict(X_test_processed)
print(predictions[:5])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 3.6337 - val_loss: 0.0214
Epoch 2/20
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 0.0147 - val_loss: 0.0043
Epoch 3/20
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0035 - val_loss: 0.0016
Epoch 4/20
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.0013 - val_loss: 6.4714e-04
Epoch 5/20
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 5.2757e-04 - val_loss: 2.9603e-04
Epoch 6/20
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 2.3853e-04 - val_loss: 2.0194e-04
Epoch 7/20
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 1.5625e-04 - val_loss: 1.4207e-04
Epoch 8/20
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.5285e-04 - val_loss: