In [None]:
# Fetch the Google Drive file with this ID via gdown; the recorded run saved it as /content/test.csv.
!gdown 18hMYihVfAZm79KEM-6UTfJZSJZ-7irPr

Downloading...
From: https://drive.google.com/uc?id=18hMYihVfAZm79KEM-6UTfJZSJZ-7irPr
To: /content/test.csv
  0% 0.00/28.6k [00:00<?, ?B/s]100% 28.6k/28.6k [00:00<00:00, 43.9MB/s]


In [None]:
# Fetch the Google Drive file with this ID via gdown; the recorded run saved it as /content/train.csv.
!gdown 1rKBKkeBScmRWn_yEeeC97YbsJZoc-E_3

Downloading...
From: https://drive.google.com/uc?id=1rKBKkeBScmRWn_yEeeC97YbsJZoc-E_3
To: /content/train.csv
  0% 0.00/61.2k [00:00<?, ?B/s]100% 61.2k/61.2k [00:00<00:00, 77.4MB/s]


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the two CSV files from the working directory into DataFrames.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

# Report, per column, how many entries are missing in the training frame.
print("Missing values before processing:", train_df.isnull().sum(), sep="\n")

Missing values before processing:
PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64


In [None]:
def preprocess_data(df, reference_df=None):
    """Turn a raw passenger frame into an all-float feature frame.

    Steps, in order:
      1. drop the ``Name``, ``Ticket``, ``Cabin`` and ``PassengerId`` columns
         (whichever are present);
      2. fill missing ``Age`` / ``Fare`` with a median and missing
         ``Embarked`` with the most frequent value;
      3. one-hot encode ``Sex`` and ``Embarked`` with the first level dropped;
      4. cast every remaining column to ``float``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame to transform. It is not modified.
    reference_df : pandas.DataFrame, optional
        Frame from which imputation values and category levels are learned.
        Pass the training frame when transforming a test frame so both are
        imputed with the same values and produce the same dummy columns even
        if ``df`` lacks some category level. When omitted, everything is
        derived from ``df`` itself, which matches the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        New frame with float columns only. Any column other than the ones
        listed above (e.g. ``Survived``) is passed through unchanged.

    Notes
    -----
    With ``reference_df`` given, a value of ``Sex`` / ``Embarked`` in ``df``
    that never occurs in ``reference_df`` is encoded as all zeros.
    """
    stats_source = df if reference_df is None else reference_df

    df_clean = df.copy()
    df_clean = df_clean.drop(
        columns=['Name', 'Ticket', 'Cabin', 'PassengerId'], errors='ignore'
    )

    def _source_column(col):
        # Fall back to the frame being cleaned when the reference lacks the column.
        return stats_source[col] if col in stats_source.columns else df_clean[col]

    # Numeric gaps: median of the reference column (NaNs are skipped by median()).
    for col in ('Age', 'Fare'):
        if col in df_clean.columns:
            df_clean[col] = df_clean[col].fillna(_source_column(col).median())

    # Categorical gap: most frequent value. mode() is empty for an all-NaN
    # column, in which case there is nothing sensible to fill with.
    if 'Embarked' in df_clean.columns:
        modes = _source_column('Embarked').mode()
        if not modes.empty:
            df_clean['Embarked'] = df_clean['Embarked'].fillna(modes.iloc[0])

    categorical_cols = [c for c in ('Sex', 'Embarked') if c in df_clean.columns]

    # Pin the category levels to those of the reference frame so that
    # drop_first always removes the same baseline level and the resulting
    # dummy columns line up between frames.
    if reference_df is not None:
        for col in categorical_cols:
            if col in reference_df.columns:
                levels = sorted(reference_df[col].dropna().unique())
                df_clean[col] = pd.Categorical(df_clean[col], categories=levels)

    if categorical_cols:
        df_clean = pd.get_dummies(df_clean, columns=categorical_cols, drop_first=True)

    return df_clean.astype(float)

In [None]:
# Run both raw frames through the shared preprocessing routine.
X_train_processed, X_test_processed = (
    preprocess_data(raw_frame) for raw_frame in (train_df, test_df)
)

# The label is taken straight from the raw training frame.
y_train = train_df['Survived']

# Strip the label out of the feature matrix when preprocessing kept it.
X_train = (
    X_train_processed.drop(columns=['Survived'])
    if 'Survived' in X_train_processed.columns
    else X_train_processed
)

# Give the test matrix exactly the training columns; columns it lacks are
# created and filled with 0.
X_train, X_test = X_train.align(X_test_processed, join='left', axis=1, fill_value=0)

print("\nShape of processed Train data:", X_train.shape)
print("Shape of processed Test data:", X_test.shape)
print("\nFirst 5 rows of processed data:", X_train.head(), sep="\n")


Shape of processed Train data: (891, 8)
Shape of processed Test data: (418, 8)

First 5 rows of processed data:
   Pclass   Age  SibSp  Parch     Fare  Sex_male  Embarked_Q  Embarked_S
0     3.0  22.0    1.0    0.0   7.2500       1.0         0.0         1.0
1     1.0  38.0    1.0    0.0  71.2833       0.0         0.0         0.0
2     3.0  26.0    0.0    0.0   7.9250       0.0         0.0         1.0
3     1.0  35.0    1.0    0.0  53.1000       0.0         0.0         1.0
4     3.0  35.0    0.0    0.0   8.0500       1.0         0.0         1.0


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Hold out 20% of the labelled rows for evaluation; the seed fixes the split.
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=93,
)

# fit() returns the estimator itself, so construction and training chain.
rf_model = RandomForestClassifier(n_estimators=200, random_state=93).fit(
    X_train_split, y_train_split
)

# Predict once per split and reuse the result for every metric below.
rf_pred_train = rf_model.predict(X_train_split)
rf_pred_holdout = rf_model.predict(X_test_split)

train_acc = accuracy_score(y_train_split, rf_pred_train)
test_acc = accuracy_score(y_test_split, rf_pred_holdout)

print(f"Accuracy on Train Set: {train_acc * 100:.2f}%")
print(f"Accuracy on Test Set: {test_acc * 100:.2f}%")

print("\nClassification Report:\n")
print(classification_report(y_test_split, rf_pred_holdout))

Accuracy on Train Set: 98.17%
Accuracy on Test Set: 87.15%

Classification Report:

              precision    recall  f1-score   support

           0       0.91      0.88      0.90       113
           1       0.81      0.85      0.83        66

    accuracy                           0.87       179
   macro avg       0.86      0.87      0.86       179
weighted avg       0.87      0.87      0.87       179



In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Gradient boosting with library defaults apart from the fixed seed, trained
# on the same split as the random forest.
gb_model = GradientBoostingClassifier(random_state=93)
gb_model.fit(X_train_split, y_train_split)

# Predict once per split and reuse the result for every metric below.
gb_pred_train = gb_model.predict(X_train_split)
gb_pred_holdout = gb_model.predict(X_test_split)

train_acc_gb = accuracy_score(y_train_split, gb_pred_train)
test_acc_gb = accuracy_score(y_test_split, gb_pred_holdout)

print(f"Gradient Boosting - Train Accuracy: {train_acc_gb:.4f}")
print(f"Gradient Boosting - Test Accuracy: {test_acc_gb:.4f}")

print("\nClassification Report (Gradient Boosting):\n")
print(classification_report(y_test_split, gb_pred_holdout))

Gradient Boosting - Train Accuracy: 0.8876
Gradient Boosting - Test Accuracy: 0.8547

Classification Report (Gradient Boosting):

              precision    recall  f1-score   support

           0       0.87      0.90      0.89       113
           1       0.82      0.77      0.80        66

    accuracy                           0.85       179
   macro avg       0.85      0.84      0.84       179
weighted avg       0.85      0.85      0.85       179



In [1]:
# Fetch the Google Drive file with this ID via gdown; the recorded run targets
# /content/HemoricaV1-CT.zip (2.53G per the progress line).
# NOTE(review): the recorded output shows a traceback at 17% of the download,
# so the archive from that run may be incomplete — re-run and confirm.
!gdown 1ACKopu5r9OPI7Ed_7FIpQzLWpmkMqBXF

Downloading...
From (original): https://drive.google.com/uc?id=1ACKopu5r9OPI7Ed_7FIpQzLWpmkMqBXF
From (redirected): https://drive.google.com/uc?id=1ACKopu5r9OPI7Ed_7FIpQzLWpmkMqBXF&confirm=t&uuid=db9d8f00-b052-4319-a0c5-95868b6b7f85
To: /content/HemoricaV1-CT.zip
 17% 436M/2.53G [00:06<00:19, 106MB/s] Traceback (most recent call last):
  File "/usr/local/bin/gdown", line 10, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gdown/__main__.py", line 172, in main
    download(
  File "/usr/local/lib/python3.12/dist-packages/gdown/download.py", line 368, in download
    for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/requests/models.py", line 820, in generate
    yield from self.raw.stream(chunk_size, decode_content=True)
  File "/usr/local/lib/python3.12/dist-packages/urllib3/response.py", line 1091, in stream
    data = self.re