# Kimiya Ghanai Machine Learning

## breast cancer survival prediction

In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [52]:
df= pd.read_csv('BRCA.csv')
df.head()

Unnamed: 0,Patient_ID,Age,Gender,Protein1,Protein2,Protein3,Protein4,Tumour_Stage,Histology,ER status,PR status,HER2 status,Surgery_type,Date_of_Surgery,Date_of_Last_Visit,Patient_Status
0,TCGA-D8-A1XD,36.0,FEMALE,0.080353,0.42638,0.54715,0.27368,III,Infiltrating Ductal Carcinoma,Positive,Positive,Negative,Modified Radical Mastectomy,15-Jan-17,19-Jun-17,Alive
1,TCGA-EW-A1OX,43.0,FEMALE,-0.42032,0.57807,0.61447,-0.031505,II,Mucinous Carcinoma,Positive,Positive,Negative,Lumpectomy,26-Apr-17,09-Nov-18,Dead
2,TCGA-A8-A079,69.0,FEMALE,0.21398,1.3114,-0.32747,-0.23426,III,Infiltrating Ductal Carcinoma,Positive,Positive,Negative,Other,08-Sep-17,09-Jun-18,Alive
3,TCGA-D8-A1XR,56.0,FEMALE,0.34509,-0.21147,-0.19304,0.12427,II,Infiltrating Ductal Carcinoma,Positive,Positive,Negative,Modified Radical Mastectomy,25-Jan-17,12-Jul-17,Alive
4,TCGA-BH-A0BF,56.0,FEMALE,0.22155,1.9068,0.52045,-0.31199,II,Infiltrating Ductal Carcinoma,Positive,Positive,Negative,Other,06-May-17,27-Jun-19,Dead


In [53]:
df.isnull().sum()

Patient_ID             7
Age                    7
Gender                 7
Protein1               7
Protein2               7
Protein3               7
Protein4               7
Tumour_Stage           7
Histology              7
ER status              7
PR status              7
HER2 status            7
Surgery_type           7
Date_of_Surgery        7
Date_of_Last_Visit    24
Patient_Status        20
dtype: int64

In [54]:
df = df.dropna()
df.isnull().sum()

Patient_ID            0
Age                   0
Gender                0
Protein1              0
Protein2              0
Protein3              0
Protein4              0
Tumour_Stage          0
Histology             0
ER status             0
PR status             0
HER2 status           0
Surgery_type          0
Date_of_Surgery       0
Date_of_Last_Visit    0
Patient_Status        0
dtype: int64

In [55]:
df['Patient_Status'].value_counts() , df['Tumour_Stage'].value_counts()

(Patient_Status
 Alive    255
 Dead      62
 Name: count, dtype: int64,
 Tumour_Stage
 II     180
 III     77
 I       60
 Name: count, dtype: int64)

## Normalizing data

In [58]:
x= df.drop(['Patient_Status','Patient_ID','Date_of_Surgery','Date_of_Last_Visit'],axis='columns')
y= df['Patient_Status']
y.shape

(317,)

In [59]:
from sklearn.preprocessing import LabelEncoder
x = x.apply(LabelEncoder().fit_transform)
y = LabelEncoder().fit_transform(y)
y.shape

(317,)

## Split train\test

In [61]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [63]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = pd.DataFrame(scaler.fit_transform(x_train), columns=x_train.columns)
x_test_scaled = pd.DataFrame(scaler.transform(x_test), columns=x_test.columns)

## Prediction with SVM

In [64]:
from sklearn import svm
svc = svm.SVC(class_weight='balanced')
svc.fit(x_train_scaled, y_train)
y_pred = svc.predict(x_test)

In [67]:
mappings = {
    "Tumour_Stage": {"I": 1, "II": 2, "III": 3},
    "Histology": {"Infiltrating Ductal Carcinoma": 1, 
                  "Infiltrating Lobular Carcinoma": 2, 
                  "Mucinous Carcinoma": 3},
    "ER status": {"Positive": 1},
    "PR status": {"Positive": 1},
    "HER2 status": {"Positive": 1, "Negative": 2},
    "Gender": {"MALE": 0, "FEMALE": 1},
    "Surgery_type": {"Other": 1, "Modified Radical Mastectomy": 2, 
                     "Lumpectomy": 3, "Simple Mastectomy": 4}
}


In [73]:
user_input = {
    "Age": float(input("Age: ")),
    "Gender": input("Gender: "),
    "Protein1": float(input("Protein1: ")),
    "Protein2": float(input("Protein2: ")),
    "Protein3": float(input("Protein3: ")),
    "Protein4": float(input("Protein4: ")),
    "Tumour_Stage": input("Tumour Stage: "),
    "Histology": input("Histology: "),
    "ER status": input("ER status: "),
    "PR status": input("PR status: "),
    "HER2 status": input("HER2 status: "),
    "Surgery_type": input("Surgery type: ")
}

for col, mapping in mappings.items():
    if col in user_input:
        user_input[col] = mapping.get(user_input[col], 0)  
columns = x_train.columns
user_df = pd.DataFrame([user_input], columns=columns)
user_scaled = pd.DataFrame(scaler.transform(user_df), columns=columns)
pred = svc.predict(user_scaled)[0]
label_map = {0: "Dead", 1: "Alive"}
print("Prediction:", label_map[pred])

Age:  23
Gender:  1
Protein1:  0.080353
Protein2:  0.42638
Protein3:  0.54715
Protein4:  0.273680
Tumour Stage:  3
Histology:  1
ER status:  1
PR status:  1
HER2 status:  2
Surgery type:  2


Prediction: Alive


In [None]:
#36.0, 1, 0.080353, 0.42638, 0.54715, 0.273680, 3, 1, 1, 1, 2, 2