<a href="https://colab.research.google.com/github/nawazf/BSMM-8740-1/blob/main/NN_titanic_Begin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
# Read the Titanic passenger CSV straight from its public GitHub location
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
titanic_data = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Show the loaded table; the rich display elides the middle rows.
titanic_data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [None]:
# Columns used as model inputs, and the label column to predict
features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked']
target = 'Survived'
# Slice the predictors (a DataFrame) and the label (a Series) out of the raw table
X, y = titanic_data[features], titanic_data[target]

In [None]:
# Inspect the selected feature columns. At this point X is still a DataFrame;
# the preprocessing cell further down rebinds X to the encoded array.
X

Unnamed: 0,Pclass,Sex,Age,Fare,Embarked
0,3,male,22.0,7.2500,S
1,1,female,38.0,71.2833,C
2,3,female,26.0,7.9250,S
3,1,female,35.0,53.1000,S
4,3,male,35.0,8.0500,S
...,...,...,...,...,...
886,2,male,27.0,13.0000,S
887,1,female,19.0,30.0000,S
888,3,female,,23.4500,S
889,1,male,26.0,30.0000,C


In [None]:
# Selecting with a list of column names yields a DataFrame.
type(X)

pandas.core.frame.DataFrame

In [None]:
# Selecting a single column by name yields a Series.
type(X['Age'])

pandas.core.series.Series

In [None]:
# Inspect the target labels taken from the 'Survived' column.
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [None]:
# The target was selected with a single column name, so it is a Series.
type(y)

pandas.core.series.Series

In [None]:
# Preprocess the data
# Split the selected features into two groups so each can get its own transformer.
# 'Age' and 'Fare' are handled as continuous values.
numeric_features = ['Age', 'Fare']
# 'Pclass' is stored as an integer code but is grouped with the categorical
# columns here, so it is one-hot encoded rather than used as a number.
categorical_features = ['Pclass', 'Sex', 'Embarked']

In [None]:
# Echo the numeric column group.
numeric_features

['Age', 'Fare']

In [None]:
# Echo the categorical column group.
categorical_features

['Pclass', 'Sex', 'Embarked']

In [None]:
# View only the categorical columns of the feature frame.
X[categorical_features]

Unnamed: 0,Pclass,Sex,Embarked
0,3,male,S
1,1,female,C
2,3,female,S
3,1,female,S
4,3,male,S
...,...,...,...
886,2,male,S
887,1,female,S
888,3,female,S
889,1,male,C


In [None]:
# Count distinct values per categorical column. nunique() leaves missing values
# out of the count by default, so a column containing NaN can yield one more
# one-hot column than the number reported here.
X[categorical_features].nunique()

Pclass      3
Sex         2
Embarked    3
dtype: int64

In [None]:
# Column-wise preprocessing: numeric columns get missing values replaced by the
# column mean, categorical columns are one-hot encoded. The 'num' block comes
# first in the transformer list.
# NOTE(review): the categorical columns are not imputed; the 11-column encoded
# output suggests missing 'Embarked' values get their own indicator column --
# confirm this is intended.
preprocessor = ColumnTransformer([
    ('num', SimpleImputer(strategy='mean'), numeric_features),
    ('cat', OneHotEncoder(), categorical_features),
])

In [None]:
# Encode from the untouched source columns instead of from `X` itself. The
# transformer selects columns by name, so feeding it the already-encoded array
# (which is what `X` holds after this cell has run once) would fail; reading
# from `titanic_data[features]` keeps the cell safe to re-run.
# NOTE(review): the transformer is fitted on every row before the train/test
# split below, so the imputed means also reflect rows that end up in the test
# set. Fit on the training rows only if a leakage-free evaluation is needed.
X = preprocessor.fit_transform(titanic_data[features])
X

array([[22.        ,  7.25      ,  0.        , ...,  0.        ,
         1.        ,  0.        ],
       [38.        , 71.2833    ,  1.        , ...,  0.        ,
         0.        ,  0.        ],
       [26.        ,  7.925     ,  0.        , ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [29.69911765, 23.45      ,  0.        , ...,  0.        ,
         1.        ,  0.        ],
       [26.        , 30.        ,  1.        , ...,  0.        ,
         0.        ,  0.        ],
       [32.        ,  7.75      ,  0.        , ...,  1.        ,
         0.        ,  0.        ]])

In [None]:
# Check the dimensions of the encoded matrix: (rows, encoded columns).
X.shape

(891, 11)

In [None]:
# Look at the first encoded row: the two numeric values, then the one-hot indicators.
X[0]

array([22.  ,  7.25,  0.  ,  0.  ,  1.  ,  0.  ,  1.  ,  0.  ,  0.  ,
        1.  ,  0.  ])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Decision Tree

In [None]:
# Fit a decision tree with default hyperparameters on the training split;
# the fixed seed makes the fitted tree reproducible across runs
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X=X_train, y=y_train)

In [None]:
# Make predictions on the training set (the rows the tree was fitted on).
# Results are kept under their own names so the test-set cell that follows
# does not overwrite them.
y_train_pred = clf.predict(X_train)

# Evaluate the model on its own training data; this measures how closely the
# tree reproduces the labels it was fitted on, not how well it generalises
train_accuracy = accuracy_score(y_train, y_train_pred)
print("Training Accuracy:", train_accuracy)

Training Accuracy: 0.9789325842696629


In [None]:
# Predict labels for the held-out rows
y_pred = clf.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.770949720670391
