In [22]:
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [23]:
# Read the Titanic training data from the sibling chapter folder and preview
# its first three rows as the cell output.
titanic_df = pd.read_csv(
    filepath_or_buffer="../Ch01/titanic_train.csv",
)
titanic_df.head(n=3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [24]:
# Keep only the modelling features plus the target column. The explicit
# .copy() yields an independent frame, so the column assignments below do not
# raise pandas' SettingWithCopyWarning.
titanic_df = titanic_df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch',
                         'Fare', 'Embarked', 'Survived']].copy()
# Impute missing ages with the mean age and missing embarkation ports with
# the most frequent port.
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df['Age'].mean())
titanic_df['Embarked'] = titanic_df['Embarked']\
    .fillna(titanic_df['Embarked'].mode()[0])
titanic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Pclass    891 non-null    int64  
 1   Sex       891 non-null    object 
 2   Age       891 non-null    float64
 3   SibSp     891 non-null    int64  
 4   Parch     891 non-null    int64  
 5   Fare      891 non-null    float64
 6   Embarked  891 non-null    object 
 7   Survived  891 non-null    int64  
dtypes: float64(2), int64(4), object(2)
memory usage: 55.8+ KB


In [25]:
# Integer-encode the categorical columns in place and keep each fitted
# encoder, keyed by column name, so the same mapping can be reused later.
label_encoders = {}
for column in ('Sex', 'Embarked'):
    le = LabelEncoder()
    titanic_df[column] = le.fit_transform(titanic_df[column])
    label_encoders[column] = le

In [26]:
# Select the feature columns explicitly instead of "everything except the
# target": a later cell adds the string-valued 'Age_cat' column to titanic_df,
# so re-running this cell with drop() would pull that column into X and break
# model fitting. The order matches the frame built in predict_survival().
feature_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
X = titanic_df[feature_columns]
y = titanic_df['Survived']

In [27]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible. Note the training features are bound to lowercase `x_train`,
# which is the name the grid-search cell fits on.
x_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [28]:
# Hyper-parameter candidates explored by the grid search (4 * 3 * 3 = 36
# combinations).
param_grid = dict(
    max_depth=[10, 20, 30, 40],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [29]:
# 5-fold cross-validated search over param_grid, scored by accuracy, using a
# seeded decision tree as the base estimator.
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(x_train, y_train)

In [30]:
# Keep the refitted best estimator for inference and report what the search
# selected together with its mean cross-validation accuracy.
best_model = grid_search.best_estimator_
best_params, best_cv_score = grid_search.best_params_, grid_search.best_score_
print(f"Best parameters:{best_params}")
print(f"Best Cross Validation Accuracy:{best_cv_score:.3f}")

Best parameters:{'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2}
Best Cross Validation Accuracy:0.815


In [31]:
def get_category(age):
    """Map an age in years to a coarse age-group label.

    Args:
        age: Numeric age. Missing values (None, NaN) are accepted.

    Returns:
        str: One of 'Unknown', 'Baby', 'Child', 'Teenager', 'Student',
        'Young Adult', 'Adult' or 'Elderly'. Missing ages and ages <= -1
        map to 'Unknown'.
    """
    # NaN compares False against every bound (and None cannot be compared at
    # all), so without this guard a missing age would fall through every
    # branch and be labelled 'Elderly'.
    if pd.isna(age):
        return 'Unknown'

    # (inclusive upper bound, label) pairs in ascending order.
    age_bands = (
        (-1, 'Unknown'),
        (5, 'Baby'),
        (12, 'Child'),
        (18, 'Teenager'),
        (25, 'Student'),
        (35, 'Young Adult'),
        (60, 'Adult'),
    )
    for upper_bound, label in age_bands:
        if age <= upper_bound:
            return label

    return 'Elderly'

In [32]:
# Plotting copy of the data: 'Sex' is decoded back to its original string
# labels so it can be used as a readable hue.
temp_df = titanic_df.copy()
temp_df['Sex'] = label_encoders['Sex'].inverse_transform(titanic_df['Sex'])
# visualize_data() plots x='Age_cat' from temp_df, so the age-group column
# must exist on this frame; at this point titanic_df does not have it yet, so
# the copy above cannot provide it.
temp_df['Age_cat'] = temp_df['Age'].apply(get_category)
# Display order of the age groups on the x-axis.
group_names = ['Unknown', 'Baby', 'Child', 'Teenager', 'Student', 'Young Adult', 'Adult', 'Elderly']

In [33]:
# Label every passenger with an age group derived from the numeric age.
titanic_df['Age_cat'] = titanic_df['Age'].apply(get_category)

In [34]:
def predict_survival(Pclass, Sex, Age, SibSp, Parch, Fare, Embarked):
    """Predict survival for one passenger with the tuned decision tree.

    Args:
        Pclass: Ticket class.
        Sex: Sex label as seen by the 'Sex' label encoder (e.g. 'male').
        Age: Age in years.
        SibSp: Number of siblings/spouses aboard.
        Parch: Number of parents/children aboard.
        Fare: Ticket fare.
        Embarked: Port label as seen by the 'Embarked' label encoder.

    Returns:
        str: 'Survived' if the model predicts class 1, otherwise
        'Did not survive'.

    Raises:
        gr.Error: If any input is None (a UI field left empty), so the user
            sees which fields are missing instead of an opaque failure from
            the encoder or the model.
    """
    raw_inputs = {
        'Pclass': Pclass,
        'Sex': Sex,
        'Age': Age,
        'SibSp': SibSp,
        'Parch': Parch,
        'Fare': Fare,
        'Embarked': Embarked,
    }
    missing = [name for name, value in raw_inputs.items() if value is None]
    if missing:
        raise gr.Error("Please provide a value for: " + ", ".join(missing))

    # Apply the same integer encoding that was fitted on the training data.
    sex_code = label_encoders['Sex'].transform([Sex])[0]
    embarked_code = label_encoders['Embarked'].transform([Embarked])[0]

    # Single-row frame with the feature columns in training order.
    input_data = pd.DataFrame({
        'Pclass': [Pclass],
        'Sex': [sex_code],
        'Age': [Age],
        'SibSp': [SibSp],
        'Parch': [Parch],
        'Fare': [Fare],
        'Embarked': [embarked_code]
    })

    prediction = best_model.predict(input_data)[0]
    return 'Survived' if prediction == 1 else 'Did not survive'



In [35]:
def visualize_data(feature):
    """Draw a survival-rate bar chart for the selected feature.

    Args:
        feature: 'Sex', 'Pclass' or 'Age'. Any other value (including None)
            produces an empty chart whose title says no plot is available.

    Returns:
        matplotlib.figure.Figure: The rendered figure.

    Notes:
        The 'Sex' chart reads the encoded ``titanic_df``; the 'Pclass' and
        'Age' charts read ``temp_df`` (which must contain an 'Age_cat' column
        for the 'Age' chart) and ``group_names``.
    """
    # Use an explicit figure/axes pair rather than pyplot's implicit "current
    # figure", which is shared global state.
    fig, ax = plt.subplots(figsize=(8, 6))

    if feature == 'Sex':
        sns.barplot(x='Sex', y='Survived', data=titanic_df, ax=ax)
        # 'Sex' is label-encoded in titanic_df; relabel the two ticks.
        ax.set_xticks([0, 1])
        ax.set_xticklabels(['Female', 'Male'])
        ax.set_title("Survival Rate")
    elif feature == "Pclass":
        sns.barplot(x='Pclass', y='Survived', hue='Sex', data=temp_df, ax=ax)
        ax.set_title("Survival Rate by Ticket Class")
        ax.set_xlabel("Ticket Class")
        ax.set_ylabel("Survival Rate")
    elif feature == 'Age':
        sns.barplot(x='Age_cat', y="Survived", hue='Sex', data=temp_df,
                    order=group_names, ax=ax)
        ax.set_title("Survival Rate by Age Group")
        ax.set_xlabel("Age Group")
        ax.set_ylabel("Survival Rate")
    else:
        # Make an unsupported selection visible instead of a silent blank plot.
        ax.set_title(f"No visualization available for feature: {feature!r}")

    # Deregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the Figure object itself stays valid for rendering.
    plt.close(fig)
    return fig

In [36]:
# Prediction tab: one input component per model feature, in the same order as
# the parameters of predict_survival().
predict_interface = gr.Interface(
    fn = predict_survival,
    inputs = [
        gr.Dropdown(choices=[1,2,3], label="Ticket Class"),
        gr.Radio(choices=['male','female'], label='Sex'),
        # `minimum` is the slider's lower bound (`min_width` is a layout
        # option in pixels). The maxima are cast to plain Python numbers so
        # the component config holds built-in types rather than numpy scalars.
        gr.Slider(minimum=0, maximum=float(titanic_df['Age'].max()), step=1, label='Age'),
        gr.Slider(minimum=0, maximum=int(titanic_df['SibSp'].max()), step=1, label='Siblings/Spouse Aboard'),
        gr.Slider(minimum=0, maximum=int(titanic_df['Parch'].max()), step=1, label='Parents/Children Aboard'),
        gr.Number(label='Fare'),
        gr.Radio(choices=['C', 'Q', 'S'], label="Embarked Port")
    ],
    outputs="text",
    title="Titanic Survival Prediction",
    description="Enter passenger details to predict survival on the Titanic"
)

In [37]:
# EDA tab: the radio choices must match the feature names that
# visualize_data() branches on ('Sex', 'Pclass', 'Age').
eda_interface = gr.Interface(
    fn = visualize_data,
    inputs = gr.Radio(choices=['Sex', 'Pclass', 'Age'], label="select Feature to Visualize"),
    outputs="plot",
    title = "Titanic EDA Visualization",
    description="Select a feature to visualize survival statistics on the Titanic dataset"
)

In [38]:
# Combine both interfaces into one tabbed app and start the local server.
demo = gr.TabbedInterface(
    interface_list=[predict_interface, eda_interface],
    tab_names=["Survival Prediction", "EDA Visualization"],
)
demo.launch()

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\lee08\anaconda3\envs\AIML_Lecture\Lib\site-packages\gradio\queueing.py", line 624, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\lee08\anaconda3\envs\AIML_Lecture\Lib\site-packages\gradio\route_utils.py", line 323, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\lee08\anaconda3\envs\AIML_Lecture\Lib\site-packages\gradio\blocks.py", line 2018, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\lee08\anaconda3\envs\AIML_Lecture\Lib\site-packages\gradio\blocks.py", line 1567, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\lee08\anaconda3\envs\AIML_Lecture\Lib\site-packages\anyio\to_thread.p