<a href="https://colab.research.google.com/github/ldsAS/Tibame-AI-Learning/blob/main/Tibame20250618_LAB_%E9%90%B5%E9%81%94%E5%B0%BC%E8%99%9F_Gradio%E4%BB%8B%E9%9D%A2%E7%89%88.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
class AutoPreprocess:
    """Learn and replay simple per-column preprocessing for tabular data.

    ``fit`` inspects every requested column and records:

    * a fill value for missing entries (mode for text and boolean columns,
      median for numeric columns);
    * an optional scaler for numeric columns: none when the column only
      holds 0/1, ``MinMaxScaler`` for integer columns with at most 10
      distinct values, ``StandardScaler`` otherwise;
    * the categories to one-hot encode (text columns only).

    ``transform`` replays those recorded decisions on a DataFrame or on a
    single record given as a dict, and returns the columns named in
    ``final_field_names``.
    """

    def __init__(self):
        # column name -> fitted scaler (only for columns that are scaled)
        self.scaler = {}
        # column name -> value used to replace missing entries
        self.fillna_value = {}
        # column name -> categories seen at fit time (text columns only)
        self.onehotencode_value = {}
        # input columns, in the order given to fit()
        self.field_names = []
        # output columns produced by transform(), in order
        self.final_field_names = []

    @staticmethod
    def _is_text(column):
        """Return True when *column* should be treated as categorical text."""
        # is_string_dtype also recognises pandas' dedicated string dtypes,
        # which a plain ``dtype == object`` comparison would miss.
        return (pd.api.types.is_object_dtype(column)
                or pd.api.types.is_string_dtype(column))

    @staticmethod
    def _most_frequent(column):
        """Return the mode of *column*, or NaN when it has no non-null value."""
        modes = column.mode()
        return modes.iloc[0] if len(modes) else np.nan

    def fit(self, data, field_names):
        """Record fill values, scalers and one-hot categories for each column.

        Args:
            data: DataFrame holding the training data.
            field_names: Names of the columns of ``data`` to preprocess.

        Any state from a previous ``fit`` call is discarded.
        """
        self.__init__()
        self.field_names = field_names

        for fname in field_names:
            column = data[fname]
            is_text = self._is_text(column)
            is_bool = pd.api.types.is_bool_dtype(column)

            # Missing-value imputation: mode for text/bool, median for numbers.
            if is_text or is_bool:
                self.fillna_value[fname] = self._most_frequent(column)
            else:
                self.fillna_value[fname] = column.median()

            if is_text:
                # One output column per category, most frequent first.
                categories = column.value_counts().index.tolist()
                self.onehotencode_value[fname] = categories
                self.final_field_names.extend(
                    f"{fname}_{value}" for value in categories
                )
                continue

            # Boolean and numeric columns keep their own name in the output.
            self.final_field_names.append(fname)
            if is_bool or column.isin([0, 1]).all():
                continue  # already on a 0/1 scale, nothing to fit

            if pd.api.types.is_integer_dtype(column) and column.nunique() <= 10:
                # Small set of integer codes: squeeze into [0, 1].
                scaler = MinMaxScaler()
            else:
                scaler = StandardScaler()
            scaler.fit(data[[fname]])
            self.scaler[fname] = scaler

    def transform(self, data):
        """Apply the fitted preprocessing and return the model-ready columns.

        Args:
            data: Either a DataFrame containing the fitted columns, or a dict
                describing a single record (column name -> scalar). Columns
                missing from the dict are treated as missing values.

        Returns:
            A DataFrame restricted to ``final_field_names``. The input object
            is never modified.
        """
        if isinstance(data, dict):
            # Build the one-row frame from a fresh mapping so the caller's
            # dict is left untouched and can be reused for another call.
            data = pd.DataFrame(
                {fname: [data.get(fname, np.nan)] for fname in self.field_names}
            )
        else:
            data = data.copy()

        for fname in self.field_names:
            # Encoding follows what fit() recorded rather than the dtype seen
            # now, so a single None/NaN cannot reroute a column.
            categories = self.onehotencode_value.get(fname)

            if data[fname].isnull().any():
                column = data[fname]
                if categories is not None:
                    # A missing dict key arrives as a float NaN column; make
                    # it object so it can hold the text fill value.
                    column = column.astype(object)
                data[fname] = column.fillna(self.fillna_value[fname])

            if categories is None and pd.api.types.is_object_dtype(data[fname]):
                # e.g. a None placeholder that has just been imputed: recover
                # the numeric/bool dtype before scaling or casting.
                data[fname] = data[fname].infer_objects()

            if fname in self.scaler:
                scaled = self.scaler[fname].transform(data[[fname]])
                data[fname] = np.asarray(scaled).ravel()

            if categories is not None:
                for value in categories:
                    data[f"{fname}_{value}"] = (data[fname] == value).astype("int8")
            elif pd.api.types.is_bool_dtype(data[fname]):
                data[fname] = data[fname].astype(int)

        return data[self.final_field_names]

    def save(self, file_name):
        """Pickle this fitted preprocessor to ``file_name``."""
        with open(file_name, "wb") as f:
            pickle.dump(self, f)

    @staticmethod
    def load(file_name):
        """Load a preprocessor previously written by ``save``.

        SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
        file. Only load files that come from a trusted source.
        """
        with open(file_name, "rb") as f:
            return pickle.load(f)

In [2]:
# Load the fitted preprocessor and the trained model from disk.
# SECURITY: pickle can run arbitrary code while loading; only open
# files that come from a trusted source.
ap = AutoPreprocess.load("preprocess.bin")
with open("model.bin", "rb") as f:  # close the handle once the model is read
    model = pickle.load(f)

In [3]:
import gradio as gr

def get_value(level, sex, age, p1, p2, price, port):
    """Build one passenger record from the form values and predict survival.

    Relies on the module-level ``ap`` (fitted preprocessor) and ``model``
    (classifier exposing ``predict_proba``).

    Args:
        level: Ticket class; converted with ``int``.
        sex: "男" is mapped to "male", anything else to "female".
        age: Passenger age (required).
        p1: Number of collateral relatives ("旁系親屬數目"); converted with ``int``.
        p2: Number of lineal relatives ("直系親屬數目"); converted with ``int``.
        price: Ticket price (required).
        port: Port of embarkation, passed through unchanged.

    Returns:
        A message with the predicted survival probability in percent, or a
        prompt asking for the missing required field.
    """
    # Required fields: an emptied number box may arrive as "" or as None.
    if age is None or isinstance(age, str):
        return "請輸入[年齡]的值"
    if price is None or isinstance(price, str):
        return "請輸入[船票價格]的值"

    sex_value = 'male' if sex == '男' else 'female'

    record = {
        "船票等級" : int(level),
        "性別" : sex_value,
        "年紀" : age,
        "旁系親屬數目" : int(p1),
        "直系親屬數目" : int(p2),  # was int(p1): the lineal count was ignored
        "船票價格" : price,
        "出發港口" : port,
    }

    X = ap.transform(record)
    # One row in, so take the positive-class probability as a plain float;
    # formatting the array itself would print brackets around the number.
    survival_pct = float(model.predict_proba(X)[0, 1]) * 100
    return f"乘客的生存率為 {survival_pct:.2f}%"

with gr.Blocks() as demo:
    # Heading and short description shown above the form.
    gr.Markdown("## 乘客生存預測\n輸入乘客的資料，預測它的生存機率")

    # Components are listed in the positional order get_value receives them.
    inputs = [
        gr.Radio(choices=["1", "2", "3"], value="1", label="船票等級"),
        gr.Radio(choices=["男", "女"], value="男", label="性別"),
        gr.Number(value="", label="年紀"),
        gr.Dropdown(choices=[str(n) for n in range(9)], value="0", label="旁系親屬數目"),
        gr.Dropdown(choices=[str(n) for n in range(7)], value="0", label="直系親屬數目"),
        gr.Number(value="", label="船票價格"),
        gr.Radio(choices=["S", "C", "Q"], value="S", label="出發港口"),
    ]

    eval_button = gr.Button(value="預估")
    output = gr.Textbox(label="乘客生存率")

    # Clicking the button sends the form values to get_value and shows
    # the returned text in the output box.
    eval_button.click(get_value, inputs, output)


# Start the web app.
demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://98f2f5f73ae65eba53.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


