# Dash App with GB model

### Import necessary libraries

In [8]:
import dash
from dash import dcc, html, Input, Output, State
import pandas as pd
import numpy as np
import pickle
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import dash_bootstrap_components as dbc
import plotly.graph_objs as go
from dash_bootstrap_components import Collapse
import plotly.graph_objects as go
import plotly.io as pio
import joblib
from joblib import load
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import PolynomialFeatures
import os
import traceback


### Function Random Line

In [9]:
def create_sample_line(adjustments=None):
    # Initialize an empty dictionary for data
    data = {}

    # Generate random values for each column
    data['sex'] = np.random.choice(['male', 'female'])
    data['age'] = np.random.randint(18, 80)
    data['religion'] = np.random.choice(['cath', 'none', 'other', 'prot'])

    # Convert the single data line into a DataFrame
    df = pd.DataFrame(data, index=[0])

    # Adjust variables based on the provided adjustments
    if adjustments:
        for variable, target in adjustments.items():
            if variable == 'age':
                # Adjust age to match the target mean
                mean_age = df['age'].mean()
                df['age'] = df['age'] + (target - mean_age)
            elif variable in df.columns:
                if isinstance(target, str):
                    # Adjust categorical variable
                    df[variable] = target
                elif isinstance(target, (int, float)):
                    # Adjust numerical variable
                    df[variable] = df[variable] * (target / df[variable].mean())
                else:
                    raise ValueError(f"Invalid target type for {variable}.")
            else:
                raise ValueError(f"Invalid variable name: {variable}")

    return df

## Load pre-trained model

In [10]:
# Load pre-trained model
model_path = f'../data/models/best_gb_model.pkl'
gbc = joblib.load(model_path)
feature_names = joblib.load('../data/models/feature_names.pkl')

# Load label encoders
label_encoders_path = f'../data/models/label_encoders.pkl'
label_encoders = joblib.load(label_encoders_path)

## Function to ensure correct data format and features

In [11]:
def prepare_input_data(input_data):
    print("Preparing input data...")  # Debug print
    
    # Encode categorical variables
    label_encoders = joblib.load('../data/models/label_encoders.pkl')
    
    for col in input_data.columns:
        if col in label_encoders:
            le = label_encoders[col]
            # Handle unknown categories
            input_data[col] = input_data[col].map(lambda x: x if x in le.classes_ else le.classes_[0])
            input_data[col] = le.transform(input_data[col])
    
    # Create polynomial features
    poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
    X_poly = poly.fit_transform(input_data)

    # Add interaction feature
    X_poly_interaction = np.hstack([X_poly, (input_data['age'].values * input_data['income'].values).reshape(-1, 1)])

    # Create DataFrame with correct feature names
    prepared_data = pd.DataFrame(X_poly_interaction, columns=feature_names)

    print("Prepared data shape:", prepared_data.shape)  # Debug print
    print("Prepared data columns:", prepared_data.columns)  # Debug print
    print("Prepared data types:", prepared_data.dtypes)  # Debug print
    return prepared_data

# App

In [12]:
# Pre-Define Categorical Options 
sex_options = [
    {'label': 'Male', 'value': 'male'},
    {'label': 'Female', 'value': 'female'}
]

educ_options = [
    {'label': 'Primary Education', 'value': 'primary_education'},
    {'label': 'Secondary Education', 'value': 'secondary_education'},
    {'label': 'Tertiary Education', 'value': 'tertiary_education'}
]

# Function to map education value to category
def map_education_to_category(value):
    primary_education = [
        'primary school',
        'basic vocational training',
    ]
    secondary_education = [
        'compulsory education',
        'vocational college'
        'high school'
    ]
    tertiary_education = [
        'university',
        'diploma school',
        'higher vocational training',
        'vocational college'
    ]
    
    if value in primary_education:
        return 'Primary education'
    elif value in secondary_education:
        return 'Secondary education'
    elif value in tertiary_education:
        return 'Tertiary education'


income_options = [
    {'label': "Less than 4'000CHF per month", 'value': "low income"},
    {'label': "4'001CHF -  7'000CHF", 'value': 'rather low income'},
    {'label': "7'001CHF - 10'000CHF", 'value': 'middle income'},
    {'label': "10'001CHF - 13'000CHF", 'value': 'rather high income'},
    {'label': "More than 13'001CHF", 'value': 'high income'}
]

religion_options = [
    {'label': 'Catholic', 'value': 'cath'},
    {'label': 'Protestant', 'value': 'prot'},
    {'label': 'No religion', 'value': 'none'},
    {'label': 'Other religion', 'value': 'other'}
]

sg1_options = [
    {'label': 'German speaking region', 'value': 'German spoken'},
    {'label': 'French speaking region', 'value': 'French spoken'},
    {'label': 'Italian speaking region', 'value': 'Italian spoken'}
]

sg9_options = [
    {'label': 'Rural', 'value': 'rural'},
    {'label': 'Urban', 'value': 'urban'}
]

sc1_options = [
    {'label': 'Full-time', 'value': 'full-time'},
    {'label': 'Part-time', 'value': 'part-time'},
    {'label': 'In training/formation', 'value': 'in training/formation'},
    {'label': 'In family business', 'value': 'in family business'},
    {'label': 'In household', 'value': 'in household'},
    {'label': 'Without Profession', 'value': 'without profession'},
    {'label': 'Disabled', 'value': 'disabled'},
    {'label': 'Unemployed', 'value': 'unemployed'},
    {'label': 'Retired', 'value': 'retired'},
    {'label': 'Other', 'value': 'other'},
]

sc7b_options = [
    {'label': 'Farmers', 'value': 'farmers'},
    {'label': 'Other self-employed', 'value': 'other self-employed'},
    {'label': 'Semiskilled and unskilled workers', 'value': 'semiskilled and unskilled workers'},
    {'label': 'Skilled workers/foremen', 'value': 'skilled workers/foremen'},
    {'label': 'Routine non-manual workers', 'value': 'routine non-manual workers'},
    {'label': 'Service class employees', 'value': 'service class employees'},
    {'label': 'Others', 'value': 'others'}
]

pi1_options = [
    {'label': 'Not interested at all', 'value': 'not interested at all'},
    {'label': 'Rather not interested', 'value': 'rather not interested'},
    {'label': 'Rather interested', 'value': 'rather interested'},
    {'label': 'Very interested', 'value': 'very interested'}
]

pm3_options = [
    {'label': 'Materialist', 'value': 'materialist'},
    {'label': 'Mixed materialist', 'value': 'mixed mat'},
    {'label': 'Mixed Post-materialist', 'value': 'mixed postmat'},
    {'label': 'Post-materialist', 'value': 'post-materialist'}
]

vp1_options = [
    {'label': 'Yes', 'value': 'yes'},
    {'label': 'No', 'value': 'no'}
]

pid1_options = [
    {'label': 'Yes', 'value': 'yes'},
    {'label': 'No', 'value': 'no'}
]



In [13]:
# Initialize Dash app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.LUX, dbc.icons.FONT_AWESOME], suppress_callback_exceptions=True)

# Define sidebar layout
sidebar = dbc.Nav(
    [
        dbc.NavLink("Home", href="/", active="exact"),
        dbc.NavLink("About the Project", href="/about", active="exact"),
        dbc.NavLink("Party Prediction", href="/model", active="exact"),
        dbc.NavLink("Variable Description", href="/variables", active="exact"),
    ],
    vertical=True,
    pills=True,
    className="bg-light",
)

# Define content layout
content = html.Div(id="page-content", style={"padding": "20px"})

# Main layout
app.layout = dbc.Container(
    [
        dcc.Location(id="url"),
        dbc.Row(
            [
                dbc.Col(sidebar, width=2),
                dbc.Col(content, width=10),
            ]
        )
    ],
    fluid=True,
)

# Home page layout
home_layout = html.Div([
    html.H1('Do you want to get to know your voters?', style={'textAlign': 'center', 'marginTop': '20px'}),
    html.P('Step right up to the Party Support Prediction app! Discover who is backing your party and unlock the secrets to winning hearts and votes. By peeking into your voter demographics, you will know just where to aim those campaign fireworks. With this app by your side, your next victory dance is practically guaranteed!', style={'textAlign': 'center', 'fontSize': '1.2rem', 'marginTop': '20px', 'marginBottom': '30px'}),
    html.Img(src="https://media.giphy.com/media/j3gsT2RsH9K0w/giphy.gif?cid=790b7611api0qdqf6uidjh49qjubb3y0vn58ukaej8q68bm4&ep=v1_gifs_search&rid=giphy.gif&ct=g", width="240", height="240",
             style={"border": "none", "margin": "auto", "display": "block"}),
    html.Div(
        dcc.Link(
            dbc.Button('Click here to predict', color='primary', className='mt-3'),
            href='/model'
        ),
        style={'textAlign': 'center', 'marginTop': '20px'} 
    )
])

# About page layout
about_layout = html.Div([
    html.H1("About Page"),
    
    html.H2("General"),
    dcc.Markdown("""
    By using individual-level data on demographics and Swiss voting in national elections, we train a random forest classifier that allows us to make predictions of the likelihood of a person to vote for given parties, depending on specific characteristics. As such, our interactive app informs both policymakers and political players about the specific demographic profile of voters.
    """),
    
    html.H2("Data (including data cleaning)"),
    dcc.Markdown("""
    For this project, we use the cumulative dataset from the Swiss Election Study (SELECTS). It contains individual-level election data between the years 1971 and 2019. To predict our model, we include all available years. Additionally, our predictions focus on the eight most important parties which are: CSP, CVP, EVP, FDP, GLP, PdA, SP and SVP. Missing observations of numerical variables are addressed by setting them to zero. An approach frequently applied to handle the absence of data. Our final data set comprises 22,019 observations and incorporates the following demographic variables: sex, age, education, income, religion, linguistic region, urban/rural, work situation, social sectoral occupation (classification by Kriesi), political interest, postmaterialist views, voter's participation in national elections and party attachment.
    """),
    
    html.H2("Model"),
    dcc.Markdown("""
    For our classifier model, we compared the performance between a RandomForest Classifier model and a GradientBoosting model. In both cases, we reverted to various preprocessing and hyperparameter-tuning steps in order to increase not only the performance of the models but also their reliability. After several iterations, we opted for a GradientBoosting model. Precisely, in the first data preprocessing step, we generated polynomial features of degree 2 to capture non-linear relations between features using PolynomialFeatures. Then, to handle class imbalances, it was decided to apply SMOTEENN, which is a combination of SMOTE (Synthetic Minority Over-Sampling Technique) and ENN (Edited Nearest Neighbors), and then undersample the majority classes. The limitations of using such techniques are discussed below. To select the best hyperparameters we defined a hyperparameter grid and used RandomizedSearchCV with StratefiedKFold cross-validation. The model was then initialized and trained on the latter hyperparameters.
    """),
    
    html.H2("About the prediction and validity"),
    dcc.Markdown("""
    Our Gradient Boosting Classifier model, optimized through hyperparameter tuning, demonstrates moderate predictive power in forecasting individual's party support in the Swiss context. With an overall accuracy of 61% and a mean cross-validated F1 weighted score of 0.65, the model shows consistent performance across different subsets of the data. It performs particularly well in predicting support for parties like CSP, EVP, and PdA, with F1-scores ranging from 0.66 to 0.75. However, the model's performance varies across parties, with lower precision and recall for parties like FDP and SVP. This suggests that the model may struggle with certain political nuances or demographic patterns associated with these parties. The variation in performance across parties indicates that the model's predictions should be interpreted with caution, especially for parties with lower F1-scores. Furthermore, with F1-scores hovering around 0.61-0.65, there's significant room for improvement. Limitations of this model include potential overfitting to the training data, as evidenced by the high max_depth of 20, and the possibility of underrepresenting or misclassifying support for parties with lower predictive scores.
    """),
    
    html.H2("Limitations"),
    dcc.Markdown("""
    Finally, it should be noted that our analysis also contains some limitations. First, random forests, while a powerful method, may struggle with imbalanced datasets and can still overfit if the individual trees are too deep or if there is high variance in the data. Second, demographics do not necessarily capture all the complex aspects influencing someone's voting preferences. Context-specific factors, whether at the national or individual level such as an economic crisis or changes in personal life, as well as the media influence might play an important role in predicting voting behavior. Third, while back in the 1970s to 2000s religious affiliation, for instance, determined party vote largely (Lijphart, 1979), we see less such connections nowadays, also observable in the loss of power of religious parties like EVP or CVP. Thus, since our model uses data ranging back to the 1970s to predict voting behavior, predictions might not reflect current voting. Future predictions could aim to use only the most current data to provide forecasts that more accurately reflect present voting trends. Lastly, the most important limitations of using SMOTEENN are its potential to cause overfitting by generating overly similar synthetic samples, the introduction of noise in overlapping regions, and its sensitivity to parameter choices, which can make optimization challenging.
    """),
    
    html.H2("References"),
    dcc.Markdown("""
    Lijphart, A. (1979). Religious vs. Linguistic vs. Class voting: The "crucial experiment" of comparing Belgium, Canada, South Africa, and Switzerland. American Political Science Review, 73(2), 442–458. [https://doi.org/10.2307/1954890](https://www.google.com/search?q=Lijphart+1979+Religious+vs.+Linguistic+vs.+Class+voting)
    """)
])

# Model page layout
model_layout = html.Div(
    [
        html.H1('Party Support Prediction Based on Demographics', style={'textAlign': 'center', 'marginTop': '20px'}),
        html.Hr(),  # Adding a horizontal line for separation
        html.H2('A machine-learning-powered webapp for predicting Swiss parliamentary election outcomes', style={'textAlign': 'center', 'fontSize': '0.8rem', 'marginTop': '20px', 'marginBottom': '40px'}),  # Adjust marginTop and marginBottom for spacing

        dbc.Row(
            [
                dbc.Col(
                    [
                        html.Div("Enter Mean Age:"),
                        dcc.Input(value="", type='number', debounce=True, id='mean-age', min=18, max=90, step=1)
                    ], width=3),
            dbc.Col([
                html.Div("Select Sex:"),
                dcc.Dropdown(
                    id='sex-dropdown',
                    options=sex_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select Education:"),
                dcc.Dropdown(
                    id='education-dropdown',
                    options=educ_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select Religion:"),
                dcc.Dropdown(
                    id='religion-dropdown',
                    options=religion_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select Income:"),
                dcc.Dropdown(
                    id='income-dropdown',
                    options=income_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select Language Region:"),
                dcc.Dropdown(
                    id='sg1-dropdown',
                    options=sg1_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select Rural/Urban:"),
                dcc.Dropdown(
                    id='sg9-dropdown',
                    options=sg9_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select Work Situation:"),
                dcc.Dropdown(
                    id='sc1-dropdown',
                    options=sc1_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select Sector:"),
                dcc.Dropdown(
                    id='sc7b-dropdown',
                    options=sc7b_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select Political Interest:"),
                dcc.Dropdown(
                    id='pi1-dropdown',
                    options=pi1_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Select PostMaterialism:"),
                dcc.Dropdown(
                    id='pm3-dropdown',
                    options=pm3_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Participated in Federal Elections?:"),
                dcc.Dropdown(
                    id='vp1-dropdown',
                    options=vp1_options,
                    value='0'
                )
            ], width=3),
            dbc.Col([
                html.Div("Attachment to Party?:"),
                dcc.Dropdown(
                    id='pid1-dropdown',
                    options=pid1_options,
                            value='0'
                        )
                    ], width=3),
                dbc.Col(
                    [
                        dbc.Button("Generate Predictions", id='button', color="primary", className="mr-1"),
                    ], width=3),
            ],
            className='mb-3'
        ),
        dbc.Row(
            [
                dbc.Col(
                    [
                        dcc.Graph(id='predicted-class-histogram'),  # Define the graph component here
                    ], width=12
                ),
            ]
        ),
    ]
)
variable_data = {
    "Sex": {
        "DESCRIPTION": "Gender",
        "OPTIONS": ["Male", "Female"]
    },
    "Education": {
        "DESCRIPTION": "Highest level of education",
        "OPTIONS": [
            "Primary Education: Primary School",
            "Secondary Education: Compulsory education, vocational college, High school",
            "Tertiary Education: University, Diploma School, Higher Vocational Training, Vocational College"
        ]
    },
    "Income": {
        "DESCRIPTION": "Approximate monthly income",
        "OPTIONS": [
            "Less than 4'000CHF per month",
            "4'001CHF - 7'000CHF",
            "7'001CHF - 10'000CHF",
            "1'001CHF – 13'000CHF",
            "More than 13'001CHF per month"
        ]
    },
    "Religion": {
        "DESCRIPTION": "Religious affiliation",
        "OPTIONS": ["Catholic", "Protestant", "Other: Jewish, Muslim, other religion", "None"]
    },
    "Linguistic Region": {
        "DESCRIPTION": "Linguistic Region",
        "OPTIONS": ["German spoken", "French spoken", "Italian spoken"]
    },
    "Rural/Urban": {
        "DESCRIPTION": "City – countryside",
        "OPTIONS": ["Urban", "Rural"]
    },
    "Employment Status": {
        "DESCRIPTION": "Current employment status",
        "OPTIONS": [
            "Full time (40 hrs per week or more)",
            "Part time (5-39 hrs per week)",
            "In training (apprentice, student)",
            "In family business",
            "In household",
            "Without Profession",
            "Disabled",
            "Unemployed",
            "Retired",
            "Other (e.g., further training, sabbatical)"
        ]
    },
    "Social Class": {
        "DESCRIPTION": "Social Class according to Kriesi",
        "OPTIONS": [
            "Farmers",
            "Other Self-Employed",
            "Others",
            "Routine Non-Manual Workers",
            "Semiskilled and unskilled workers",
            "Service Class Employees",
            "Skilled Workers/Foremen"
        ]
    },
    "Political Interest": {
        "DESCRIPTION": "Political Interest",
        "OPTIONS": [
            "Not interested at all",
            "Rather not interested",
            "Rather interested",
            "Very interested"
        ]
    },
    "Materialism/Post-Materialism": {
        "DESCRIPTION": "Index of Post-Materialism",
        "OPTIONS": [
            "Materialist View",
            "Mixed materialist view",
            "Mixed post-materialist view",
            "Post-Materialist View"
        ]
    },
    "Political Participation": {
        "DESCRIPTION": "Participation in last election",
        "OPTIONS": ["Yes", "No"]
    },
    "Party Affiliation": {
        "DESCRIPTION": "Party Affiliation",
        "OPTIONS": ["Yes", "No"]
    }
}

# Create the variable layout with a styled table
variable_layout = html.Div([
    html.H1("Variable Descriptions ", style={'textAlign': 'center', 'marginTop': '20px'}),
    html.Table([
        html.Thead([
            html.Tr([
                html.Th("Variable", style={'font-weight': 'bold', 'background-color': '#f2f2f2'}),
                html.Th("Description", style={'background-color': '#f2f2f2'}),
                html.Th("Options", style={'background-color': '#f2f2f2'})
            ])
        ]),
        html.Tbody([
            html.Tr([
                html.Td(key, style={'border': '1px solid grey'}),
                html.Td(data["DESCRIPTION"], style={'border': '1px solid grey'}),
                html.Td(html.Ul([html.Li(option) for option in data["OPTIONS"]]), style={'border': '1px solid grey'})
            ], style={'border': '1px solid grey'}) for key, data in variable_data.items()
        ])
    ],
    style={'borderCollapse': 'collapse', 'width': '100%', 'border': '1px solid grey'}),
])



#  Create a sample DataFrame with adjustments
def create_sample_line(adjustments):
    data = {
        'age': [adjustments.get('age', np.random.randint(18, 80))],
        'sex': [adjustments.get('sex', 'male')],
        'educ': [adjustments.get('educ', 'primary_education')],
        'religion': [adjustments.get('religion', 'none')],
        'income': [adjustments.get('income', 'middle income')],
        'sg1': [adjustments.get('sg1', 'German spoken')],
        'sg9': [adjustments.get('sg9', 'urban')],
        'sc1': [adjustments.get('sc1', 'full-time')],
        'sc7b': [adjustments.get('sc7b', 'service class employees')],
        'pi1': [adjustments.get('pi1', 'rather interested')],
        'pm3': [adjustments.get('pm3', 'mixed post-materialism')],
        'vp1': [adjustments.get('vp1', 'no')],
        'pid1': [adjustments.get('pid1', 'no')]
    }
    return pd.DataFrame(data)

# Callback to update the predicted class histogram based on user inputs
@app.callback(
    Output('predicted-class-histogram', 'figure'),
    [Input('button', 'n_clicks')],
    [State('mean-age', 'value'),
     State('sex-dropdown', 'value'),
     State('education-dropdown', 'value'),
     State('religion-dropdown', 'value'),
     State('income-dropdown', 'value'),
     State('sg1-dropdown', 'value'),
     State('sg9-dropdown', 'value'),
     State('sc1-dropdown', 'value'),
     State('sc7b-dropdown', 'value'),
     State('pi1-dropdown', 'value'),
     State('pm3-dropdown', 'value'),
     State('vp1-dropdown', 'value'),
     State('pid1-dropdown', 'value')]
)
def update_predicted_class_histogram(n_clicks, mean_age, sex_value, education_value, religion_value, income_value, sg1_value,
                                     sg9_value, sc1_value, sc7b_value, pi1_value, pm3_value,
                                     vp1_value, pid1_value):
    if n_clicks is None:
        return go.Figure()  # Return an empty figure if button hasn't been clicked

    try:
        # Validate and parse mean age
        try:
            age = int(mean_age) if mean_age else np.random.randint(18, 80)
        except ValueError:
            age = np.random.randint(18, 80)

        # Create adjustments dictionary for create_sample_line function
        adjustments = {
            'age': age,
            'sex': sex_value,
            'educ': education_value,
            'religion': religion_value,
            'income': income_value,
            'sg1': sg1_value,
            'sg9': sg9_value,
            'sc1': sc1_value,
            'sc7b': sc7b_value,
            'pi1': pi1_value,
            'pm3': pm3_value,
            'vp1': vp1_value,
            'pid1': pid1_value
        }

        # Remove None values from adjustments
        adjustments = {k: v for k, v in adjustments.items() if v is not None}

        input_data = create_sample_line(adjustments)
        print("Input data:", input_data)  # Debug print

        prepared_data = prepare_input_data(input_data)
        print("Prepared data:", prepared_data)  # Debug print

        predicted_probabilities = gbc.predict_proba(prepared_data)[0]
        print("Predicted probabilities:", predicted_probabilities)  # Debug print

        colors = ['red', 'blue', 'green', 'purple', 'orange', 'pink', 
                  'brown', 'cyan', 'magenta', 'yellow', "orange"]

        fig = go.Figure(data=[go.Bar(x=gbc.classes_, y=predicted_probabilities, marker_color=colors)])
        fig.update_layout(title="Predicted Party Probabilities", xaxis_title="Party", yaxis_title="Probability")
        return fig

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        print("Adjustments:", adjustments)
        print(traceback.format_exc())
        return go.Figure(data=[go.Scatter(x=[0], y=[0])],
                         layout=go.Layout(title=f"Error: {str(e)}"))

# Callback to update page content based on URL
@app.callback(
    Output("page-content", "children"),
    [Input("url", "pathname")]
)
def render_page_content(pathname):
    if pathname == "/":
        return home_layout
    elif pathname == "/about":
        return about_layout
    elif pathname == "/model":
        return model_layout
    elif pathname == "/variables":
        return variable_layout
    return html.Div(
        [
            html.H1("404: Not found", className="text-danger"),
            html.P(f"The pathname {pathname} was not recognized..."),
        ],
        className="p-3 bg-light rounded-3",
    )



In [None]:
# run app
if __name__ == '__main__':
    app.run_server(port=8000, debug=True)

Input data:    age     sex               educ religion         income            sg1  \
0   45  female  primary_education     none  middle income  German spoken   

     sg9        sc1                     sc7b                pi1  \
0  urban  full-time  service class employees  rather interested   

             pm3 vp1 pid1  
0  mixed postmat  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                     float


X has feature names, but GradientBoostingClassifier was fitted without feature names



Input data:    age     sex               educ religion             income            sg1  \
0   45  female  primary_education     none  rather low income  German spoken   

     sg9        sc1                     sc7b                pi1  \
0  urban  full-time  service class employees  rather interested   

             pm3 vp1 pid1  
0  mixed postmat  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                  


X has feature names, but GradientBoostingClassifier was fitted without feature names



Input data:    age     sex               educ religion             income            sg1  \
0   45  female  primary_education     none  rather low income  German spoken   

     sg9        sc1                     sc7b                pi1  \
0  urban  full-time  service class employees  rather interested   

             pm3 vp1 pid1  
0  mixed postmat  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                  


X has feature names, but GradientBoostingClassifier was fitted without feature names



Input data:    age     sex               educ religion             income            sg1  \
0   45  female  primary_education     none  rather low income  German spoken   

     sg9        sc1                     sc7b                pi1  \
0  urban  full-time  service class employees  rather interested   

             pm3 vp1 pid1  
0  mixed postmat  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                  


X has feature names, but GradientBoostingClassifier was fitted without feature names



Input data:    age     sex               educ religion             income            sg1  \
0   45  female  primary_education     none  rather low income  German spoken   

     sg9        sc1                 sc7b                pi1            pm3  \
0  urban  full-time  other self-employed  rather interested  mixed postmat   

  vp1 pid1  
0  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                     float


X has feature names, but GradientBoostingClassifier was fitted without feature names



Input data:    age     sex               educ religion             income            sg1  \
0   45  female  primary_education     none  rather low income  German spoken   

     sg9        sc1                 sc7b                pi1            pm3  \
0  urban  full-time  other self-employed  rather interested  mixed postmat   

  vp1 pid1  
0  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                     float


X has feature names, but GradientBoostingClassifier was fitted without feature names



Input data:    age     sex               educ religion             income            sg1  \
0   45  female  primary_education     none  rather low income  German spoken   

     sg9        sc1                 sc7b                pi1            pm3  \
0  urban  full-time  other self-employed  rather interested  mixed postmat   

  vp1 pid1  
0  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                     float


X has feature names, but GradientBoostingClassifier was fitted without feature names



Input data:    age     sex               educ religion             income            sg1  \
0   45  female  primary_education     none  rather low income  German spoken   

     sg9        sc1                 sc7b                pi1            pm3  \
0  urban  full-time  other self-employed  rather interested  mixed postmat   

  vp1 pid1  
0  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                     float


X has feature names, but GradientBoostingClassifier was fitted without feature names



Input data:    age     sex               educ religion             income            sg1  \
0   45  female  primary_education     prot  rather low income  German spoken   

     sg9        sc1                 sc7b                pi1            pm3  \
0  urban  full-time  other self-employed  rather interested  mixed postmat   

  vp1 pid1  
0  no   no  
Preparing input data...
Prepared data shape: (1, 105)
Prepared data columns: Index(['sex', 'age', 'educ', 'income', 'religion', 'sg1', 'sg9', 'sc1', 'sc7b',
       'pi1',
       ...
       'pi1 pm3', 'pi1 vp1', 'pi1 pid1', 'pm3^2', 'pm3 vp1', 'pm3 pid1',
       'vp1^2', 'vp1 pid1', 'pid1^2', 'age_income_interaction'],
      dtype='object', length=105)
Prepared data types: sex                       float64
age                       float64
educ                      float64
income                    float64
religion                  float64
                           ...   
pm3 pid1                  float64
vp1^2                     float


X has feature names, but GradientBoostingClassifier was fitted without feature names

