<a href="https://colab.research.google.com/github/lauramauricio/election-prediction-webapp/blob/main/Prediction_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
# Colab helper that exposes Google Drive inside the runtime filesystem.
from google.colab import drive

# Directory under which Drive is mounted.
ROOT = "/content/drive"

# Mount Drive at ROOT, explicitly requesting a forced remount.
drive.mount(ROOT, force_remount=True)

Mounted at /content/drive


In [26]:
!pip install dash

from dash import Dash, html, dcc, callback, Output, Input
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import plotly.express as px

# Load the dataset
data_path = '/content/merged_dataset.csv'  # Replace with the actual path to your dataset
df = pd.read_csv(data_path)

# Preprocess the data
df = pd.get_dummies(df)

# Fill missing values
df = df.dropna()  # or df.fillna(df.mean())

# Split the data into features and target
X = df.drop('total_gewahlt', axis=1)
y = df['total_gewahlt']

# Train the Random Forest model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
predictions = rf_model.predict(X_test)

# Print classification report
print(classification_report(y_test, predictions))






Columns (11,14,19,20) have mixed types. Specify dtype option on import or set low_memory=False.



              precision    recall  f1-score   support

        39.0       1.00      1.00      1.00      1994
        41.0       1.00      1.00      1.00       606
        43.0       1.00      1.00      1.00      2876
        46.0       1.00      1.00      1.00      1953
        51.0       1.00      1.00      1.00      1242
        52.0       1.00      1.00      1.00      1760
        54.0       1.00      1.00      1.00      2284
        55.0       1.00      1.00      1.00       376

    accuracy                           1.00     13091
   macro avg       1.00      1.00      1.00     13091
weighted avg       1.00      1.00      1.00     13091



In [None]:

# Create the Dash application object.
app = Dash(__name__)

# One dropdown entry per feature column of X.
feature_options = [dict(label=col, value=col) for col in X.columns]

# Page layout: centred heading, multi-select feature dropdown, prediction graph.
app.layout = html.Div([
    html.H1('Voteshare Prediction App', style={'textAlign': 'center'}),
    dcc.Dropdown(
        options=feature_options,
        # multi=True expects a list, so the default is a one-element list
        # holding the first feature column.
        value=[X.columns[0]],
        multi=True,
        id='characteristics-dropdown',
    ),
    dcc.Graph(id='prediction-graph'),
])

# Define callback to update graph based on selected characteristics
@callback(
    Output('prediction-graph', 'figure'),
    Input('characteristics-dropdown', 'value')
)
def update_graph(selected_features):
    """Build the prediction scatter plot for the features chosen in the dropdown.

    The model was fitted on every column of ``X``, so predictions are always
    computed from the full feature matrix; the dropdown selection only decides
    which columns are shown. The first selected feature is the x-axis and any
    further selected features appear in the hover tooltip.

    Args:
        selected_features: Value of the multi-select dropdown - a list of
            column names from ``X``. ``None``, an empty list or a single
            string are also accepted.

    Returns:
        A Plotly figure. If prediction or plotting fails, the error is printed
        and an empty figure titled 'Error in generating plot' is returned.
    """
    # Normalise the dropdown value to a non-empty list of column names.
    if isinstance(selected_features, str):
        selected_features = [selected_features]
    if not selected_features:
        selected_features = [X.columns[0]]  # Default to first feature if none selected
    else:
        selected_features = list(selected_features)

    try:
        # Predict with all training columns; passing only the selected subset
        # does not match the features the model was fitted on and makes
        # predict() raise.
        predictions = rf_model.predict(X)

        # assign() returns a new frame, so neither X nor df is mutated and no
        # SettingWithCopyWarning is triggered.
        plot_df = X[selected_features].assign(**{'Predicted Vote': predictions})

        # Create a scatter plot to visualize the predictions
        fig = px.scatter(
            plot_df,
            x=selected_features[0],
            y='Predicted Vote',
            hover_data=selected_features[1:] or None,
            title='Predicted Voteshare',
        )
    except Exception as e:
        # Best effort: keep the app responsive and surface the problem in the log.
        print("Error during prediction or graph update:", e)
        fig = px.scatter(title='Error in generating plot')

    return fig

# Run the app
if __name__ == '__main__':
    # app.run is the current entry point; app.run_server is obsolete in recent
    # Dash releases, and the unpinned install above pulls the latest one.
    app.run(debug=True, port=8050)
