# Connect to Google Drive

In [None]:
# Mount Google Drive at /content/gdrive so the data files referenced below are
# reachable from this Colab session. force_remount=True asks for a fresh mount
# even when one is already present.
from google.colab import drive
drive.mount('/content/gdrive',force_remount=True)

# Import Libraries

In [None]:
import openpyxl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, SelectMultiple
from matplotlib.patches import Rectangle
from matplotlib.ticker import FuncFormatter
import os

# Data Processing for Gender Distribution across Legislatures

In [None]:
# Open the core workbook; every sheet holds the members of one legislature.
#file_path = "/content/gdrive/MyDrive/CLD/2_DropUpdated/Core_updated.xlsx"
file_path = "/content/gdrive/MyDrive/2_DropUpdated/Core_updated.xlsx"
workbook = openpyxl.load_workbook(file_path)

# Sheet names double as legislature names throughout the notebook.
sheet_names = tuple(workbook.sheetnames)

# Maps legislature -> (male_count, female_count).
gender_dict = {}

for sheet_name in sheet_names:
    # Row 1 is assumed to be the header, so data starts at row 2. The 'sex'
    # value is assumed to live in the 6th column (0-based index 5).
    sex_values = [
        row[5]
        for row in workbook[sheet_name].iter_rows(min_row=2, values_only=True)
    ]

    # Only the exact strings 'male' and 'female' are tallied; anything else is ignored.
    gender_dict[sheet_name] = (sex_values.count('male'), sex_values.count('female'))

# Show what was collected.
print("Legislature Names:", sheet_names)
print("Gender Dictionary:", gender_dict)

In [None]:
# Rebuild gender_dict in ascending order of total counted members (male + female).
gender_dict = {
    legislature: counts
    for legislature, counts in sorted(gender_dict.items(), key=lambda entry: sum(entry[1]))
}

# Show the reordered dictionary.
print("Sorted Gender Dictionary:", gender_dict)

# Gender Distribution across Legislatures

In [None]:
def plot_gender_distribution_selected3(selected_countries, selected_gender):
    """Draw a bar chart of male/female member counts for the chosen legislatures.

    Args:
        selected_countries: Iterable of legislature names; each must be a key
            of the module-level ``gender_dict`` (a ``KeyError`` is raised otherwise).
        selected_gender: ``'Both'``, ``'Male'`` or ``'Female'``. With ``'Both'``
            the two series are drawn side by side; otherwise a single series
            is drawn centred on each tick.

    Returns:
        None. The figure is shown with ``plt.show()``. When no legislature is
        selected a short notice is printed and nothing is drawn.
    """
    # De-duplicate while keeping the caller's order.
    countries = list(dict.fromkeys(selected_countries))
    if not countries:
        # max() over empty count lists would raise ValueError further down.
        print("No legislatures selected - nothing to plot for gender distribution.")
        return

    # Each gender_dict entry is a (male_count, female_count) pair.
    male_counts = [gender_dict[country][0] for country in countries]
    female_counts = [gender_dict[country][1] for country in countries]

    index = np.arange(len(countries))
    bar_width = 0.35
    side_by_side = selected_gender == 'Both'

    _, ax = plt.subplots(figsize=(20, 15))

    if selected_gender in ('Both', 'Male'):
        male_bar = ax.bar(index, male_counts, bar_width, label='Male', color='#3498DB')
        add_counts_labels(ax, male_bar)  # Add count labels for Male

    if selected_gender in ('Both', 'Female'):
        # Female bars are shifted right only when they share a group with the male bars.
        female_index = index + bar_width if side_by_side else index
        female_bar = ax.bar(female_index, female_counts, bar_width, label='Female', color='#E74C3C')
        add_counts_labels(ax, female_bar)  # Add count labels for Female

    # Add labels, title, and legend
    ax.set_xlabel('Legislatures', fontsize=30)
    ax.set_ylabel('Count', fontsize=30)
    ax.set_title('Gender Distribution across Legislatures', fontsize=40)
    # Centre the tick under the pair of bars, or under the single bar when only
    # one gender is drawn (a fixed bar_width / 2 offset would be off-centre then).
    ax.set_xticks(index + bar_width / 2 if side_by_side else index)
    ax.set_xticklabels(countries, rotation=90, fontsize=13)
    # `prop` already fixes the legend font size; a separate `fontsize` is ignored when `prop` is given.
    legend = ax.legend(prop={'size': 20})
    legend.set_title('Gender', prop={'size': 20})
    ax.tick_params(axis='y', labelsize=16)

    # Leave head-room above the tallest bar for the count labels.
    ax.set_ylim(0, max(max(male_counts), max(female_counts)) + 10)

    plt.tight_layout()
    plt.show()

def add_counts_labels(ax, bars):
    """Write each bar's height as a text label centred just above the bar.

    Args:
        ax: Axes-like object providing ``annotate``.
        bars: Iterable of bar artists providing ``get_x``, ``get_width`` and
            ``get_height``.
    """
    for rect in bars:
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width() / 2
        ax.annotate(
            f'{top}',
            xy=(centre_x, top),
            xytext=(0, 3),  # nudge the label 3 points upwards
            textcoords="offset points",
            ha='center',
            va='bottom',
            fontsize=12,
        )


# Data Processing for Social Media Distribution across Legislatures

In [None]:
# Count, for every legislature (one sheet each), how many members have a
# non-empty entry for each social-media platform.
file_path = "/content/gdrive/MyDrive/2_DropUpdated/Social_updated.xlsx"

# Define the columns to consider; the order here fixes the order inside each counts tuple.
social_media_columns = ['twitter', 'facebook', 'youtube', 'instagram', 'website', 'linkedin']

# Maps legislature -> tuple of counts aligned with social_media_columns.
social_media_dict = {}

# Read every sheet through the one open ExcelFile handle. Passing the file
# path to pd.read_excel for each sheet would re-open and re-parse the whole
# workbook every time. The `with` block also closes the handle afterwards.
with pd.ExcelFile(file_path) as xls:
    for sheet_name in xls.sheet_names:
        df = pd.read_excel(xls, sheet_name=sheet_name)

        # Non-null count per platform; np.nan marks a platform column that
        # is absent from this sheet.
        social_media_dict[sheet_name] = tuple(
            df[column].count() if column in df.columns else np.nan
            for column in social_media_columns
        )

# Print the dictionary
print("Social Media Dictionary:", social_media_dict)


In [None]:
def _twitter_sort_key(item):
    """Sort key for (legislature, counts) pairs: Twitter count, with NaN ordered last."""
    twitter_count = item[1][social_media_columns.index('twitter')]
    # A missing 'twitter' column is stored as np.nan. NaN compares False
    # against every number, which would leave the sort order unpredictable,
    # so it is mapped to a key that always sorts after real counts.
    return (1, 0) if np.isnan(twitter_count) else (0, twitter_count)


# Sort social_media_dict by ascending Twitter count (entries without a count go last).
social_media_dict = dict(sorted(social_media_dict.items(), key=_twitter_sort_key))

# Print the sorted dictionary
print("Sorted Social Media Dictionary:", social_media_dict)

# Social Media Distribution across Legislatures

In [None]:
def plot_social_media_distribution_selected3(selected_countries, selected_social_media_option):
    """Draw a bar chart of social-media account counts for the chosen legislatures.

    Args:
        selected_countries: Iterable of legislature names; each must be a key
            of the module-level ``social_media_dict`` (``KeyError`` otherwise).
        selected_social_media_option: ``'All'`` for one bar per platform and
            legislature, or a single platform label (``'Twitter'``,
            ``'Facebook'``, ``'YouTube'``, ``'Instagram'``, ``'Website'``,
            ``'LinkedIn'``).

    Returns:
        None. The figure is shown with ``plt.show()``. When no legislature is
        selected a short notice is printed and nothing is drawn.
    """
    # De-duplicate while keeping the caller's order.
    countries = list(dict.fromkeys(selected_countries))
    if not countries:
        # np.nanmax over zero bars would raise ValueError further down.
        print("No legislatures selected - nothing to plot for social media distribution.")
        return

    # Label i + 1 of this list corresponds to slot i of each social_media_dict tuple.
    social_media_options = ['All', 'Twitter', 'Facebook', 'YouTube', 'Instagram', 'Website', 'LinkedIn']
    colors = ['#47a0b3', '#a2d9a4', '#edf8a3', '#fee999', '#fca55d', '#e2514a']
    count_rows = [social_media_dict[country] for country in countries]

    # Create bar chart
    _, ax = plt.subplots(figsize=(15, 8))
    index = np.arange(len(countries))

    if selected_social_media_option == 'All':
        # One bar per platform inside each legislature group.
        bar_width = 0.15
        platforms = social_media_options[1:]  # Skip 'All'
        bars = []
        for slot, option in enumerate(platforms):
            values = np.array([row[slot] for row in count_rows])
            bars.append(ax.bar(index + slot * bar_width, values, bar_width,
                               label=option, color=colors[slot], edgecolor='black'))

        # Centre the tick under the whole group of bars.
        ax.set_xticks(index + bar_width * (len(platforms) - 1) / 2)
    else:
        # Single platform: shift by one to skip the leading 'All' entry.
        option_index = social_media_options.index(selected_social_media_option) - 1
        values = np.array([row[option_index] for row in count_rows])
        bars = [ax.bar(index, values, 0.35, label=selected_social_media_option,
                       color=colors[option_index], edgecolor='black')]
        ax.set_xticks(index)

    # Add labels, title, and legend
    ax.set_xlabel('Legislatures', fontsize=30)
    ax.set_ylabel('Count', fontsize=30)
    ax.set_title(f'Social Media Distribution across Legislatures - {selected_social_media_option}', fontsize=40)
    ax.set_xticklabels(countries, rotation=90, fontsize=13)
    ax.tick_params(axis='y', labelsize=16)
    # `prop` already fixes the legend font size; a separate `fontsize` is ignored when `prop` is given.
    legend = ax.legend(prop={'size': 20})
    legend.set_title('Social Media', prop={'size': 20})

    # Write the count on top of every bar; NaN heights (platform missing for
    # that legislature) get no label.
    for bars_per_label in bars:
        for bar in bars_per_label:
            bar_height = bar.get_height()
            if not np.isnan(bar_height):
                ax.text(bar.get_x() + bar.get_width() / 2, bar_height,
                        f'{int(bar_height)}', ha='center', va='bottom', fontsize=12)

    # Fall back to a fixed axis range when every bar height is NaN.
    max_value = np.nanmax([bar.get_height() for bar_group in bars for bar in bar_group])
    ax.set_ylim(0, max_value + 10 if np.isfinite(max_value) else 100)

    plt.tight_layout()
    plt.show()



# Data Processing for Religion Distribution across Legislatures

In [None]:
# Open the core workbook; every sheet holds the members of one legislature.
#file_path = "/content/gdrive/MyDrive/CLD/2_DropUpdated/Core_updated.xlsx"
file_path = "/content/gdrive/MyDrive/2_DropUpdated/Core_updated.xlsx"
workbook = openpyxl.load_workbook(file_path)

# Sheet names double as legislature names throughout the notebook.
sheet_names = tuple(workbook.sheetnames)

# Broad category -> the individual religion labels that roll up into it.
religion_mapping = {
    'Christianity': [
        'catholicism', 'orthodox eastern', 'protestantism', 'protestantism hussite',
        'protestantism methodist', 'protestantism lutheran', 'protestantism anglican',
        'protestantism anglicanism', 'anglicanism', 'protestantism baptism',
        'protestantism baptist', 'protestantism presbyterian', 'protestantism adventist',
        'protestantism pentecostal', 'protestantism quaker', 'protestantism restorationism',
        'protestantism reformed', 'protestantism evangelical', 'protestantism anabaptism',
        'protestantism arminianism', 'protestantism nontrinitarian', 'protestantism unitarian',
        'protestantism christian science', 'protestantism non-denominational',
        'protestantism apostolic', 'protestantism proto',
    ],
    'Islam': ['islam'],
    'Hinduism': ['hindu'],
    'Buddhism': ['buddhism', 'nichiren shu', 'jodo_shinshu', 'soka gakkai'],
    'Judaism': ['judaism', 'orthodox', 'conservative', 'reform'],
}
#Others include : atheism,honganji-ha,confucianism,happy science,tenrikyo,yazidism,alevism,agnosticism,sikhism,zoroastrianism,druze,candomblé,bahá'í_faith

# Order of the categories inside each counts tuple.
religion_columns = ['Christianity', 'Islam', 'Hinduism', 'Buddhism', 'Judaism', 'Others']

# Maps legislature -> tuple of counts aligned with religion_columns.
religion_dict = {}

for sheet_name in sheet_names:
    # Fresh tally for this legislature, one slot per category.
    country_religion_count = dict.fromkeys(religion_columns, 0)

    # Row 1 is assumed to be the header, so data starts at row 2.
    for row in workbook[sheet_name].iter_rows(min_row=2, values_only=True):
        religion = row[6]  # religion is assumed to live in the 7th column (0-based index 6)

        # First category whose label list contains the value; every value
        # that matches no list is tallied under 'Others'.
        category = next(
            (name for name, labels in religion_mapping.items() if religion in labels),
            'Others',
        )
        country_religion_count[category] += 1

    religion_dict[sheet_name] = tuple(country_religion_count[column] for column in religion_columns)

# Show what was collected.
print("Legislature Names:", sheet_names)
print("Country-wise Religion Counts:", religion_dict)

# Religion Distribution across Legislatures

In [None]:
def plot_religion_distribution_selected(selected_countries, selected_religion_option):
    """Draw a bar chart of religion-category counts for the chosen legislatures.

    Args:
        selected_countries: Iterable of legislature names; each must be a key
            of the module-level ``religion_dict`` (``KeyError`` otherwise).
        selected_religion_option: ``'All'`` for one bar per category and
            legislature, or a single category (``'Christianity'``, ``'Islam'``,
            ``'Hinduism'``, ``'Buddhism'``, ``'Judaism'``). The ``'Others'``
            tally stored in ``religion_dict`` is not plotted.

    Returns:
        None. The figure is shown with ``plt.show()``. When no legislature is
        selected a short notice is printed and nothing is drawn.
    """
    # De-duplicate while keeping the caller's order.
    countries = list(dict.fromkeys(selected_countries))
    if not countries:
        # np.nanmax over zero bars would raise ValueError further down.
        print("No legislatures selected - nothing to plot for religion distribution.")
        return

    # Label i + 1 of this list corresponds to slot i of each religion_dict tuple.
    religion_options = ['All', 'Christianity', 'Islam', 'Hinduism', 'Buddhism', 'Judaism']
    colors = ['#6b8fb4', '#c5e17a', '#f58fcf', '#a3d5e2', '#ffbb6e']
    count_rows = [religion_dict[country] for country in countries]

    # Create bar chart
    _, ax = plt.subplots(figsize=(15, 8))
    index = np.arange(len(countries))

    if selected_religion_option == 'All':
        # One bar per religion category inside each legislature group.
        bar_width = 0.15
        categories = religion_options[1:]  # Skip 'All'
        bars = []
        for slot, option in enumerate(categories):
            values = np.array([row[slot] for row in count_rows])
            bars.append(ax.bar(index + slot * bar_width, values, bar_width,
                               label=option, color=colors[slot], edgecolor='black'))

        # Centre the tick under the whole group of bars.
        ax.set_xticks(index + bar_width * (len(categories) - 1) / 2)
    else:
        # Single category: shift by one to skip the leading 'All' entry.
        option_index = religion_options.index(selected_religion_option) - 1
        values = np.array([row[option_index] for row in count_rows])
        bars = [ax.bar(index, values, 0.35, label=selected_religion_option,
                       color=colors[option_index], edgecolor='black')]
        ax.set_xticks(index)

    # Add labels, title, and legend
    ax.set_xlabel('Legislatures', fontsize=30)
    ax.set_ylabel('Count', fontsize=30)
    ax.set_title(f'Religion Distribution across Legislatures - {selected_religion_option}', fontsize=40)
    ax.set_xticklabels(countries, rotation=90, fontsize=13)
    ax.tick_params(axis='y', labelsize=16)
    # `prop` already fixes the legend font size; a separate `fontsize` is ignored when `prop` is given.
    legend = ax.legend(prop={'size': 20})
    legend.set_title('Religion', prop={'size': 20})

    # Write the count on top of every bar, skipping NaN heights.
    for bars_per_label in bars:
        for bar in bars_per_label:
            bar_height = bar.get_height()
            if not np.isnan(bar_height):
                ax.text(bar.get_x() + bar.get_width() / 2, bar_height,
                        f'{int(bar_height)}', ha='center', va='bottom', fontsize=12)

    # Fall back to a fixed axis range when no finite bar height exists.
    max_value = np.nanmax([bar.get_height() for bar_group in bars for bar in bar_group])
    ax.set_ylim(0, max_value + 10 if np.isfinite(max_value) else 100)

    plt.tight_layout()
    plt.show()


# Data Processing for Traffic Distribution across Legislatures

In [None]:
# Directory holding one "<code>_traffic.csv" file per legislature.
traffic_directory = "/content/gdrive/MyDrive/Traffic/"
#"/content/gdrive/MyDrive/2_DropUpdated/Core_updated.xlsx"

# Maps each CSV file name to the legislature name used as the column label.
column_mapping = {
    'isr_traffic.csv': 'Israel',
    'sco_traffic.csv': 'Scotland',
    'can_traffic.csv': 'Canada',
    'bra_traffic.csv': 'Brazil',
    'tur_traffic.csv': 'Turkey',
    'jpn_traffic.csv': 'Japan',
    'usa_senate_traffic.csv': 'United States Senate',
    'gbr_traffic.csv': 'United Kingdom',
    'deu_traffic.csv': 'Germany',
    'fra_traffic.csv': 'France',
    'usa_house_traffic.csv': 'United States House',
    'ita_house_traffic.csv': 'Italy House',
    'cze_traffic.csv': 'Czech Republic',
    'nld_traffic.csv': 'Netherlands',
    'ita_senate_traffic.csv': 'Italy Senate',
    'irl_traffic.csv': 'Ireland',
    'esp_traffic.csv': 'Spain',
    'aut_traffic.csv': 'Austria'
}

# Maps file name -> Series of yearly traffic totals (indexed by year).
traffic_dict = {}

# os.listdir returns entries in arbitrary order. Sorting keeps the column
# order of traffic_df the same from run to run, and with it the widget option
# order and the colour each legislature gets in the plot.
for filename in sorted(os.listdir(traffic_directory)):
    if not filename.endswith(".csv"):
        continue

    file_path = os.path.join(traffic_directory, filename)
    df = pd.read_csv(file_path)

    # Files lacking either required column are skipped.
    if 'date' in df.columns and 'traffic' in df.columns:
        df['date'] = pd.to_datetime(df['date'])
        df['year'] = df['date'].dt.year

        # Group by year and calculate the sum of traffic
        traffic_by_year = df.groupby('year')['traffic'].sum()

        # Store the sums in the dictionary
        traffic_dict[filename] = traffic_by_year

# One column per file, rows aligned on year.
traffic_df = pd.DataFrame(traffic_dict)

# Replace NA values with NaN
traffic_df = traffic_df.replace({pd.NA: np.nan})

# Rename columns according to the mapping; file names without a mapping keep their name.
traffic_df = traffic_df.rename(columns=column_mapping)

traffic_df

# Traffic Distribution across Legislatures

In [None]:
def plot_traffic(selected_countries):
    """Plot one line per selected legislature from the module-level ``traffic_df``.

    Args:
        selected_countries: Iterable of column names of ``traffic_df``.
            Colours are assigned by position and wrap around when there are
            more selections than palette entries.
    """
    palette = [
        '#ff1493', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4',
        '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff',
        '#9a6324', '#fa8072', '#800000', '#aaffc3', '#808000', '#ff0000',
    ]
    x_values = traffic_df.index

    plt.figure(figsize=(12, 6))
    for position, legislature in enumerate(selected_countries):
        line_colour = palette[position % len(palette)]
        plt.plot(x_values, traffic_df[legislature], label=legislature, color=line_colour)

    plt.title('Traffic on Wikipedia across Legislatures')
    plt.xlabel('Year')
    plt.ylabel('Page Hits')
    # Anchor the legend outside the axes, to the right of the plot.
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.xticks(x_values, rotation=45)
    plt.tight_layout()
    plt.show()

# Every column of traffic_df is one selectable legislature.
all_countries = traffic_df.columns.tolist()

# Multi-select list, with every legislature selected initially.
country_selection = SelectMultiple(
    options=all_countries,
    value=all_countries,
    description='Legislatures',
)

# Re-run plot_traffic whenever the selection changes.
interactive_plot = interact(plot_traffic, selected_countries=country_selection)


# CLD Interface

In [None]:
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import SelectMultiple, interact


# Legislature names offered in the UI: the keys of gender_dict, in alphabetical order.
all_countries = sorted(list(gender_dict.keys()))  # rebinds all_countries for the interface below


# Country Selection: one initially unticked checkbox per legislature, laid out in a responsive grid
country_checkboxes = [widgets.Checkbox(value=False, description=country) for country in all_countries]
country_grid = widgets.GridBox(country_checkboxes, layout=widgets.Layout(grid_template_columns="repeat(auto-fill, minmax(150px, 1fr))"))


# Plot Type Selection Checkboxes (all four plots are enabled by default)
gender_plot_checkbox = widgets.Checkbox(description='Gender Distribution', value=True)
social_media_plot_checkbox = widgets.Checkbox(value=True, description='Social Media Distribution')
religion_plot_checkbox = widgets.Checkbox(value=True, description='Religion Distribution')
traffic_plot_checkbox = widgets.Checkbox(value=True, description='Traffic Plot')

# Internal Selection for Gender Distribution
# (the option strings are the values plot_gender_distribution_selected3 compares against)
gender_selector = widgets.RadioButtons(
    description='Gender:',
    options=['Both', 'Male', 'Female'],
    value='Both',
    disabled=False
)

# Internal Selection for Social Media Distribution
# (the option strings match the labels looked up inside plot_social_media_distribution_selected3)
social_media_selector = widgets.RadioButtons(
    options=['All', 'Twitter', 'Facebook', 'YouTube', 'Instagram', 'Website', 'LinkedIn'],
    value='All',
    description='Social Media Options:',
    disabled=False
)

# Internal Selection for Religion Distribution
# (the option strings match the labels looked up inside plot_religion_distribution_selected)
religion_selector = widgets.RadioButtons(
    options=['All', 'Christianity', 'Islam', 'Hinduism', 'Buddhism', 'Judaism'],
    value='All',
    description='Religion Options:',
    disabled=False
)

# Creating vertical boxes (VBox) for each plot type and its corresponding options
gender_plot_box = widgets.VBox([gender_plot_checkbox, gender_selector])
social_media_plot_box = widgets.VBox([social_media_plot_checkbox, social_media_selector])
religion_plot_box = widgets.VBox([religion_plot_checkbox, religion_selector])
traffic_plot_box = widgets.VBox([traffic_plot_checkbox])

# Arranging the plot type boxes side by side in a horizontal layout (HBox)
plot_type_layout = widgets.HBox([gender_plot_box, social_media_plot_box, religion_plot_box, traffic_plot_box])

def get_selected_countries():
    """Return the descriptions of all currently ticked country checkboxes, in display order."""
    chosen = []
    for box in country_checkboxes:
        if box.value:
            chosen.append(box.description)
    return chosen

def update_plot(button):
    """Draw every enabled plot for the currently ticked legislatures.

    Used as the click handler of the "Update Plots" button.

    Args:
        button: The widget that triggered the callback; not used.
    """
    selected_countries = get_selected_countries()
    if not selected_countries:
        # All checkboxes start unticked, so a click without a selection is
        # the common first interaction. Report it instead of plotting nothing.
        print("Select at least one legislature before updating the plots.")
        return

    # Each plot function creates and shows its own figure. Pre-creating a
    # figure here and display()-ing it afterwards only rendered an extra
    # blank canvas per plot, so the plot functions are called directly.
    if gender_plot_checkbox.value:
        plot_gender_distribution_selected3(selected_countries, gender_selector.value)

    if social_media_plot_checkbox.value:
        plot_social_media_distribution_selected3(selected_countries, social_media_selector.value)

    if religion_plot_checkbox.value:
        plot_religion_distribution_selected(selected_countries, religion_selector.value)

    if traffic_plot_checkbox.value:
        plot_traffic(selected_countries)

# Button that triggers update_plot with the current selections.
update_button = widgets.Button(description="Update Plots")
update_button.on_click(update_plot)

# Section headings shown above each part of the interface.
country_heading = widgets.HTML("<h3>Select Countries to be compared</h3>")
plot_heading = widgets.HTML("<h3>Choose the plots and internal selection as needed</h3>")
button_heading = widgets.HTML("<h3>Click on Update Plots button to apply selection</h3>")

# Render the interface top to bottom: country grid, plot options, update button.
display(country_heading, country_grid)
display(plot_heading, plot_type_layout)
display(button_heading, update_button)


In [None]:
# Build, for every legislature (one sheet each), a nested table
# party -> constituency -> number of rows with that (party, constituency) pair.
file_path = "/content/gdrive/MyDrive/2_DropUpdated/Political_updated.xlsx"
xls = pd.ExcelFile(file_path)
country_const_party = {}

for sheet_name in xls.sheet_names:
    df = pd.read_excel(xls, sheet_name)

    # Row count for each (party, constituency) combination present in the sheet.
    pair_sizes = df.groupby(['party', 'constituency']).size()

    constituency_dict = {}
    for (party, constituency), count in pair_sizes.items():
        # Create the party's inner dict on first sight, then record the count.
        constituency_dict.setdefault(party, {})[constituency] = count

    country_const_party[sheet_name] = constituency_dict


# Uncomment to inspect the result:
#country_const_party


In [None]:
!pip install dash

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go

# Dash application object; the layout and the callback below are attached to it.
app = dash.Dash(__name__)

# Builds the party-distribution pie figure for each requested country.
def create_pie_charts(countries):
    """Return ``{country_name: figure_dict}`` with one pie chart per country.

    Each slice is a party from ``country_const_party[country_name]``; its value
    is the sum of that party's per-constituency counts.
    """
    figures = {}
    for country_name in countries:
        party_table = country_const_party[country_name]
        labels = list(party_table)
        values = [sum(per_constituency.values()) for per_constituency in party_table.values()]
        pie = go.Pie(labels=labels, values=values)
        layout = go.Layout(title=f'{country_name} Party Distribution')
        figures[country_name] = {'data': [pie], 'layout': layout}
    return figures

# Countries shown in the dashboard; one pie chart is generated for each.
selected_countries = ['Germany','Spain']  # Variable list of countries
pie_charts = create_pie_charts(selected_countries)

# One half-width inline cell per country, which yields a 2-column grid.
pie_cells = [
    html.Div(
        [dcc.Graph(id=f'pie-chart-{country.lower()}', figure=pie_charts[country])],
        style={'width': '50%', 'display': 'inline-block'},
    )
    for country in selected_countries
]

# Pie charts on top, the bubble chart (filled in by the callback) underneath.
app.layout = html.Div(
    [
        html.Div(pie_cells),
        dcc.Graph(id='bubble-chart'),
    ],
    style={'width': '100%'},
)

# Clicking a party slice in any pie chart redraws the bubble chart for that party.
@app.callback(
    Output('bubble-chart', 'figure'),
    [Input(f'pie-chart-{country.lower()}', 'clickData') for country in selected_countries],
    [dash.dependencies.State('bubble-chart', 'figure')]
)
def update_bubble_chart(*args):
    """Build the constituency bubble chart for the party slice that was clicked.

    Args:
        *args: One ``clickData`` value per pie chart (in ``selected_countries``
            order), followed by the bubble chart's current figure (State),
            which is not used.

    Returns:
        A figure dict for the bubble chart, or ``dash.no_update`` when the
        callback was not triggered by a usable click.
    """
    ctx = dash.callback_context
    inputs = args[:-1]  # everything except the trailing State value

    if not ctx.triggered or not any(inputs):
        return dash.no_update

    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]
    clickData = ctx.triggered[0]['value']

    # Map the component id back to the exact country name. Rebuilding the
    # name with str.title() from the lower-cased id only works for names that
    # happen to be in title case.
    country_by_graph_id = {f'pie-chart-{name.lower()}': name for name in selected_countries}
    country = country_by_graph_id.get(trigger_id)
    if country is None or not clickData:
        return dash.no_update

    party = clickData['points'][0]['label']
    data = country_const_party[country][party]
    constituencies = list(data.keys())
    seats = list(data.values())

    # Normalize the seat count for bubble size: the largest count gets max_bubble_size.
    max_bubble_size = 40
    max_seat_count = max(seats)
    sizes = [(seat / max_seat_count) * max_bubble_size for seat in seats]

    bubble_figure = {
        'data': [
            go.Scatter(
                x=constituencies,
                y=[1] * len(constituencies),  # all bubbles sit on one horizontal line
                mode='markers',
                marker=dict(
                    size=sizes,
                    sizemode='area',
                    sizeref=2. * max(sizes) / (max_bubble_size ** 2),
                    sizemin=4
                ),
                text=[f'{constituency}: {seat} seats' for constituency, seat in zip(constituencies, seats)],
                hoverinfo='text'
            )
        ],
        'layout': go.Layout(
            title=f'{country} Constituencies with {party} Representation',
            xaxis={'title': 'Constituencies', 'tickangle': -45},
            yaxis={'title': 'Number of Seats', 'showticklabels': False},
            hovermode='closest'
        )
    }

    return bubble_figure

# Start the Dash development server when this code runs as the main module.
# `app.run` replaces `app.run_server`, which is no longer available in recent
# Dash releases (dash is installed unpinned in this notebook).
if __name__ == '__main__':
    app.run(debug=True)
