# Import Libraries

In [1]:
import openpyxl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, SelectMultiple
from matplotlib.patches import Rectangle
from matplotlib.ticker import FuncFormatter
import os
import json

# Data Processing for Gender Distribution across Legislatures

In [26]:
# Open the core workbook; every sheet in it is processed as one legislature
file_path = "Core_updated.xlsx"
workbook = openpyxl.load_workbook(file_path)

# Immutable snapshot of the sheet names, in workbook order
sheet_names = tuple(workbook.sheetnames)

# Maps sheet name -> (male_count, female_count)
gender_dict = {}

for sheet_name in workbook.sheetnames:
    sheet = workbook[sheet_name]

    # Pull the value at index 5 (the sixth column) of every row after row 1
    sex_values = [
        record[5]
        for record in sheet.iter_rows(min_row=2, values_only=True)
    ]

    # Only exact 'male' / 'female' values are tallied; anything else is ignored
    male_count = sex_values.count('male')
    female_count = sex_values.count('female')

    gender_dict[sheet_name] = (male_count, female_count)

# Re-order the dictionary by ascending total (male + female) per sheet
ordered_names = sorted(gender_dict, key=lambda name: sum(gender_dict[name]))
gender_dict = {name: gender_dict[name] for name in ordered_names}

# Persist the result; the (male, female) tuples are written as JSON arrays
with open('gender_dict.json', 'w') as json_file:
    json.dump(gender_dict, json_file)

print("gender_dict.json file has been created successfully.")

gender_dict.json file has been created successfully.


# Data Processing for Social Media Distribution across Legislatures

In [9]:
# Count, per sheet of the social media workbook, how many rows have a
# non-null value for each platform, and save the result as JSON.
file_path = "Social_updated.xlsx"

# Platforms to count; this order fixes the order of the counts in each output list
social_media_columns = ['twitter', 'facebook', 'youtube', 'instagram', 'website', 'linkedin']

# Maps sheet name -> list of counts aligned with social_media_columns
social_media_dict = {}

# Open the workbook once and parse every sheet from the same handle instead of
# re-opening the file per sheet; the context manager closes it afterwards.
with pd.ExcelFile(file_path) as xls:
    for sheet_name in xls.sheet_names:
        # Read the sheet into a DataFrame
        df = xls.parse(sheet_name)

        # The sheet name is used as the country/legislature key
        country_name = sheet_name

        # Non-null entries per platform; a platform column that is absent
        # from the sheet contributes 0
        counts = [
            int(df[column].count()) if column in df.columns else 0
            for column in social_media_columns
        ]

        # Add counts to the dictionary with country name as key
        social_media_dict[country_name] = counts

# Save social_media_dict as a JSON file
with open('social_media_dict.json', 'w') as json_file:
    json.dump(social_media_dict, json_file)

print("social_media_dict.json file has been created successfully.")


social_media_dict.json file has been created successfully.


# Data Processing for Traffic on Wikipedia across Legislatures

In [20]:
# Aggregate the per-legislature traffic CSVs into yearly totals and export
# them as one table (rows: years, columns: legislatures).

# Directory holding the traffic CSV files
traffic_directory = "Traffic/"

# Maps each traffic CSV file name to the legislature name used as its column header
column_mapping = {
    'isr_traffic.csv': 'Israel',
    'sco_traffic.csv': 'Scotland',
    'can_traffic.csv': 'Canada',
    'bra_traffic.csv': 'Brazil',
    'tur_traffic.csv': 'Turkey',
    'jpn_traffic.csv': 'Japan',
    'usa_senate_traffic.csv': 'United States Senate',
    'gbr_traffic.csv': 'United Kingdom',
    'deu_traffic.csv': 'Germany',
    'fra_traffic.csv': 'France',
    'usa_house_traffic.csv': 'United States House',
    'ita_house_traffic.csv': 'Italy House',
    'cze_traffic.csv': 'Czech Republic',
    'nld_traffic.csv': 'Netherlands',
    'ita_senate_traffic.csv': 'Italy Senate',
    'irl_traffic.csv': 'Ireland',
    'esp_traffic.csv': 'Spain',
    'aut_traffic.csv': 'Austria'
}

# Maps legislature name -> Series of yearly traffic sums (indexed by year)
traffic_dict = {}

# Running bounds of the years seen across all files
min_year = float('inf')
max_year = float('-inf')

# os.listdir returns entries in arbitrary order; sorting keeps the column
# order of the exported CSV reproducible across machines and runs.
for filename in sorted(os.listdir(traffic_directory)):
    if not filename.endswith(".csv"):
        continue

    # A CSV without a mapping entry is skipped with a message rather than
    # aborting the whole cell with a KeyError.
    legislature_name = column_mapping.get(filename)
    if legislature_name is None:
        print(f"Skipping '{filename}': no entry in column_mapping.")
        continue

    file_path = os.path.join(traffic_directory, filename)

    df = pd.read_csv(file_path)

    # Files lacking either required column are ignored
    if 'date' in df.columns and 'traffic' in df.columns:
        df['date'] = pd.to_datetime(df['date'])
        df['year'] = df['date'].dt.year

        # Update the minimum and maximum years
        min_year = min(min_year, df['year'].min())
        max_year = max(max_year, df['year'].max())

        # Group by year and calculate the sum of traffic
        traffic_by_year = df.groupby('year')['traffic'].sum()

        # Store the sums in the dictionary
        traffic_dict[legislature_name] = traffic_by_year

# With no usable data the bounds are still +/-inf and cannot form an index;
# fail with a clear message instead of an opaque RangeIndex error.
if min_year > max_year:
    raise ValueError(
        f"No usable traffic CSV files (with 'date' and 'traffic' columns) found in '{traffic_directory}'."
    )

# Create a DataFrame with one row per year in the observed range
index = pd.RangeIndex(start=int(min_year), stop=int(max_year) + 1, name='Year')
traffic_df = pd.DataFrame(index=index)

# Fill the DataFrame with traffic data; assignment aligns on the year index,
# so years missing for a legislature become NaN
for legislature, data in traffic_dict.items():
    traffic_df[legislature] = data

# Replace NaN values with empty strings
traffic_df = traffic_df.replace({np.nan: ''})

# Export the DataFrame to CSV
traffic_df.to_csv('traffic_df.csv')

print("traffic_df.csv file has been created successfully.")


traffic_df.csv file has been created successfully.


# Data Processing for Religion Distribution across Legislatures

In [25]:
# Open the core workbook; every sheet in it is processed as one legislature
file_path = "Core_updated.xlsx"
workbook = openpyxl.load_workbook(file_path)

# Immutable snapshot of the sheet names, in workbook order
sheet_names = tuple(workbook.sheetnames)

# Broad category -> raw religion labels that are folded into it
religion_mapping = {
    'Christianity': [
        'catholicism', 'orthodox eastern', 'protestantism',
        'protestantism hussite', 'protestantism methodist',
        'protestantism lutheran', 'protestantism anglican',
        'protestantism anglicanism', 'anglicanism',
        'protestantism baptism', 'protestantism baptist',
        'protestantism presbyterian', 'protestantism adventist',
        'protestantism pentecostal', 'protestantism quaker',
        'protestantism restorationism', 'protestantism reformed',
        'protestantism evangelical', 'protestantism anabaptism',
        'protestantism arminianism', 'protestantism nontrinitarian',
        'protestantism unitarian', 'protestantism christian science',
        'protestantism non-denominational', 'protestantism apostolic',
        'protestantism proto',
    ],
    'Islam': ['islam'],
    'Hinduism': ['hindu'],
    'Buddhism': ['buddhism', 'nichiren shu', 'jodo_shinshu', 'soka gakkai'],
    'Judaism': ['judaism', 'orthodox', 'conservative', 'reform'],
}
# Others include: atheism, honganji-ha, confucianism, happy science, tenrikyo,
# yazidism, alevism, agnosticism, sikhism, zoroastrianism, druze, candomblé,
# bahá'í_faith

# Output order of the categories inside each counts tuple
religion_columns = ['Christianity', 'Islam', 'Hinduism', 'Buddhism', 'Judaism', 'Others']

# Maps sheet name -> tuple of counts aligned with religion_columns
religion_dict = {}

for sheet_name in workbook.sheetnames:
    sheet = workbook[sheet_name]

    # Every category, including 'Others', starts at zero for this sheet
    country_religion_count = dict.fromkeys(religion_columns, 0)

    # Index 6 (the seventh column) of every row after row 1 is the religion label
    for record in sheet.iter_rows(min_row=2, values_only=True):
        religion = record[6]

        # First category whose label list contains the value; any value found
        # in no list is counted under 'Others'.
        # NOTE(review): blank cells (read as None) also land in 'Others' -
        # confirm this is intended.
        matched_category = next(
            (
                category
                for category, labels in religion_mapping.items()
                if religion in labels
            ),
            'Others',
        )
        country_religion_count[matched_category] += 1

    # Flatten the per-category counts into a tuple ordered like religion_columns
    counts_tuple = tuple(country_religion_count[column] for column in religion_columns)

    religion_dict[sheet_name] = counts_tuple

# Persist the result; the count tuples are written as JSON arrays
with open('religion_dict.json', 'w') as json_file:
    json.dump(religion_dict, json_file)

print("religion_dict.json file has been created successfully.")

religion_dict.json file has been created successfully.
