In [29]:
import pandas as pd

# load data
data = pd.read_csv(r"CLEAN_DATA\Europe_2015_2020_manifesto_gini_ps.csv")


# Function to categorize political orientation on a per-party basis
def categorize_orientation_party(rile):
    if rile < 0:
        return 'Left'
    elif rile > 0:
        return 'Right'
        
data['Political Orientation Party'] = data['rile'].apply(categorize_orientation_party)

# Calculate the success rate for Left and Right parties within each country
data['Success'] = data['absseat'].apply(lambda x: 1 if x > 0 else 0)
success_rate_by_orientation_country = data.groupby(['countryname', 'Political Orientation Party'])['Success'].mean().reset_index()

# Pivot for success rate
success_rate_pivot = success_rate_by_orientation_country.pivot(index='countryname', columns='Political Orientation Party', values='Success').reset_index()
success_rate_pivot.columns = ['Country', 'Success Rate - Left Parties', 'Success Rate - Right Parties']

# Calculate average stance positivity score per political orientation within each country
avg_stance_by_orientation = data.groupby(['countryname', 'Political Orientation Party'])['stance_positivity_score'].mean().reset_index()

# Pivot for stance positivity score
avg_stance_pivot = avg_stance_by_orientation.pivot(index='countryname', columns='Political Orientation Party', values='stance_positivity_score').reset_index()
avg_stance_pivot.columns = ['Country', 'Avg Positivity Score - Left Parties', 'Avg Positivity Score - Right Parties']

# Merge average stance positivity score and success rate tables
country_profiles = avg_stance_pivot.merge(success_rate_pivot, on='Country', how='left')
country_profiles.fillna(0, inplace=True)

# Determine Comparative Success Rate
def compare_success_rates(row):
    if row['Success Rate - Left Parties'] > row['Success Rate - Right Parties']:
        return '+'
    elif row['Success Rate - Left Parties'] < row['Success Rate - Right Parties']:
        return '-'
    else:
        return '±'

country_profiles['Comparative Success Rate - Left'] = country_profiles.apply(compare_success_rates, axis=1)
country_profiles['Comparative Success Rate - Right'] = country_profiles['Comparative Success Rate - Left'].apply(lambda x: '+' if x == '-' else ('-' if x == '+' else '±'))

# Prepare the final DataFrame for display
final_df = country_profiles[['Country', 'Avg Positivity Score - Left Parties', 'Comparative Success Rate - Left', 'Avg Positivity Score - Right Parties', 'Comparative Success Rate - Right']]
final_df.rename(columns={'Avg Positivity Score - Left Parties': 'Stances - Left', 'Comparative Success Rate - Left': 'Success Rate - Left', 'Avg Positivity Score - Right Parties': 'Stances - Right', 'Comparative Success Rate - Right': 'Success Rate - Right'}, inplace=True)

# Apply a simple function to convert positivity scores to '+' or '-'
def convert_positivity(score):
    return '+' if score > 0 else '-'

final_df['Stances - Left'] = final_df['Stances - Left'].apply(convert_positivity)
final_df['Stances - Right'] = final_df['Stances - Right'].apply(convert_positivity)

# Assign country profile labels
unique_profiles = final_df.drop('Country', axis=1).drop_duplicates()
profile_labels = {tuple(row): 'Country ' + chr(65+i) for i, row in enumerate(unique_profiles.itertuples(index=False, name=None))}
final_df['Country Profile'] = final_df.drop('Country', axis=1).apply(tuple, axis=1).map(profile_labels)


# Group countries by 'Country Profile' and aggregate them into lists
profile_to_countries = final_df.groupby('Country Profile')['Country'].apply(list).reset_index()

# This DataFrame 'profile_to_countries' now contains two columns: 'Country Profile' and a list of 'Countries' for each profile

# Drop the 'countryname' column
final_df.drop(columns=['Country'], inplace=True)

# Sort the DataFrame by the 'Unique Country Profile' column
final_df.sort_values(by='Country Profile', inplace=True)

# Drop duplicates based on the 'Unique Country Profile' column
final_df.drop_duplicates(subset=['Country Profile'], keep='first', inplace=True)

# Move Unique Country Profile to the front
final_df = final_df[ ['Country Profile'] + [ col for col in final_df.columns if col != 'Country Profile' ] ]
final_df = final_df.merge(profile_to_countries, on='Country Profile')
# Convert the lists in the 'Country' column to strings without brackets
final_df['Country'] = final_df['Country'].apply(lambda x: ', '.join(x))

# Set the maximum column width to None to prevent truncation
pd.set_option('display.max_colwidth', None)

# Display the modified DataFrame
display(final_df)






A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df.rename(columns={'Avg Positivity Score - Left Parties': 'Stances - Left', 'Comparative Success Rate - Left': 'Success Rate - Left', 'Avg Positivity Score - Right Parties': 'Stances - Right', 'Comparative Success Rate - Right': 'Success Rate - Right'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['Stances - Left'] = final_df['Stances - Left'].apply(convert_positivity)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://p

Unnamed: 0,Country Profile,Stances - Left,Success Rate - Left,Stances - Right,Success Rate - Right,Country
0,Country A,+,-,+,+,"Austria, Iceland, Poland, Slovakia, Spain, Switzerland"
1,Country B,+,±,+,±,"Belgium, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Germany, Hungary, Ireland, Italy, Latvia, Lithuania, Netherlands, Norway, Portugal, Romania, Serbia, Slovenia, Sweden, Ukraine"
2,Country C,-,-,+,+,Bulgaria
3,Country D,+,+,+,-,"Greece, Moldova, Montenegro, United Kingdom"


In [27]:










# Step 1: Aggregate Data by Country
country_agg = avg.groupby('countryname').agg({
    'stance_positivity_score': 'mean',  # Average stance positivity score per country
    'rile': 'mean'  # Average RILE score per country
}).reset_index()

# Define thresholds for stance type and political orientation based on aggregated data
negative_threshold = country_agg['stance_positivity_score'].quantile(0.25)
positive_threshold = country_agg['stance_positivity_score'].quantile(0.75)

# Function to categorize aggregated stance type
def categorize_stance_agg(score):
    if score < negative_threshold:
        return 'Negative'
    elif score > positive_threshold:
        return 'Positive'
    else:
        return 'Neutral'

# Function to categorize aggregated political orientation
def categorize_orientation_agg(rile):
    if rile < 0:
        return 'Left'
    elif rile > 0:
        return 'Right'

# Apply categorization to aggregated data
country_agg['Stance Type'] = country_agg['stance_positivity_score'].apply(categorize_stance_agg)
country_agg['Political Orientation'] = country_agg['rile'].apply(categorize_orientation_agg)

# Step 2: Assign a unique profile to each country based on the aggregated characteristics
# This example uses a simple concatenation for demonstration, but you can customize this step
country_agg['Country Profile'] = 'Profile ' + country_agg['Stance Type'] + '-' + country_agg['Political Orientation']
unique_profiles = country_agg['Country Profile'].unique()

# Create a mapping from countries to profiles
country_profile_mapping = dict(zip(country_agg['countryname'], country_agg['Country Profile']))

# # Example output
# # You can also display the full country_agg DataFrame for a detailed view
# display(country_agg[['countryname', 'Stance Type', 'Political Orientation', 'Country Profile']])

# Group by the aggregated 'Country Profile' to find groups of countries with the same profile
profile_groups = country_agg.groupby('Country Profile')['countryname'].apply(list).reset_index()

# Assign a unique identifier to each country profile
profile_groups['Unique Country Profile'] = ['Country ' + chr(65+i) for i in range(len(profile_groups))]

# Now, 'profile_groups' DataFrame contains the mapping of each unique profile to the countries it includes
# The 'Unique Country Profile' column has the identifiers like 'Country A', 'Country B', etc.
# The 'countryname' column now lists the countries in each profile

# To merge this information back to the original aggregated data:
# First, create a mapping from 'Country Profile' to 'Unique Country Profile' and list of countries
profile_to_unique = dict(zip(profile_groups['Country Profile'], profile_groups['Unique Country Profile']))
profile_to_countries = dict(zip(profile_groups['Country Profile'], profile_groups['countryname']))

# Apply the mapping to assign the unique profile identifier to each country in the aggregated data
country_agg['Unique Country Profile'] = country_agg['Country Profile'].apply(lambda x: profile_to_unique[x])
# Also, assign the list of countries sharing the same profile
country_agg['Countries in Profile'] = country_agg['Country Profile'].apply(lambda x: ', '.join(profile_to_countries[x]))

# The 'country_agg' DataFrame now includes the unique country profile identifier and the list of countries in each profile
# display(country_agg[['countryname', 'Country Profile', 'Unique Country Profile', 'Countries in Profile']])

# Drop the 'countryname' column
country_agg.drop(columns=['countryname'], inplace=True)

# Sort the DataFrame by the 'Unique Country Profile' column
country_agg.sort_values(by='Unique Country Profile', inplace=True)

# Drop duplicates based on the 'Unique Country Profile' column
country_agg.drop_duplicates(subset=['Unique Country Profile'], keep='first', inplace=True)

# Move Unique Country Profile to the front
final_df = country_agg[ ['Unique Country Profile'] + [ col for col in country_agg.columns if col != 'Unique Country Profile' ] ]

# Display the modified DataFrame
display(final_df)



# ----- Things to add -----
# Success rate: what is the chance for a left wing party or a right wing party to get into government?
# 

NameError: name 'avg' is not defined