# Nominal Biases
Calculations and figures on how male and female politicians are named on Reddit (given name, family name, full name, or nickname). Significance testing on the resulting data is done separately in RStudio.

In [None]:
import pandas as pd
from ast import literal_eval
import glob
import numpy as np
from scipy.stats import ttest_ind
from scipy.stats import norm

In [None]:
# Colour palette (hex RGB strings). MEN and WOMEN colour the bar charts
# further down; the remaining shades are defined alongside them.

# -- men
MEN = "#009ce4"
MEN_POS = "#76efff"  # earlier candidate value: "#b8e1ff"
MEN_NEG = "#056497"

# -- women
WOMEN = "#ff876d"
WOMEN_POS = "#ffcea8"
WOMEN_NEG = "#cb4d36"

# -- accents
ACCENT = "#d2518f"
ACCENT2 = "#987ad3"

In [None]:
# Load the dataset: every CSV matching the 201* pattern is read and the
# pieces are concatenated into a single frame `df`.
# Alternative single-file source: pd.read_csv("C:/Users/vanki/partisan.csv")
csv_paths = sorted(glob.glob("C:/Users/vanki/201*.csv"))  # sorted => deterministic row order
if not csv_paths:
    # Fail with an explicit message instead of an opaque error further down.
    raise FileNotFoundError("No input files match C:/Users/vanki/201*.csv")

dfs = [pd.read_csv(path) for path in csv_paths]  ## Collect dataset

# `df` used to be assigned only in commented-out code, so the filter below
# raised NameError on a fresh kernel. Build it from the collected pieces.
# errors="ignore": the stray index column is dropped only if it is present.
df = pd.concat(dfs).drop(columns=["Unnamed: 0"], errors="ignore")
# NOTE(review): an earlier version also ran
#   df.dropna(subset=['Adjectives'], inplace=True)
# here ("some kind of mistake" in the data) -- confirm whether it is still needed.

# Keep only rows whose `sex` is male or female. `.copy()` makes the result an
# independent frame so later column assignments do not write to a slice.
df = df[df['sex'].isin(['male', 'female'])].copy()

In [None]:
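# # To look at cross-partisan subsets only (uncomment this cell: it creates the df['group'] column that the "Cross-partisan" cells below require)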
# left = ['Liberal', 'SocialDemocracy', 'socialism', 'alltheleft', 'neoliberal', 'democrats']
# right = ['Libertarian', 'Conservative', 'Republican']
# alt_right = ['The_Donald']

# full_df = df
# maps = {}
# for sr in left:
#     maps[sr] = 'left'
# for sr in right:
#     maps[sr] = 'right'
# maps['The_Donald'] = 'alt_right'
# df['group'] = df['subreddit'].map(maps)
# df.dropna(subset=['group'], inplace=True)

In [None]:
# family_name_used vs full_name_used are determined based off the used name and the entity's recorded name on Wikidata
# These were determined in the data processing stage

def to_bool(value):
    """Parse a Python-literal string such as ``"True"`` into its value.

    Args:
        value: Usually the text ``"True"`` or ``"False"`` read from a CSV
            cell; may also be an already-parsed value (bool, NaN, ...).

    Returns:
        The result of ``ast.literal_eval(value)`` when ``value`` is a valid
        Python literal; otherwise ``value`` unchanged.
    """
    try:
        return literal_eval(value)
    except (ValueError, TypeError, SyntaxError):
        # literal_eval raises SyntaxError for text that is not an expression
        # (e.g. an empty string) and ValueError/TypeError for non-literal or
        # non-string input. In every such case keep the value as it was.
        return value

# Run the name-usage flag columns through to_bool, one column at a time.
# NOTE(review): 'given_name_used' is not converted here although later cells
# sum and average it -- presumably it is already boolean; confirm.
for flag_column in ('family_name_used', 'nickname_used', 'full_name_used'):
    df[flag_column] = df[flag_column].map(to_bool)

In [None]:
# Per-gender totals of each name-usage flag.
# Naming scheme: first letter f/m = female/male; second letter
# g/s/f/n = given name / family name (surname) / full name / nickname.

fem = df[df['sex'] == 'female']
mal = df[df['sex'] == 'male']

fg, mg = (rows['given_name_used'].sum() for rows in (fem, mal))
fs, ms = (rows['family_name_used'].sum() for rows in (fem, mal))
ff, mf = (rows['full_name_used'].sum() for rows in (fem, mal))
fn, mn = (rows['nickname_used'].sum() for rows in (fem, mal))

In [None]:
# Print, for each name form, the mean usage per gender followed by an
# independent two-sample t-test (female vs. male) on the 0/1-coded flags.
female_rows = df[df['sex'] == 'female']
male_rows = df[df['sex'] == 'male']

name_forms = [
    ('first name', 'given_name_used'),
    ('family name', 'family_name_used'),
    ('nickname', 'nickname_used'),
    ('full name', 'full_name_used'),
]

for position, (label, column) in enumerate(name_forms):
    if position:
        print()  # blank line between name forms, none after the last
    print('Female average use of ' + label, female_rows[column].mean())
    print('Male average use of ' + label, male_rows[column].mean())
    print(ttest_ind(female_rows[column].map(int), male_rows[column].map(int)))

In [None]:
## Visualize Results
# Grouped bar chart: share of references using each name form, men vs. women.
import matplotlib.pyplot as plt  # this cell runs before the later cell that imports plt

# Column order must match the x tick labels set below.
name_columns = ['family_name_used', 'given_name_used', 'full_name_used', 'nickname_used']

# One bar height per name form. Previously the scalar family-name counts
# (ms / fs) were passed as heights, so all four bars of a gender had the same
# height and showed counts rather than the proportion named on the y axis.
men_share = [mal[column].mean() for column in name_columns]
women_share = [fem[column].mean() for column in name_columns]

N = len(name_columns)
ind = np.arange(N)  # the x locations for the groups
width = 0.35        # the width of the bars

fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(111)
rects1 = ax.bar(ind, men_share, width, color=MEN, label='Men', alpha=0.8)
rects2 = ax.bar(ind + width, women_share, width, color=WOMEN, label='Women')

ax.set_ylabel('Percentage of use')
ax.set_title('Name used in reference to politician')
ax.set_xticks(ind + width / 2)  # centre each tick between the paired bars
ax.set_xticklabels(('Surname', 'Given name', 'Full name', 'Other'))

ax.legend()
plt.savefig("name_use.tiff", dpi=300, bbox_inches='tight')
# plt.show()

In [None]:
# Derive a single categorical `name` column: for each row, the first of the
# four flag columns (in the order listed) that is True / 'True'; rows with no
# flag set get a missing value.
subset = df[['given_name_used', 'family_name_used', 'full_name_used', 'nickname_used']].isin([True, 'True'])
df['name'] = subset.where(subset).apply(pd.Series.first_valid_index, axis=1)

df[['sex', 'name']].to_csv("general_nominal.csv")

# Save for input into RStudio to calculate significance.
# The `group` column only exists when the cross-partisan mapping has been
# applied to df; without it the export used to fail with KeyError.
if 'group' in df.columns:
    df.dropna(subset=['group'])[['sex', 'name', 'group']].to_csv("partisan_nominal_nt.csv")
else:
    print("Skipping partisan_nominal_nt.csv: df has no 'group' column")

## Cross-partisan

In [None]:
# Mean usage of every name form, per partisan group and gender.
# Ordering is group (outermost) -> name form -> gender (innermost).
partisan_groups = ['left', 'right', 'alt_right']
name_columns = ['family_name_used', 'given_name_used', 'full_name_used', 'nickname_used']
sexes = ['male', 'female']

values = [
    df[(df['sex'] == sex) & (df['group'] == side)][column].mean()
    for side in partisan_groups
    for column in name_columns
    for sex in sexes
]

In [None]:
#%matplotlib inline
# 3-D bar chart of name-form usage by partisan group and gender. Expects
# `values` to hold 24 means ordered group -> name form -> gender (male first).
from matplotlib.patches import Patch
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- kept: older matplotlib needs it to register the "3d" projection
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(figsize=(10, 15))
ax = fig.add_subplot(111, projection="3d")

N = 4
ind = np.arange(N)  # the x locations for the groups
widthx = 0.35       # bar width along the name-form axis
widthy = 0.5        # bar depth along the partisanship axis

ax.set_ylabel("Partisanship", size=14, labelpad=3)
ax.set_xlabel("Name Used", size=14)
ax.set_zlabel("Percentage of Use", size=14, labelpad=5)
ax.set_xlim3d(-1, 4)
ax.set_ylim3d(0, widthy * 3)
ax.set_zlim3d(0, 1)
# ax.set_title('Name used in reference to politician', size=16)
ax.set_xticks(ind + widthx / 2)
ax.set_xticklabels(('Surname', 'Given name', 'Full name', 'Other'))

# One tick at the centre of each partisan row.
ax.set_yticks([widthy / 2, widthy * 3 / 2, widthy * 2.5])
ax.set_yticklabels(('Left', 'Right', 'Alt right'), rotation=90)

# Eight bars per partisan row (men/women pair for each of the four name
# forms), repeated for the three rows.
xpos = [0, 0 + widthx, 1, 1 + widthx, 2., 2 + widthx, 3., 3 + widthx] * 3
ypos = [0] * 8 + [0 + widthy] * 8 + [2 * widthy] * 8
zpos = [0] * 8 * 3  # every bar starts at z = 0

colors = [MEN, WOMEN, MEN, WOMEN, MEN, WOMEN, MEN, WOMEN] * 3

# The legend handles were never defined (NameError). bar3d takes no label
# here, so build proxy patches in the two bar colours.
legend_elements = [
    Patch(facecolor=MEN, edgecolor='black', label='Men'),
    Patch(facecolor=WOMEN, edgecolor='black', label='Women'),
]
ax.legend(handles=legend_elements, bbox_to_anchor=(0.4, 0.8), loc="upper center")

# Fixed typo: the base heights are `zpos` (was the undefined name `zos`).
ax.bar3d(xpos, ypos, zpos, widthx, widthy, values, color=colors, edgecolor='black')
ax.view_init(elev=25, azim=-85)
plt.savefig("Name_partisan.tif", dpi=300, bbox_inches='tight')
plt.show()