In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import openpyxl
from pagerank import powerIteration

In [2]:
# Survey analysis setup: load the 2017 and 2019 relationship matrices and
# demographics, then derive per-column averages, correlations, and the
# 2019-minus-2017 differences.

# Column order shared by every combined "all measures" table in this cell.
MEASURE_COLUMNS = ['know', 'contact', 'like', 'trust', 'support']


def _read_survey_csv(year, name):
    """Read ``data/<year>_<name>.csv``, using its first column as the index."""
    return pd.read_csv("data/{}_{}.csv".format(year, name), index_col=0)


def _combine_measures(know, contact, like, trust, support):
    """Join one Series per measure column-wise, keeping only shared index labels.

    The resulting columns are labelled in MEASURE_COLUMNS order.
    """
    combined = pd.concat([know, contact, like, trust, support], axis=1, join='inner')
    combined.columns = list(MEASURE_COLUMNS)
    return combined


def _index_sorted_by_last_name(demographics):
    """Return the demographics index as a list, ordered by the 'lname' column."""
    return list(demographics.sort_values(by=['lname']).index)


# ---------------------------------------------------------------- 2017 ----

# Importing Data
data_2017_know = _read_survey_csv(2017, "know")
data_2017_like = _read_survey_csv(2017, "like")
data_2017_contact = _read_survey_csv(2017, "contact")
data_2017_trust = _read_survey_csv(2017, "trust")
data_2017_support = _read_survey_csv(2017, "support")

# General Data / Demographics
data_2017_demographics = _read_survey_csv(2017, "demographics")
data_2017_demographics_men = data_2017_demographics[data_2017_demographics['gender'] == 'Male']
data_2017_demographics_women = data_2017_demographics[data_2017_demographics['gender'] == 'Female']

# Year-specific index lists, so the 2017 cohort stays available after the
# 2019 section runs.
people_2017 = _index_sorted_by_last_name(data_2017_demographics)
men_2017 = list(data_2017_demographics_men.index)
women_2017 = list(data_2017_demographics_women.index)

# Average: The Numbers (DataFrame.mean() yields one value per column)
avg_2017_know = data_2017_know.mean()
avg_2017_like = data_2017_like.mean()
avg_2017_contact = data_2017_contact.mean()
avg_2017_trust = data_2017_trust.mean()
avg_2017_support = data_2017_support.mean()
avg_2017_all = _combine_measures(
    avg_2017_know, avg_2017_contact, avg_2017_like, avg_2017_trust, avg_2017_support
)
avg_2017_all_corr = avg_2017_all.corr()  # Correlation between the five measures

# Raw Correlations (column-by-column within each matrix)
data_2017_know_corr = data_2017_know.corr()
data_2017_like_corr = data_2017_like.corr()
data_2017_contact_corr = data_2017_contact.corr()
data_2017_trust_corr = data_2017_trust.corr()
data_2017_support_corr = data_2017_support.corr()


# ---------------------------------------------------------------- 2019 ----

# Importing Data
data_2019_know = _read_survey_csv(2019, "know")
data_2019_like = _read_survey_csv(2019, "like")
data_2019_contact = _read_survey_csv(2019, "contact")
data_2019_trust = _read_survey_csv(2019, "trust")
data_2019_support = _read_survey_csv(2019, "support")

# General Data / Demographics
data_2019_demographics = _read_survey_csv(2019, "demographics")
data_2019_demographics_men = data_2019_demographics[data_2019_demographics['gender'] == 'Male']
data_2019_demographics_women = data_2019_demographics[data_2019_demographics['gender'] == 'Female']

people_2019 = _index_sorted_by_last_name(data_2019_demographics)
men_2019 = list(data_2019_demographics_men.index)
women_2019 = list(data_2019_demographics_women.index)

# The unsuffixed names used to be assigned once per year, which overwrote the
# 2017 values before anything could read them. They keep their previous final
# meaning (the 2019 cohort) for any later cell that still refers to them.
people = list(people_2019)
men = list(men_2019)
women = list(women_2019)

# Average: The Numbers
avg_2019_know = data_2019_know.mean()
avg_2019_like = data_2019_like.mean()
avg_2019_contact = data_2019_contact.mean()
avg_2019_trust = data_2019_trust.mean()
avg_2019_support = data_2019_support.mean()
avg_2019_all = _combine_measures(
    avg_2019_know, avg_2019_contact, avg_2019_like, avg_2019_trust, avg_2019_support
)
avg_2019_all_corr = avg_2019_all.corr()  # Correlation between the five measures

# Raw Correlations
data_2019_know_corr = data_2019_know.corr()
data_2019_like_corr = data_2019_like.corr()
data_2019_contact_corr = data_2019_contact.corr()
data_2019_trust_corr = data_2019_trust.corr()
data_2019_support_corr = data_2019_support.corr()


# --------------------------------------------------------- Differences ----

# 2019 minus 2017. pandas aligns on index labels, so a label present in only
# one of the two years yields NaN.
avg_diff_know = avg_2019_know.sub(avg_2017_know)
avg_diff_like = avg_2019_like.sub(avg_2017_like)
avg_diff_contact = avg_2019_contact.sub(avg_2017_contact)
avg_diff_trust = avg_2019_trust.sub(avg_2017_trust)
avg_diff_support = avg_2019_support.sub(avg_2017_support)
avg_diff_all = avg_2019_all.sub(avg_2017_all)
avg_diff_all_corr = avg_diff_all.corr()  # Correlation

In [3]:
# Scores returned by powerIteration (imported from the pagerank module) for
# every relationship matrix, then combined per year and differenced.
# The calls run in the order know, like, contact, trust, support; the combined
# tables are laid out as know, contact, like, trust, support.
pr_measure_columns = ['know', 'contact', 'like', 'trust', 'support']

# ---- 2017 ----
(pr_2017_know,
 pr_2017_like,
 pr_2017_contact,
 pr_2017_trust,
 pr_2017_support) = map(
    powerIteration,
    (data_2017_know, data_2017_like, data_2017_contact, data_2017_trust, data_2017_support),
)

pr_2017_parts = [pr_2017_know, pr_2017_contact, pr_2017_like, pr_2017_trust, pr_2017_support]
pr_2017_all = pd.concat(pr_2017_parts, axis=1, join='inner')
pr_2017_all.columns = list(pr_measure_columns)
pr_2017_all_corr = pr_2017_all.corr()  # correlation between the five score columns

# ---- 2019 ----
(pr_2019_know,
 pr_2019_like,
 pr_2019_contact,
 pr_2019_trust,
 pr_2019_support) = map(
    powerIteration,
    (data_2019_know, data_2019_like, data_2019_contact, data_2019_trust, data_2019_support),
)

pr_2019_parts = [pr_2019_know, pr_2019_contact, pr_2019_like, pr_2019_trust, pr_2019_support]
pr_2019_all = pd.concat(pr_2019_parts, axis=1, join='inner')
pr_2019_all.columns = list(pr_measure_columns)
pr_2019_all_corr = pr_2019_all.corr()  # correlation between the five score columns

# ---- Change from 2017 to 2019 (2019 minus 2017, aligned on labels) ----
pr_diff_know = pr_2019_know - pr_2017_know
pr_diff_like = pr_2019_like - pr_2017_like
pr_diff_contact = pr_2019_contact - pr_2017_contact
pr_diff_trust = pr_2019_trust - pr_2017_trust
pr_diff_support = pr_2019_support - pr_2017_support
pr_diff_all = pr_2019_all - pr_2017_all
pr_diff_all_corr = pr_diff_all.corr()  # correlation of the per-measure changes

In [4]:
# Export every per-person table side by side in a single workbook.
#
# Both demographics tables carry the same column names (at least 'lname' and
# 'gender'), and all six summary tables use the same five measure names.
# Each table's columns are therefore prefixed with their source, so the sheet
# has unique, self-describing headers instead of repeated "know", "like", ...
labelled_tables = [
    ('demographics_2017', data_2017_demographics),
    ('demographics_2019', data_2019_demographics),
    ('avg_2017', avg_2017_all),
    ('avg_2019', avg_2019_all),
    ('avg_diff', avg_diff_all),
    ('pr_2017', pr_2017_all),
    ('pr_2019', pr_2019_all),
    ('pr_diff', pr_diff_all),
]
# add_prefix returns a renamed copy; the source tables are left untouched.
tables = [table.add_prefix(label + '_') for label, table in labelled_tables]

# join='inner' keeps only the index labels that appear in every table.
all_data = pd.concat(tables, axis=1, join='inner')
all_data.to_excel("all_data.xlsx")