-
Notifications
You must be signed in to change notification settings - Fork 0
/
liked.py
124 lines (104 loc) · 4.35 KB
/
liked.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import pandas as pd
import requests
from datetime import datetime
from io import StringIO
import numpy as np
# print("Likability data...")
# Location of the favorability-poll CSV that the script downloads when run.
csv_url = 'https://projects.fivethirtyeight.com/polls/data/favorability_polls.csv'
# Coloring: each report line is printed with the ANSI 256-color escape
# "38;5;<code>". The first line uses start_color and every following line
# advances the code by skip_color (see get_color_code).
start_color = 164
skip_color = 3
# Define the time decay weighting: a poll's weight is
# decay_rate ** (-age_in_days / half_life_days), so with decay_rate = 2 the
# weight halves every half_life_days days (see time_decay_weight).
decay_rate = 2
half_life_days = 28
# Weight applied to a poll according to its pollster grade (the 'fte_grade'
# column). Better grades count for more; grades missing from this table are
# given a fallback weight where the table is used.
grade_weights = {
    'A+': 1.0,
    'A': 0.9,
    'A-': 0.8,
    'A/B': 0.75,
    'B+': 0.7,
    'B': 0.6,
    'B-': 0.5,
    'B/C': 0.45,
    'C+': 0.4,
    'C': 0.3,
    'C-': 0.2,
    'C/D': 0.15,
    'D+': 0.1,
    'D': 0.05,
    'D-': 0.025,
}
# Weight applied to a poll according to its sampled population (the
# lower-cased 'population' column), normalized so the top category is 1.0.
population_weights = {
    'lv': 1.0,
    'rv': 2 / 3,
    'v': 0.5,
    'a': 1 / 3,
    'all': 1 / 3,
}
def download_csv_data(url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        url: Address of the CSV resource.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        A pandas DataFrame parsed from the response body, decoded as UTF-8.

    Raises:
        Exception: If the server answers with a status other than 200.
        requests.RequestException: Connection errors and timeouts raised by
            ``requests.get`` propagate unchanged.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Keep the original message prefix; add the details needed to debug.
        raise Exception(
            f"Failed to download CSV data from {url} "
            f"(HTTP {response.status_code})"
        )
    return pd.read_csv(StringIO(response.content.decode('utf-8')))
def time_decay_weight(dates, decay_rate, half_life_days, reference_date=None):
    """Return an exponential recency weight for each date.

    The weight is ``decay_rate ** (-days_old / half_life_days)``, where
    ``days_old`` is the whole number of days between ``reference_date`` and
    the date. With ``decay_rate == 2`` the weight halves every
    ``half_life_days`` days.

    Args:
        dates: pandas Series of datetimes.
        decay_rate: Factor by which the weight shrinks per ``half_life_days``.
        half_life_days: Number of days over which the weight shrinks by
            ``decay_rate``.
        reference_date: Instant the ages are measured from. Defaults to the
            current local time (``pd.Timestamp.now()``); pass an explicit
            value for reproducible results.

    Returns:
        A pandas Series of weights aligned with ``dates``.
    """
    if reference_date is None:
        reference_date = pd.Timestamp.now()
    # .dt.days keeps only the whole-day component of each age.
    days_old = (reference_date - dates).dt.days
    return np.exp(-np.log(decay_rate) * days_old / half_life_days)
def get_color_code(period_index, total_periods, skip_color):
    """Return the color code for the report line at ``period_index``.

    The code starts at the module-level ``start_color`` and advances by
    ``skip_color`` for every step of ``period_index``. ``total_periods`` is
    accepted but does not influence the result.
    """
    offset = period_index * skip_color
    return start_color + offset
def calculate_and_print_favorability(df, period_value, period_type='months', period_index=0, total_periods=1):
    """Print a weighted Biden-vs-Trump favorability line for one look-back window.

    Polls whose ``created_at`` falls within the last ``period_value`` months
    or days are weighted by pollster grade, sampled population and recency.
    The weighted mean of ``favorable`` (divided by 100) is computed for each
    politician and printed, together with the Biden-minus-Trump differential,
    as a single ANSI-colored line. If no usable poll falls in the window a
    "No data available" line is printed instead.

    Args:
        df: Poll table with ``created_at``, ``politician``, ``fte_grade``,
            ``population`` and ``favorable`` columns. It is not modified.
        period_value: Length of the look-back window.
        period_type: Unit of ``period_value``: ``'months'`` or ``'days'``.
        period_index: Position of this window in the report; selects the
            line color via ``get_color_code``.
        total_periods: Number of windows in the report; forwarded to
            ``get_color_code``.

    Raises:
        ValueError: If ``period_type`` is neither ``'months'`` nor ``'days'``.
    """
    now = pd.Timestamp.now()
    if period_type == 'months':
        cutoff = now - pd.DateOffset(months=period_value)
    elif period_type == 'days':
        cutoff = now - pd.Timedelta(days=period_value)
    else:
        # An unknown unit used to skip the date and politician filters
        # entirely; fail loudly instead of reporting on the wrong data.
        raise ValueError(
            f"period_type must be 'months' or 'days', got {period_type!r}")

    # assign() builds a new frame, so the caller's DataFrame stays untouched.
    polls = df.assign(created_at=pd.to_datetime(
        df['created_at'], format='%m/%d/%y %H:%M', errors='coerce'))
    keep = (
        # Unparseable dates become NaT, which never compares greater.
        (polls['created_at'] > cutoff)
        & polls['politician'].isin(['Joe Biden', 'Donald Trump'])
        # A poll without a favorable value must not add to the weight total,
        # otherwise it drags the weighted mean toward zero.
        & polls['favorable'].notna()
    )
    filtered_df = polls[keep].copy()

    combined_period = f"{period_value}{period_type[0]}"
    if filtered_df.empty:
        print(f"{combined_period}: No data available for the specified period")
        return

    # Grades absent from grade_weights (including missing grades) get 0.0125.
    filtered_df['grade_weight'] = filtered_df['fte_grade'].map(grade_weights).fillna(0.0125)
    # Populations absent from population_weights get a weight of 1.
    filtered_df['population'] = filtered_df['population'].str.lower()
    filtered_df['population_weight'] = filtered_df['population'].map(
        lambda x: population_weights.get(x, 1))
    filtered_df['time_decay_weight'] = time_decay_weight(
        filtered_df['created_at'], decay_rate, half_life_days)
    filtered_df['combined_weight'] = (
        filtered_df['grade_weight']
        * filtered_df['population_weight']
        * filtered_df['time_decay_weight']
    )
    filtered_df['weighted_favorable'] = (
        filtered_df['combined_weight'] * filtered_df['favorable'] / 100)

    totals = filtered_df.groupby('politician')[
        ['weighted_favorable', 'combined_weight']].sum()
    weighted_averages = totals['weighted_favorable'] / totals['combined_weight']

    # A politician with no polls in the window is reported as 0.
    biden_average = weighted_averages.get('Joe Biden', 0)
    trump_average = weighted_averages.get('Donald Trump', 0)
    differential = biden_average - trump_average
    favored_candidate = "Biden" if differential > 0 else "Trump"

    color_code = get_color_code(period_index, total_periods, skip_color)
    print(f"\033[38;5;{color_code}m{combined_period:<4} B:{abs(biden_average):5.2%} T:{abs(trump_average):5.2%} {differential:+5.2%} {favored_candidate}\033[0m")
if __name__ == "__main__":
    # Download the poll table once and reuse it for every look-back window.
    favorability_df = download_csv_data(csv_url)

    # Windows are reported from the longest to the shortest.
    month_windows = [(length, 'months') for length in (12, 6, 3, 1)]
    day_windows = [(length, 'days') for length in (21, 14, 7, 3, 1)]
    periods = month_windows + day_windows

    total_periods = len(periods)
    for index, (period_value, period_type) in enumerate(periods):
        calculate_and_print_favorability(
            favorability_df,
            period_value,
            period_type,
            index,
            total_periods,
        )