In [2]:
# print out correlation and rmse results for predicted P(T) and predicted ratings for real data for full model, reports only model, and ratings only model
import os
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error
import warnings
# Use a serif font family for all regular figure text
plt.rcParams['font.family'] = 'serif'

# Render math expressions with mathtext's Computer Modern ("cm") font set,
# which gives a LaTeX-like look (this does not switch on LaTeX rendering itself)
plt.rcParams["mathtext.fontset"] = "cm"

In [3]:
# ---- input / output locations ----
base_file = '/share/garg/311_data/sb2377/clean_codebase/three_year_base.csv'
type_rating_observed_base_file = '/share/garg/311_data/sb2377/clean_codebase/three_year_type_rating_observed_base.csv'
results_dir = '/share/garg/311_data/sb2377/results'

# ---- user specified arguments ----
# display name -> value of the `typeagency` column identifying that complaint type
types = {
    'Street': 'StreetConditionDOT',
    'Park': 'MaintenanceorFacilityDPR',
    'Rodent': 'RodentDOHMH',
    'Food': 'FoodDOHMH',
    'DCWP': 'ConsumerComplaintDCWP',
}

# Each model variant has 13 job ids: one leading job followed by 12 more
# spaced 3 apart (the three variants are interleaved in the job numbering).
models = {
    'Full model': {'job_ids': [3000, *range(3005, 3041, 3)]},
    'Ratings-only model': {'job_ids': [3002, *range(3007, 3043, 3)]},
    'Reports-only model': {'job_ids': [3001, *range(3006, 3042, 3)]},
}

# epoch whose checkpoint / result files are read (kept as a string for path formatting)
epoch = '59'

In [4]:
# read both base tables: the one covering every type, and the one restricted
# to types that have observed ratings
base_df, type_rating_observed_base_df = (
    pd.read_csv(csv_path)
    for csv_path in (base_file, type_rating_observed_base_file)
)

In [5]:
# get type indices
def _lookup_type_indices(frame, type_ids):
    """Map each display name in ``type_ids`` to its ``type_idxs`` value in ``frame``.

    Parameters
    ----------
    frame : pd.DataFrame
        Table with ``typeagency`` and ``type_idxs`` columns.
    type_ids : dict
        Display name -> ``typeagency`` value to look up.

    Returns
    -------
    dict
        Display name -> first ``type_idxs`` value found for that ``typeagency``.

    Raises
    ------
    KeyError
        If a requested ``typeagency`` value does not occur in ``frame``.
    """
    pairs = frame[['typeagency', 'type_idxs']].drop_duplicates()
    lookup = {}
    for type_name, type_id in type_ids.items():
        matches = pairs.loc[pairs['typeagency'] == type_id, 'type_idxs']
        if matches.empty:
            # fail with the offending name instead of an opaque positional IndexError
            raise KeyError('typeagency {!r} (for type {!r}) not found'.format(type_id, type_name))
        lookup[type_name] = matches.iloc[0]
    return lookup


# for df with all types
indices = _lookup_type_indices(base_df, types)

# for df with only types with observed ratings
# (looked up separately because each base file carries its own `type_idxs` column)
type_rating_observed_indices = _lookup_type_indices(type_rating_observed_base_df, types)

In [6]:
# get predicted ratings for all jobs for types with observed ratings
checkpoint_file = '{}/job{}/model-epoch={}.ckpt'
results_file = '{}/job{}/epoch={}_test.pkl'

# per-model bookkeeping: how many checkpoint / result files exist, and the loaded frames
checkpoint_counters = {name: 0 for name in models}
results_counters = {name: 0 for name in models}
type_rating_observed_dfs = {name: [] for name in models}

for m, model_spec in models.items():
    for job_idx in model_spec['job_ids']:
        checkpoint_path = checkpoint_file.format(results_dir, job_idx, epoch)
        results_path = results_file.format(results_dir, job_idx, epoch)

        if os.path.exists(checkpoint_path):
            checkpoint_counters[m] += 1

        # jobs without a results file are skipped, so the list may be shorter than job_ids
        if not os.path.exists(results_path):
            continue
        results_counters[m] += 1

        # NOTE(review): pickle.load executes arbitrary code from the file; only use on trusted results
        with open(results_path, 'rb') as file:
            (pred_rating, true_rating, mask, node_embedding, type_embedding,
             node_idxs, type_idxs, demographics, pred_pt, true_t) = pickle.load(file)

        # keep only the fields needed downstream, one column each
        df = pd.DataFrame()
        for column, values in (('pred_rating', pred_rating),
                               ('true_rating', true_rating),
                               ('node_idxs', node_idxs),
                               ('type_idxs', type_idxs),
                               ('pred_pt', pred_pt),
                               ('true_t', true_t),
                               ('mask', mask)):
            df[column] = values

        type_rating_observed_dfs[m].append(df)

for m in models:
    print('{}: checkpoint files done = {}'.format(m, checkpoint_counters[m]))
    print('{}: results files done = {}'.format(m, results_counters[m]))

Full model: checkpoint files done = 13
Full model: results files done = 13
Ratings-only model: checkpoint files done = 13
Ratings-only model: results files done = 13
Reports-only model: checkpoint files done = 13
Reports-only model: results files done = 13


In [7]:
# get predicted P(T) and ratings for all jobs for types with unobserved ratings
checkpoint_file = '{}/job{}/model-epoch={}.ckpt'
results_file = '{}/job{}/epoch={}_test_unobserved.pkl'

# only these two models are read here (the ratings-only model is not in the list)
type_rating_unobserved_models = ['Full model', 'Reports-only model']
checkpoint_counters = dict.fromkeys(type_rating_unobserved_models, 0)
results_counters = dict.fromkeys(type_rating_unobserved_models, 0)
type_rating_unobserved_dfs = {name: [] for name in type_rating_unobserved_models}

for m in type_rating_unobserved_models:
    for job_idx in models[m]['job_ids']:
        checkpoint_path = checkpoint_file.format(results_dir, job_idx, epoch)
        results_path = results_file.format(results_dir, job_idx, epoch)

        if os.path.exists(checkpoint_path):
            checkpoint_counters[m] += 1

        # jobs without a results file are skipped, so the list may be shorter than job_ids
        if not os.path.exists(results_path):
            continue
        results_counters[m] += 1

        # NOTE(review): pickle.load executes arbitrary code from the file; only use on trusted results
        with open(results_path, 'rb') as file:
            (pred_rating, true_rating, mask, node_embedding, type_embedding,
             node_idxs, type_idxs, demographics, pred_pt, true_t) = pickle.load(file)

        # keep only the fields needed downstream, one column each
        df = pd.DataFrame()
        for column, values in (('pred_rating', pred_rating),
                               ('true_rating', true_rating),
                               ('node_idxs', node_idxs),
                               ('type_idxs', type_idxs),
                               ('pred_pt', pred_pt),
                               ('true_t', true_t),
                               ('mask', mask)):
            df[column] = values

        type_rating_unobserved_dfs[m].append(df)

for m in type_rating_unobserved_models:
    print('{}: checkpoint files done = {}'.format(m, checkpoint_counters[m]))
    print('{}: results files done = {}'.format(m, results_counters[m]))

Full model: checkpoint files done = 13
Full model: results files done = 13
Reports-only model: checkpoint files done = 13
Reports-only model: results files done = 13


In [8]:
# combine predicted P(T) and ratings for types with observed ratings and types with unobserved ratings
dfs = {}
for m in type_rating_unobserved_models:
    observed_dfs = type_rating_observed_dfs[m]
    unobserved_dfs = type_rating_unobserved_dfs[m]

    # Frames are paired purely by list position. Both lists skip jobs whose results
    # file is missing, so a length mismatch means the pairing is no longer job-to-job.
    # NOTE(review): equal lengths do not strictly prove alignment (different jobs could
    # be missing on each side); confirm via the "results files done" counts.
    assert len(observed_dfs) == len(unobserved_dfs), (
        '{}: {} observed vs {} unobserved result frames'.format(
            m, len(observed_dfs), len(unobserved_dfs)))

    # stack the observed-type rows and unobserved-type rows of each job into one frame
    dfs[m] = [pd.concat([observed_df, unobserved_df])
              for observed_df, unobserved_df in zip(observed_dfs, unobserved_dfs)]

# nothing to combine for the ratings-only model: its observed-type frames are used as-is
dfs['Ratings-only model'] = type_rating_observed_dfs['Ratings-only model']

In [9]:
# print out correlation and rmse results for predicted P(T)
p_values_pt = {}
for m in type_rating_unobserved_models:
    df_set = dfs[m]
    p_values_pt[m] = []  # one p-value per (type, job) pair, NaN results included
    corrs = []  # one row per type, one column per job
    rmses = []
    # Iterate over the type indices actually present in the first job's frame rather
    # than assuming they are exactly 0..n-1 (identical order when they are contiguous).
    for idx in sorted(df_set[0]['type_idxs'].unique()):
        type_corrs = []
        type_rmses = []
        for df in df_set:
            df_type = df[df['type_idxs'] == idx]
            # average all rows of a node so each node contributes one point
            node_df = df_type.groupby(['node_idxs', 'type_idxs']).mean().reset_index()

            # calculate correlation (warnings silenced; an undefined correlation yields NaN)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                corr = pearsonr(node_df['pred_pt'], node_df['true_t'])
            type_corrs.append(corr[0])
            p_values_pt[m].append(corr[1])

            # calculate rmse
            rmse = np.sqrt(mean_squared_error(node_df['pred_pt'], node_df['true_t']))
            type_rmses.append(rmse)

        corrs.append(type_corrs)
        rmses.append(type_rmses)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # calculate mean and 95% confidence interval for each metric
        for metric_name, per_type_values in (('Corr', corrs), ('RMSE', rmses)):
            values = np.array(per_type_values)
            # drop every type (row) that has a NaN for at least one job
            filtered = values[~np.isnan(values).any(axis=1)]
            mean_for_each_job = filtered.mean(axis=0)
            mean_overall = filtered.mean()
            # population std (ddof=0) / sqrt(n - 1) equals sample std / sqrt(n),
            # i.e. the standard error of the mean across jobs
            se_across_jobs = np.std(mean_for_each_job) / np.sqrt(len(mean_for_each_job) - 1)
            # '\\pm' emits the same literal backslash-pm as before without the invalid '\p' escape
            print('Model: {}, {}: {:.4f} \\pm {:.4f}'.format(m, metric_name, mean_overall, 1.96 * se_across_jobs))


Model: Full model, Corr: 0.2371 \pm 0.0110
Model: Full model, RMSE: 0.1104 \pm 0.0025
Model: Reports-only model, Corr: 0.5406 \pm 0.0063
Model: Reports-only model, RMSE: 0.0566 \pm 0.0012


In [13]:
# print proportion of p-values < threshold for each model
# (a NaN p-value never compares below the threshold, so it only adds to the denominator)
threshold = {'Full model': 0.01, 'Reports-only model': 0.01}
for m in ['Full model', 'Reports-only model']:
    count = sum(1 for v in p_values_pt[m] if v < threshold[m])
    print(m, count / len(p_values_pt[m]))


Full model 0.8273381294964028
Reports-only model 0.9640287769784173


In [20]:
# print proportion of p-values < threshold for each model
# (a NaN p-value never compares below the threshold, so it only adds to the denominator)
threshold = {'Full model': 0.1, 'Reports-only model': 0.1}
for m in ['Full model', 'Reports-only model']:
    count = sum(1 for v in p_values_pt[m] if v < threshold[m])
    print(m, count / len(p_values_pt[m]))


Full model 0.8516878804648589
Reports-only model 0.9723298284449363


In [14]:
# print out correlation and rmse results for predicted ratings
p_values_r = {}
for m in models:
    df_set = type_rating_observed_dfs[m]
    p_values_r[m] = []  # one p-value per (type, job) pair
    corrs = []  # one row per type, one column per job
    rmses = []
    for t in types:
        type_corrs = []
        type_rmses = []
        idx = indices[t]
        type_rating_observed_idx = type_rating_observed_indices[t]
        # Ratings-only results are filtered with the index taken from the observed-ratings
        # base file; the other models use the index taken from the full base file.
        selected_idx = type_rating_observed_idx if m == 'Ratings-only model' else idx
        for df in df_set:
            df_type = df[df['type_idxs'] == selected_idx]
            # average all rows of a node so each node contributes one point
            node_df = df_type.groupby(['node_idxs', 'type_idxs']).mean().reset_index()

            with warnings.catch_warnings():
                # a filter is required here: catch_warnings() alone suppresses nothing
                warnings.simplefilter("ignore")
                if m == 'Reports-only model':
                    # for reports-only model, we use -P(T) as a proxy for r
                    corr = pearsonr(-1 * node_df['pred_pt'], node_df['true_rating'])
                else:
                    corr = pearsonr(node_df['pred_rating'], node_df['true_rating'])
            type_corrs.append(corr[0])
            p_values_r[m].append(corr[1])

            # calculate rmse (left as NaN for the reports-only model, which is
            # scored through the -P(T) proxy above rather than pred_rating)
            if m == 'Reports-only model':
                rmse = np.nan
            else:
                rmse = np.sqrt(mean_squared_error(node_df['pred_rating'], node_df['true_rating']))
            type_rmses.append(rmse)

        corrs.append(type_corrs)
        rmses.append(type_rmses)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # calculate mean and 95% confidence interval for each metric
        for metric_name, per_type_values in (('Corr', corrs), ('RMSE', rmses)):
            values = np.array(per_type_values)
            # drop every type (row) that has a NaN for at least one job; for the
            # reports-only RMSE this removes all rows, so "nan \pm nan" is printed
            filtered = values[~np.isnan(values).any(axis=1)]
            mean_for_each_job = filtered.mean(axis=0)
            mean_overall = filtered.mean()
            # population std (ddof=0) / sqrt(n - 1) equals sample std / sqrt(n),
            # i.e. the standard error of the mean across jobs
            se_across_jobs = np.std(mean_for_each_job) / np.sqrt(len(mean_for_each_job) - 1)
            # '\\pm' emits the same literal backslash-pm as before without the invalid '\p' escape
            print('Model: {}, {}: {:.4f} \\pm {:.4f}'.format(m, metric_name, mean_overall, 1.96 * se_across_jobs))


Model: Full model, Corr: 0.5303 \pm 0.0194
Model: Full model, RMSE: 0.5833 \pm 0.0123
Model: Ratings-only model, Corr: 0.5223 \pm 0.0185
Model: Ratings-only model, RMSE: 0.5852 \pm 0.0116
Model: Reports-only model, Corr: 0.0993 \pm 0.0143
Model: Reports-only model, RMSE: nan \pm nan


In [22]:
# print proportion of p-values < threshold for each model
# (a NaN p-value never compares below the threshold, so it only adds to the denominator)
threshold = {'Full model': 0.01, 'Ratings-only model': 0.01, 'Reports-only model': 0.01}
for m in models:
    count = sum(1 for v in p_values_r[m] if v < threshold[m])
    print(m, count / len(p_values_r[m]))


Full model 1.0
Ratings-only model 1.0
Reports-only model 0.4461538461538462


In [21]:
# print proportion of p-values < threshold for each model
# (a NaN p-value never compares below the threshold, so it only adds to the denominator)
threshold = {'Full model': 0.05, 'Ratings-only model': 0.05, 'Reports-only model': 0.05}
for m in models:
    count = sum(1 for v in p_values_r[m] if v < threshold[m])
    print(m, count / len(p_values_r[m]))


Full model 1.0
Ratings-only model 1.0
Reports-only model 0.5384615384615384


In [23]:
# print proportion of p-values < threshold for each model
# (a NaN p-value never compares below the threshold, so it only adds to the denominator)
threshold = {'Full model': 0.1, 'Ratings-only model': 0.1, 'Reports-only model': 0.1}
for m in models:
    count = sum(1 for v in p_values_r[m] if v < threshold[m])
    print(m, count / len(p_values_r[m]))


Full model 1.0
Ratings-only model 1.0
Reports-only model 0.6
