In [3]:
# Import all necessary Libraries
import numpy as np
import pandas as pd
import sys
import csv
import matplotlib.pyplot as plt

In [4]:
# Dataframe building function

def dataframeBuilder(csvFilename):
    """Load one simulation-output CSV and add the variance-reduction columns.

    The file is expected to provide the columns ``total_ride_time``,
    ``sim_results``, ``sim_results_anti`` and ``total_overpacked_time``.

    Parameters
    ----------
    csvFilename : str, path or file-like
        Passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents without ``total_ride_time`` plus two new columns:
        ``sim_avg`` (mean of the original and antithetic results) and
        ``control_var`` (``sim_results`` adjusted by the control variate
        ``total_overpacked_time``).

    Notes
    -----
    Prints the variance of every column as a side effect.
    """
    # Read in the CSV File
    df = pd.read_csv(csvFilename)
    # Drop the 'total_ride_time' column as it did not aid in variance reduction.
    # Rebinding the name (instead of inplace=True) avoids hidden in-place mutation.
    df = df.drop('total_ride_time', axis=1)
    # Create the averaged random variable using actual and antithetic results
    df['sim_avg'] = (df['sim_results'] + df['sim_results_anti']) / 2

    # Finding the optimal c value for lowest variance in our new control
    # random variable: c = -Cov(X, Y) / Var(X)
    overpacked = df.total_overpacked_time
    var_of_x = overpacked.var()
    cov_of_x_and_y = overpacked.cov(df.sim_results)
    if pd.isnull(var_of_x) or pd.isnull(cov_of_x_and_y) or var_of_x == 0:
        # A constant control (or fewer than two rows) carries no information;
        # dividing would give NaN/inf, so apply no adjustment at all.
        c = 0.0
    else:
        c = -cov_of_x_and_y / var_of_x

    # Add in new column of values of our control variable
    df['control_var'] = df['sim_results'] + c * (overpacked - overpacked.mean())

    # Show variances of all columns
    print(df.var())

    return df

In [5]:
def makeListOfDataFrames(filenames):
    """Return one DataFrame per entry of ``filenames``, in the same order.

    Each entry is handed to ``dataframeBuilder``; the results are collected
    into a new list.
    """
    return [dataframeBuilder(csvPath) for csvPath in filenames]

In [6]:
# Make a DataFrame of the Variance Reduction Results

def _percentReduction(baseline, reduced):
    """Percentage by which ``reduced`` is smaller than ``baseline``."""
    # With a zero (or missing) baseline the ratio is undefined; report NaN
    # rather than dividing by zero.
    if pd.isnull(baseline) or baseline == 0:
        return float('nan')
    return ((baseline - reduced) / baseline) * 100


def calculateVRAmount(dfs, index=None):
    """Summarise how much each variance-reduction technique helped.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Each frame must have the columns ``sim_results``, ``sim_avg`` and
        ``control_var``.
    index : sequence of str, optional
        Row labels, one per frame in ``dfs`` (extra labels are ignored).
        Defaults to the six Brooklyn/Manhattan scenario names, so a run with
        a different set of scenarios only needs to pass its own labels.

    Returns
    -------
    pandas.DataFrame
        One row per frame in ``dfs`` with the variance of the original
        estimator, of the antithetic average and of the control-variate
        estimator, followed by the percentage reduction achieved by the
        antithetic and control-variate estimators relative to the original.

    Raises
    ------
    ValueError
        If ``dfs`` holds more frames than there are row labels.
    """
    headers = ['original_var_var', 'antithetic_var_var', 'difference_estim_var', '%_reduction_by_antithetic', '%_reduction_by_control']
    if index is None:
        index = ['Brooklyn-Bound-Control', 'Brooklyn-Bound-Extra-Trains', 'Brooklyn-Bound-Limit-People', 'Manhattan-Bound-Control', 'Manhattan-Bound-Extra-Trains', 'Manhattan-Bound-Limit-People']
    labels = list(index)
    if len(dfs) > len(labels):
        raise ValueError(
            "calculateVRAmount received {0} dataframes but only {1} row labels".format(len(dfs), len(labels))
        )

    # Collect every row first and build the frame once, so the numeric columns
    # keep a float dtype instead of the object dtype of a cell-by-cell fill.
    rows = []
    for df in dfs:
        originalVar = df.sim_results.var()
        antitheticVar = df.sim_avg.var()
        controlVar = df.control_var.var()
        rows.append([
            originalVar,
            antitheticVar,
            controlVar,
            # Percent Difference Calculation: antithetic
            _percentReduction(originalVar, antitheticVar),
            # Percent Difference Calculation: control
            _percentReduction(originalVar, controlVar),
        ])

    # Only label the rows that exist; unused labels would otherwise show up as
    # all-NaN rows in the summary.
    return pd.DataFrame(rows, columns=headers, index=labels[:len(rows)])


In [7]:
# Simulation output files, one per scenario. The order must match the row
# labels used by calculateVRAmount (Brooklyn-bound first, then Manhattan-bound).
filenames = [
    'output-brooklynbound-control.csv',
    'output-brooklynbound-extratrain.csv',
    'output-brooklynbound-limitpeople.csv',
    'output-manhattanbound-control.csv',
    'output-manhattanbound-extratrain.csv',
    'output-manhattanbound-limitpeople.csv',
]

In [8]:
# Build one augmented DataFrame per scenario CSV. dataframeBuilder prints each
# frame's per-column variances as it runs, which produces the text output below.
dfs = makeListOfDataFrames(filenames)
# Summarise the original, antithetic and control-variate variances per scenario.
df_total = calculateVRAmount(dfs)
# Bare expression so the notebook renders the summary table.
df_total

sim_results              1.028311e-04
sim_results_anti         1.073194e-04
total_overpacked_time    1.768417e+08
sim_avg                  9.816265e-05
control_var              3.947791e-05
dtype: float64
sim_results              8.785749e-05
sim_results_anti         8.992849e-05
total_overpacked_time    1.650597e+08
sim_avg                  8.316578e-05
control_var              3.349377e-05
dtype: float64
sim_results              7.625988e-06
sim_results_anti         6.624271e-06
total_overpacked_time    1.328589e+08
sim_avg                  6.266563e-06
control_var              3.620360e-06
dtype: float64
sim_results              3.319487e-04
sim_results_anti         3.444146e-04
total_overpacked_time    9.529417e+07
sim_avg                  2.360964e-04
control_var              1.984959e-04
dtype: float64
sim_results              2.952593e-04
sim_results_anti         3.505901e-04
total_overpacked_time    9.792394e+07
sim_avg                  2.318755e-04
control_var              1.8

Unnamed: 0,original_var_var,antithetic_var_var,difference_estim_var,%_reduction_by_antithetic,%_reduction_by_control
Brooklyn-Bound-Control,0.000102831,9.81626e-05,3.94779e-05,4.53996,61.609
Brooklyn-Bound-Extra-Trains,8.78575e-05,8.31658e-05,3.34938e-05,5.34014,61.8772
Brooklyn-Bound-Limit-People,7.62599e-06,6.26656e-06,3.62036e-06,17.8262,52.526
Manhattan-Bound-Control,0.000331949,0.000236096,0.000198496,28.8756,40.2029
Manhattan-Bound-Extra-Trains,0.000295259,0.000231876,0.000182024,21.4672,38.3513
Manhattan-Bound-Limit-People,9.03748e-06,8.11491e-06,1.15499e-06,10.2083,87.22


In [9]:
def _formatPercent(value):
    """Render a number as a two-decimal percentage string, e.g. 4.53996 -> '4.54%'."""
    # Values that are already strings were formatted by an earlier run of this
    # cell; applying the 'f' format code to them would raise ValueError, so
    # pass them through unchanged. This keeps the cell safe to re-run.
    if isinstance(value, str):
        return value
    return "{0:.2f}%".format(value)


# Replace the two percentage columns with display strings (overwrites the
# numeric values held in df_total).
for column in ('%_reduction_by_antithetic', '%_reduction_by_control'):
    df_total[column] = pd.Series([_formatPercent(val) for val in df_total[column]], index=df_total.index)
df_total

Unnamed: 0,original_var_var,antithetic_var_var,difference_estim_var,%_reduction_by_antithetic,%_reduction_by_control
Brooklyn-Bound-Control,0.000102831,9.81626e-05,3.94779e-05,4.54%,61.61%
Brooklyn-Bound-Extra-Trains,8.78575e-05,8.31658e-05,3.34938e-05,5.34%,61.88%
Brooklyn-Bound-Limit-People,7.62599e-06,6.26656e-06,3.62036e-06,17.83%,52.53%
Manhattan-Bound-Control,0.000331949,0.000236096,0.000198496,28.88%,40.20%
Manhattan-Bound-Extra-Trains,0.000295259,0.000231876,0.000182024,21.47%,38.35%
Manhattan-Bound-Limit-People,9.03748e-06,8.11491e-06,1.15499e-06,10.21%,87.22%
