In [1]:
import json
import math
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display

from Bucket import Bucket
from Distribution import Distribution
from GradeSalaryHistory import GradeSalaryHistory


# Lift pandas' column limit so wide DataFrames are displayed in full
# rather than with the middle columns elided.
pd.set_option('display.max_columns', None)
# Apply matplotlib's 'ggplot' style sheet to every plot made after this point.
plt.style.use('ggplot')

# Show matplotlib plots in this notebook
%matplotlib inline
# Setting plot parameters
from pylab import rcParams
# Notebook-wide matplotlib defaults: an 8x8 figure size and a legend
# font size of 15. `params` stays a module-level name after this cell.
params = {
    'figure.figsize': (8, 8),
    'legend.fontsize': 15
}
# Merge the overrides into matplotlib's global rcParams.
rcParams.update(params)

## Calculating data per coop term

In [2]:
def get_coop_summary(num, data_dir='../private/rows', min_salary=100):
    """Print mean, standard deviation and count of the salaries for one co-op term.

    Reads ``<data_dir>/coop_salary_<num>.csv``. The first line is treated as
    a header and skipped. For every other line the value is built from all
    comma-separated fields after the first, joined together, so a figure
    written with a thousands separator (``"2,914.62"``) still parses.

    Rows are ignored when they are blank, have no second field, have an
    empty second field (quoted or not), or hold a value less than or equal
    to ``min_salary``.

    Parameters
    ----------
    num : int or str
        Term identifier inserted into the file name.
    data_dir : str, optional
        Directory containing the per-term CSV files.
    min_salary : float, optional
        Exclusive lower bound for a value to be kept.
        NOTE(review): the reason for the default of 100 is not recorded in
        this notebook -- presumably it drops placeholder entries; confirm.

    Returns
    -------
    None
        The three statistics are printed, one per line.

    Raises
    ------
    ValueError
        If a non-empty value cannot be converted to ``float``.
    """
    path = os.path.join(data_dir, 'coop_salary_' + str(num) + '.csv')
    salaries = []
    with open(path, 'r') as f:
        next(f, None)  # discard the header row (no-op on an empty file)
        for line in f:
            # Strip quotes *before* splitting so that `""` is recognised as
            # an empty cell and quoted thousands separators are re-joined.
            fields = line.replace('"', '').split(',')
            if len(fields) < 2 or fields[1].strip() == '':
                continue
            val = float(''.join(fields[1:]).strip())
            if val <= min_salary:
                continue
            salaries.append(val)

    # np.mean / np.std warn on an empty sequence; report NaN quietly instead.
    if salaries:
        mean, std = np.mean(salaries), np.std(salaries)
    else:
        mean = std = float('nan')
    print('Mean: ' + str(mean))
    print('Standard Deviation: ' + str(std))
    print('Size: ' + str(len(salaries)))

In [3]:
# Report the salary statistics for co-op terms 1 through 6, one after another.
for coop_term in range(1, 7):
    heading = 'Coop term #' + str(coop_term)
    print(heading)
    get_coop_summary(coop_term)
    # A newline string plus print's own newline leaves a two-line gap.
    print('\n')

Coop term #1
Mean: 2914.6247409523808
Standard Deviation: 1134.3700423963214
Size: 105


Coop term #2
Mean: 4236.191445454546
Standard Deviation: 2067.6391842674298
Size: 110


Coop term #3
Mean: 5895.652682727274
Standard Deviation: 3594.6262012228067
Size: 110


Coop term #4
Mean: 7981.304238532111
Standard Deviation: 3498.7648385787347
Size: 109


Coop term #5
Mean: 9338.25503181818
Standard Deviation: 3429.123800084302
Size: 110


Coop term #6
Mean: 10015.822339622644
Standard Deviation: 3420.511930365345
Size: 106




In [10]:
def get_term_summary(num, data_dir='../private/rows', show_values=True):
    """Print mean, standard deviation and count of the averages for one study term.

    Reads ``<data_dir>/term_avg_<num>.csv``. The first line is treated as a
    header and skipped. For every other line the value is built from all
    comma-separated fields after the first, joined together, so a quoted
    figure containing commas still parses.

    Rows are ignored when they are blank, have no second field, have an
    empty second field (quoted or not), or whose second field is the
    literal ``exchange`` (quoted or not).

    Parameters
    ----------
    num : int or str
        Term identifier inserted into the file name (e.g. ``'1a'``).
    data_dir : str, optional
        Directory containing the per-term CSV files.
    show_values : bool, optional
        When true (the default) the sorted list of every parsed average is
        printed after the statistics. Pass ``False`` to keep individual
        values out of the notebook output.

    Returns
    -------
    None
        The statistics are printed, one per line.

    Raises
    ------
    ValueError
        If a non-empty, non-``exchange`` value cannot be converted to ``float``.
    """
    path = os.path.join(data_dir, 'term_avg_' + str(num) + '.csv')
    avgs = []
    with open(path, 'r') as f:
        next(f, None)  # discard the header row (no-op on an empty file)
        for line in f:
            # Strip quotes *before* splitting so quoted cells (`""`,
            # `"exchange"`) are matched by the same checks as bare ones.
            fields = line.replace('"', '').split(',')
            if len(fields) < 2:
                continue
            marker = fields[1].strip()
            if marker == '' or marker == 'exchange':
                continue
            avgs.append(float(''.join(fields[1:]).strip()))

    # np.mean / np.std warn on an empty sequence; report NaN quietly instead.
    if avgs:
        mean, std = np.mean(avgs), np.std(avgs)
    else:
        mean = std = float('nan')
    print('Mean: ' + str(mean))
    print('Standard Deviation: ' + str(std))
    print('Size: ' + str(len(avgs)))
    if show_values:
        print(sorted(avgs))

In [11]:
# Report the average-grade statistics for each study term, in order.
for study_term in ('1a', '1b', '2a', '2b', '3a', '3b', '4a'):
    heading = 'Study Term #' + str(study_term)
    print(heading)
    get_term_summary(study_term)
    # A newline string plus print's own newline leaves a two-line gap.
    print('\n')

Study Term #1a
Mean: 84.08990990990992
Standard Deviation: 8.870520516075594
Size: 111
[58.82, 63.18, 64.0, 64.46, 66.0, 66.8, 67.73, 68.0, 68.18, 69.0, 71.91, 72.0, 72.36, 74.0, 74.27, 74.46, 74.91, 75.0, 75.0, 76.27, 76.36, 76.73, 76.73, 77.0, 77.0, 77.0, 77.18, 78.91, 79.0, 79.2, 79.82, 79.91, 80.0, 80.0, 80.0, 80.91, 81.0, 81.0, 81.18, 81.36, 81.6, 81.91, 82.0, 82.0, 82.0, 82.46, 82.46, 84.0, 84.0, 84.0, 84.0, 84.09, 84.18, 84.36, 84.36, 84.73, 85.0, 85.64, 85.73, 86.1, 86.27, 86.91, 87.0, 87.0, 87.0, 87.0, 87.55, 87.64, 88.0, 88.5, 88.82, 89.0, 89.0, 89.46, 90.0, 90.0, 90.0, 90.0, 90.27, 90.36, 90.4, 90.55, 91.09, 91.09, 92.0, 92.55, 92.6, 92.64, 92.64, 92.73, 92.73, 93.0, 93.0, 93.0, 93.18, 93.27, 94.0, 94.18, 94.36, 94.5, 94.82, 94.82, 95.0, 95.0, 95.0, 96.0, 96.73, 97.8, 97.9, 98.0, 98.36]


Study Term #1b
Mean: 80.59818181818181
Standard Deviation: 9.513253738559426
Size: 110
[49.5, 50.4, 60.0, 63.0, 65.8, 66.0, 66.0, 67.0, 67.0, 67.0, 67.0, 67.4, 68.0, 68.0, 68.4, 69.0, 69.6,

In [6]:
def get_attendance_summary(num, data_dir='../private/rows'):
    """Print mean, standard deviation and count of the attendance values for one term.

    Reads ``<data_dir>/class_attendance_<num>.csv``. The first line is
    treated as a header and skipped. For every other line the value is built
    from all comma-separated fields after the first, joined together.

    Rows are ignored when they are blank, have no second field, or have an
    empty second field (quoted or not).

    Parameters
    ----------
    num : int or str
        Term identifier inserted into the file name (e.g. ``'1a'``).
    data_dir : str, optional
        Directory containing the per-term CSV files.

    Returns
    -------
    None
        The three statistics are printed, one per line.

    Raises
    ------
    ValueError
        If a non-empty value cannot be converted to ``float``.
    """
    path = os.path.join(data_dir, 'class_attendance_' + str(num) + '.csv')
    avgs = []
    with open(path, 'r') as f:
        next(f, None)  # discard the header row (no-op on an empty file)
        for line in f:
            # Strip quotes *before* splitting so that `""` is recognised as
            # an empty cell rather than reaching float('').
            fields = line.replace('"', '').split(',')
            if len(fields) < 2 or fields[1].strip() == '':
                continue
            avgs.append(float(''.join(fields[1:]).strip()))

    # np.mean / np.std warn on an empty sequence; report NaN quietly instead.
    if avgs:
        mean, std = np.mean(avgs), np.std(avgs)
    else:
        mean = std = float('nan')
    print('Mean: ' + str(mean))
    print('Standard Deviation: ' + str(std))
    print('Size: ' + str(len(avgs)))

In [7]:
# Attendance: report the statistics for each study term, in order.
for study_term in ('1a', '1b', '2a', '2b', '3a', '3b', '4a'):
    heading = 'Attendance per Term #' + str(study_term)
    print(heading)
    get_attendance_summary(study_term)
    # A newline string plus print's own newline leaves a two-line gap.
    print('\n')

Attendance per Term #1a
Mean: 3.7610619469026547
Standard Deviation: 0.5199444538608277
Size: 113


Attendance per Term #1b
Mean: 3.274336283185841
Standard Deviation: 0.8948998817845736
Size: 113


Attendance per Term #2a
Mean: 3.0973451327433628
Standard Deviation: 0.8088514939656533
Size: 113


Attendance per Term #2b
Mean: 2.982300884955752
Standard Deviation: 0.8090451149125236
Size: 113


Attendance per Term #3a
Mean: 2.814159292035398
Standard Deviation: 0.8879595040432519
Size: 113


Attendance per Term #3b
Mean: 2.4601769911504423
Standard Deviation: 0.9779461793657561
Size: 113


Attendance per Term #4a
Mean: 2.504424778761062
Standard Deviation: 1.0055060259568664
Size: 113


