In [1]:
import calendar
import os
import time
from datetime import datetime
from datetime import timedelta

import arrow
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
from fpdf import FPDF

In [2]:
# --- Configuration ---------------------------------------------------------
# Which dummy database to load: 'chinese' for China, 'korean' for Korea
country = 'chinese'
# Windows account that owns the clone: 'mfitz' for LAPPIE, 'Me' for GLaDOS
UN = 'mfitz'
# Root folder of the cloned database repository for that account / country
path = r'C:\Users\{}\Documents\GitHub Clones\{}-student-dummy-database'.format(UN, country)


def _read_table(table_name):
    """Read the CSV called `table_name` from the database's Tables folder."""
    return pd.read_csv(r'{}\Tables\{}.csv'.format(path, table_name))


# --- Load each table into its own DataFrame --------------------------------
students = _read_table('students')
classes = _read_table('classes')
tests = _read_table('tests')
test_results = _read_table('test_results')
attendance = _read_table('attendance')
hw_g = _read_table('hw_grades')
hw_a = _read_table('hw_assignments')
participation = _read_table('participation')
points = _read_table('points')

## Find the Student's ID

In [3]:
# Preview the English-name column to help locate the student of interest
students.loc[:, 'STUDENT_NAME_ENGLISH']

0         Aline
1       Augusta
2      Samantha
3         Shane
4       Colleen
         ...   
495       James
496        Mike
497      Martha
498      George
499       Kevin
Name: STUDENT_NAME_ENGLISH, Length: 500, dtype: object

In [4]:
# English name of the student the report will be generated for
student_name = 'Kevin'
# Show all columns for every student with that English name, so the right
# STUDENT_ID can be read off the result when several students share it
name_matches = students['STUDENT_NAME_ENGLISH'].eq(student_name)
students[name_matches]

Unnamed: 0,STUDENT_ID,STUDENT_NAME_LOCAL,STUDENT_NAME_ENGLISH,STUDENT_AGE,STUDENT_SEX,STUDENT_ENROLLED,STUDENT_FIRST_DAY,STUDENT_LAST_DAY,CLASS_ID
68,68,郑孔,Kevin,11,male,Y,,,2
499,499,井辅,Kevin,4,female,Y,,,0


In [5]:
# ID of the student the report is for
student_id = 68
# Filter each table once and read the needed fields from the matching row.
# .item() raises unless exactly one row matches.
student_row = students.loc[students['STUDENT_ID'] == student_id]
# Class the student belongs to
class_id = student_row['CLASS_ID'].item()
class_row = classes.loc[classes['CLASS_ID'] == class_id]
# Day on which that class meets
day = class_row['CLASS_DAY'].item()
# Time at which that class meets
# NOTE(review): this name shadows the `time` module imported in the first cell
time = class_row['CLASS_TIME'].item()
# The student's name in the local script
student_name_local = student_row['STUDENT_NAME_LOCAL'].item()

## Define Various Time Variables

In [6]:
# Fixed date the report is generated for
setDate = datetime(2014, 7, 23)
# Start from the real current date, then pin the report to setDate
today = datetime.now().date()
today = setDate
# Year and month number of the report date
year, month = today.year, today.month
# Full month name and its 3-letter abbreviation
monthName = calendar.month_name[month]
monthAbbr = calendar.month_abbr[month]
# Report date formatted as YYYY-MM-DD for use in the report
todayString = today.strftime("%Y-%m-%d")
# Quarter name for the month: Dec-Feb winter, Mar-May spring, Jun-Aug summer,
# Sep-Nov fall. (month % 12) // 3 maps 12,1,2 -> 0; 3-5 -> 1; 6-8 -> 2; 9-11 -> 3.
thisQuarter = ("winter", "spring", "summer", "fall")[(month % 12) // 3]

In [7]:
# First three characters of the class day, used below as the weekmask
# NOTE(review): assumes CLASS_DAY holds a full English weekday name such as
# 'Monday' so that the slice is a valid weekmask token - confirm.
dayAbbr = day[0:3]
# First calendar day of the report month
month_first_day = today.replace(day = 1)
# Last calendar day of the report month.
# calendar.monthrange() returns (weekday of the 1st, number of days in month).
# Only the second element is wanted; unpacking the first into `year`
# overwrote the report year with a weekday number and made this cell give
# different results when re-run.
last_day = calendar.monthrange(today.year, today.month)[1]
month_last_day = today.replace(day = last_day)
# Custom business-day frequency whose only working day is the class weekday
days = pd.offsets.CustomBusinessDay(weekmask = dayAbbr)
# Every class date within the report month, as Python datetime objects
classDates = pd.bdate_range(start = month_first_day, end = month_last_day, freq = days).to_pydatetime().tolist()
# Date of the last class of the month, formatted as YYYY-MM-DD
testDate = max(classDates).strftime('%Y-%m-%d')

# Student Report

## Tests

### Tests Statistics

In [11]:
# All test results recorded for the student's class
class_test_results = test_results.loc[test_results['CLASS_ID'] == class_id]
# All-time mean / max / min test percentage over the whole class
class_test_percentages = class_test_results['TEST_SCORE_PERCENTAGE']
class_Test_AVG_all_time = class_test_percentages.mean()
class_Test_MAX_all_time = class_test_percentages.max()
class_Test_MIN_all_time = class_test_percentages.min()

# Every TEST_ID from the class's lowest to its highest, inclusive
test_ids = range(class_test_results['TEST_ID'].min(), class_test_results['TEST_ID'].max() + 1)
# Class-average raw score for each of those tests (NaN for an id in the range
# that the class has no results for). Written as a comprehension so that no
# loop variable overwrites the `tests` DataFrame loaded from tests.csv.
class_Tests_AVGs = [
    class_test_results.loc[class_test_results['TEST_ID'] == test_id, 'TEST_SCORE'].mean()
    for test_id in test_ids
]

# Divisor that turns an average raw score into a fraction.
# NOTE(review): presumably every test is scored out of 50 points - confirm.
TEST_MAX_SCORE = 50

# One row per TEST_ID with the class-average score and its fraction of the maximum
testAverages = pd.DataFrame({'TEST_ID': list(test_ids), 'AVERAGE_SCORE': class_Tests_AVGs})
testAverages['AVERAGE_PERCENT'] = testAverages['AVERAGE_SCORE'] / TEST_MAX_SCORE

### Tests Plots

In [None]:
# Line plot of test_grades against test_dates on a 16 x 12 inch figure.
# NOTE(review): test_dates and test_grades are not assigned in any cell visible
# here; on a fresh kernel this cell raises NameError unless they are defined first.
fig_dims = (16, 12)
fig, ax = plt.subplots(figsize = fig_dims)
# Draw explicitly on the axes created above
plotTestGradesLine = sb.lineplot(x = test_dates, y = test_grades, legend = False, ax = ax)

In [None]:
# Histogram of test_grades on a 16 x 12 inch figure, binned at the letter-grade
# boundaries (F, D, C, B, A).
# NOTE(review): test_grades is not assigned in any cell visible here.
fig_dims = (16, 12)
fig, ax = plt.subplots(figsize = fig_dims)
grade_bin_edges = [0.0, 0.599, 0.699, 0.799, 0.899, 1.0]
# Draw explicitly on the axes created above
plotTestGradesHistogram = sb.histplot(data = test_grades, bins = grade_bin_edges, ax = ax)

In [None]:
# Box plot of test_grades with the individual values overlaid as red points,
# on a 16 x 12 inch figure.
# NOTE(review): test_grades is not assigned in any cell visible here.
fig_dims = (16, 12)
fig, ax = plt.subplots(figsize = fig_dims)
# Both layers are drawn on the same axes; the name ends up bound to the
# return value of the swarm plot call
plotTestGradesBox = sb.boxplot(x = test_grades, ax = ax)
plotTestGradesBox = sb.swarmplot(x = test_grades, color = 'red', size = 10, ax = ax)

### Tests Report

## Attendance

### Attendance Statistics

In [None]:
# MISCELLANEOUS STATISTICS

# All attendance rows recorded for this student
attendanceRecord = attendance.loc[attendance['STUDENT_ID'] == student_id]
# Number of rows per attendance status, indexed by the status label
attendanceStatusCounts = attendanceRecord['ATTENDANCE_STATUS'].value_counts()
# The same counts as a plain list (most frequent status first)
attendanceRecord_list = attendanceStatusCounts.to_list()
# Number of absences, looked up by label. value_counts() orders by frequency,
# so reading positions 0 and 1 swapped present/absent for a student with more
# absences than attendances and raised IndexError for one with no absences.
daysAbsent = int(attendanceStatusCounts.get('absent', 0))
# Every other recorded status is counted as present
# NOTE(review): assumes any status other than 'absent' means the student
# attended - confirm against the values used in attendance.csv.
daysPresent = int(attendanceStatusCounts.sum()) - daysAbsent
# Total number of classes with a recorded status
totalClasses = daysPresent + daysAbsent
# Rows for the missed classes, without the student id and status columns
absentRecords = attendanceRecord.loc[attendanceRecord['ATTENDANCE_STATUS'] == 'absent']
missedClasses = absentRecords.drop(['STUDENT_ID', 'ATTENDANCE_STATUS'], axis = 1)

# AVERAGES

# Average number of classes per absence (total classes / absences);
# undefined (NaN) when the student has never been absent
averageNumberOfClassesBeforeAbsence = totalClasses / daysAbsent if daysAbsent else float('nan')

# PERCENTAGES

# Fraction of classes missed; undefined (NaN) when there are no records at all
perMissedClasses = daysAbsent / totalClasses if totalClasses else float('nan')
# The same fraction formatted as a percentage with two decimals
perMissedClassesFormat = "{:.2%}".format(perMissedClasses)

### Attendance Plots

### Attendance Report

## Homework

### Homework Statistics

In [None]:
# Row selectors: this student's homework rows and the whole class's rows
is_student_hw = hw_g['STUDENT_ID'] == student_id
is_class_hw = hw_g['CLASS_ID'] == class_id

# Homework score percentages and assigned dates for this student
hw_percents = hw_g.loc[is_student_hw, 'HW_SCORE_PERCENT']
hw_dates = hw_g.loc[is_student_hw, 'HW_ASSIGNED_DATE']
# Homework score percentages for every student in this student's class
hw_percents_class = hw_g.loc[is_class_hw, 'HW_SCORE_PERCENT']

# Mean / max / min homework percentage for the student
hwAVG, hwMAX, hwMIN = hw_percents.mean(), hw_percents.max(), hw_percents.min()
# Mean / max / min homework percentage for the whole class
hwClassAVG, hwClassMAX, hwClassMIN = (
    hw_percents_class.mean(),
    hw_percents_class.max(),
    hw_percents_class.min(),
)

### Homework Plots

In [None]:
# Scatter plot of the student's homework percentages against the assigned
# dates, on a 16 x 12 inch figure
fig_dims = (16, 12)
fig, ax = plt.subplots(figsize = fig_dims)
# Draw explicitly on the axes created above.
# NOTE(review): despite its name, testGradesPlot holds the homework plot.
testGradesPlot = sb.scatterplot(x = hw_dates, y = hw_percents, legend = False, ax = ax)

### Homework Report

## Participation

In [None]:
# PARTICIPATION_TYPE column of the full table (all students, not filtered)
partType = participation['PARTICIPATION_TYPE']
# Participation rows belonging to this student
partStudent = participation[participation['STUDENT_ID'] == student_id]
# Date/time of each of this student's participation instances
part_dates = partStudent['PARTICIPATION_DATETIME']
# Number of this student's participation instances per date/time value
partDateCount = part_dates.value_counts()
# Number of this student's participation instances per participation type
partTypeCount = partStudent['PARTICIPATION_TYPE'].value_counts()

### Participation Statistics

In [None]:
# Boolean selectors for the two participation types
isVolunteer = partStudent['PARTICIPATION_TYPE'] == 'volunteer'
isColdCall = partStudent['PARTICIPATION_TYPE'] == 'cold call'

# PERCENTAGES

# Total number of participation instances for this student
partTotal = int(partStudent['STUDENT_ID'].count())
# Counts per type, looked up by label. partTypeCount comes from value_counts(),
# which is ordered by frequency, so reading positions 0 and 1 could swap the
# two types and failed when the student only had one type.
numVolunteers = partTypeCount.get('volunteer', 0)
numColdCalls = partTypeCount.get('cold call', 0)
# Fraction of all participation instances that are volunteers
# (NaN when the student has no participation at all)
perVolunteers = numVolunteers / partTotal if partTotal else float('nan')
# Fraction of all participation instances that are cold calls
perColdCalls = numColdCalls / partTotal if partTotal else float('nan')

# AVERAGES

# Count instances per (date/time, type), then average those counts per type.
# Each type's mean is taken over the date/time values where that type occurs.
# NOTE(review): "per class" below assumes each PARTICIPATION_DATETIME value
# identifies one class session - confirm.
class_by_part_type = partStudent.groupby(['PARTICIPATION_DATETIME', 'PARTICIPATION_TYPE'])['STUDENT_ID'].count().groupby(['PARTICIPATION_TYPE']).mean()

# Average number of participation attempts per participation instance
meanAttempts = partStudent['PARTICIPATION_ATTEMPTS'].mean()
# Average number of participation attempts per volunteer
meanAttemptsVolunteer = partStudent.loc[isVolunteer, 'PARTICIPATION_ATTEMPTS'].mean()
# Average number of participation attempts per cold call
meanAttemptsColdCall = partStudent.loc[isColdCall, 'PARTICIPATION_ATTEMPTS'].mean()
# Average number of participation instances per class (previously computed
# but never stored)
meanClassParticipation = partStudent.groupby(by = 'PARTICIPATION_DATETIME')['STUDENT_ID'].agg('count').mean()
# Average number of cold calls per class, by label; 0.0 when there are none
meanClassColdCall = class_by_part_type.get('cold call', 0.0)
# Average number of volunteers per class, by label; 0.0 when there are none
meanClassVolunteer = class_by_part_type.get('volunteer', 0.0)
# Average number of hints per participation instance
meanHints = partStudent['PARTICIPATION_HINTS'].mean()
# Average number of hints per volunteer
meanHintsVolunteer = partStudent.loc[isVolunteer, 'PARTICIPATION_HINTS'].mean()
# Average number of hints per cold call
meanHintsColdCall = partStudent.loc[isColdCall, 'PARTICIPATION_HINTS'].mean()

### Participation Plots

In [None]:
# Bar chart of how often each attempt count occurs in this student's
# participation rows, on a 16 x 12 inch figure
fig_dims = (16, 12)
fig, ax = plt.subplots(figsize = fig_dims)
# Refer to the column by name and draw explicitly on the axes created above
plotAttempts = sb.countplot(data = partStudent, x = 'PARTICIPATION_ATTEMPTS', ax = ax)

In [None]:
# Bar chart of how often each hint count occurs in this student's
# participation rows, on a 16 x 12 inch figure
fig_dims = (16, 12)
fig, ax = plt.subplots(figsize = fig_dims)
# Refer to the column by name and draw explicitly on the axes created above.
# NOTE(review): this rebinds plotAttempts, the name the attempts plot also uses.
plotAttempts = sb.countplot(data = partStudent, x = 'PARTICIPATION_HINTS', ax = ax)

In [None]:
# Bar chart of how many of this student's participation instances fall under
# each participation type, on a 16 x 12 inch figure
fig_dims = (16, 12)
fig, ax = plt.subplots(figsize = fig_dims)
# Use the student's own PARTICIPATION_TYPE column. Passing the unfiltered
# all-students Series as x made the chart count every student's participation.
plotType = sb.countplot(data = partStudent, x = 'PARTICIPATION_TYPE', ax = ax)

In [None]:
# Scatter plot of this student's attempts per participation instance over
# time, coloured by participation type, on a 16 x 12 inch figure
fig_dims = (16, 12)
fig, ax = plt.subplots(figsize = fig_dims)
# hue is taken from the student's own rows rather than the unfiltered
# all-students Series. The former `estimator = None` argument is dropped: it
# had no effect on a scatter plot, and seaborn releases whose scatterplot no
# longer declares it forward it to matplotlib, which rejects it.
partOverTime = sb.scatterplot(data = partStudent, x = "PARTICIPATION_DATETIME", y = "PARTICIPATION_ATTEMPTS", hue = "PARTICIPATION_TYPE", ax = ax)

### Participation Report

In [None]:
# Instantiate an FPDF object
pdf = FPDF()
# Add a page to the PDF doc
pdf.add_page()
# Effective page width: the page width minus twice the left margin
epw = pdf.w - 2*pdf.l_margin
# Column width: one third of the effective page width
col_width = epw/3
# Document title, centred in bold 18pt Times
title = 'Participation Report'
pdf.set_font('Times', 'B', 18)
pdf.cell(epw, 10, title, 0, 1, 'C')
# Student's English name, centred on its own line
pdf.cell(0, 10, "{}".format(student_name), 0, 1, 'C')
# Register the SimHei TrueType font as a Unicode font and switch to it before
# writing the student's local-script name
pdf.add_font('simhei', '', r'C:\Windows\Fonts\simhei.ttf', uni=True)
pdf.set_font('simhei', '', 14)
pdf.cell(0, 10, "{}".format(student_name_local), 0, 1, 'C')
# Build the output file name once so saving and opening use the same path
report_filename = "({}) {}_{}.pdf".format(title, student_name, student_name_local)
# Save the PDF. The return value is not used: the old trailing .encode() call
# did nothing useful and raises AttributeError whenever output() returns None.
pdf.output(report_filename)
# Open the saved report with the default viewer. os.startfile exists only on
# Windows, so elsewhere just report where the file was written.
if hasattr(os, 'startfile'):
    os.startfile(report_filename)
else:
    print("Report saved to {}".format(report_filename))