In [136]:
import pandas as pd
from faker import Faker
import random as rnd
from typing import *

In [137]:
# Integrate if time
def importExcel() -> Tuple[pd.DataFrame]: 
    collegeAdmissionsDf = pd.read_excel("CollegeAdmissionsData.xlsx")
    collegeAdmissionsDf = collegeAdmissionsDf.rename(columns={"Unnamed: 0": "College"})

    boulderTuitionCostDf = pd.read_excel("BoulderTuitionCost.xlsx")
    return((collegeAdmissionsDf, boulderTuitionCostDf))

In [138]:
class Student:
    def __init__(self, studentFirstName, studentLastName, sTotal, actTotal, gradePointAverage, admissionType, schoolType):
        self.studentFirstName = studentFirstName
        self.studentLastName = studentLastName
        self.sTotal = sTotal
        self.actTotal = actTotal
        self.gradePointAverage = gradePointAverage
        self.admissionType = admissionType
        self.schoolType = schoolType

In [139]:
def scoreGeneratorHelper() -> Tuple[float, float, float]:
    scoreList = []
    randomNumber = rnd.gauss(.85,.1)
    if (randomNumber > .9 and (rnd.uniform(0,1) > .75)):
        newRandomNumber = 1.1
        totalSatScore = round(randomNumber * 1600)
        totalActScore = round(randomNumber * 36)
        totalGpa = round(newRandomNumber * 4.0,2)
        scoreList.append((totalSatScore, totalActScore, totalGpa))
    else:
        totalSatScore = round(randomNumber * 1600)
        totalActScore = round(randomNumber * 36)
        totalGpa = round(randomNumber * 4.0,2)
        if (totalSatScore+150) < 1600:
            totalSatScore += 150
            scoreList.append((totalSatScore, totalActScore, totalGpa))
            return scoreList
        scoreList.append((totalSatScore, totalActScore, totalGpa))
    return scoreList

In [140]:
def admissionTypeHelper() -> str:
    randomNumber = rnd.uniform(0, 1)
    if(randomNumber > .5):
        admissionType = "In State"
        return admissionType
    elif(randomNumber > .02):
        admissionType = "Out of State"
        return admissionType
    else:
        admissionType = "International"
        return admissionType

In [141]:
def schoolTypeHelper() -> str:
    randomNumber = rnd.uniform(0,1)
    if randomNumber > .571:
        schoolType = "Arts and Sciences"
        return schoolType
    elif randomNumber > .341:
        schoolType = "Engineering"
        return schoolType
    elif randomNumber > .218:
        schoolType = "Business"
        return schoolType
    elif randomNumber > 129:
        schoolType = "Exploratory Studies"
        return schoolType
    elif randomNumber > 63:
        schoolType = "Media, Communication, and Information"
        return schoolType
    elif randomNumber > 46:
        schoolType = "Environmental Design"
        return schoolType
    elif randomNumber > 33:
        schoolType = "Education"
        return schoolType
    elif randomNumber > 18:
        schoolType = "Law"
        return schoolType
    elif randomNumber > 5:
        schoolType = "Music"
        return schoolType
    else:
        schoolType = "Multi-Disciplinary"
        return schoolType

In [142]:
def createApplicantList() -> List[Student]:
    studentList = []
    fake = Faker()
    for i in range(0,55000):
        fakeFirstName = fake.first_name()
        fakeLastName = fake.last_name()
        scoreList = scoreGeneratorHelper()
        admissionType = admissionTypeHelper()
        schoolType = schoolTypeHelper()
        studentObject = Student(fakeFirstName, fakeLastName, scoreList[0][0], scoreList[0][1], scoreList[0][2], admissionType, schoolType)
        studentList.append(studentObject)
    return(studentList)

In [143]:
def devryAdmissionGraderHelper(row, dataDataFrame):
    if row["SAT Total"] > dataDataFrame["SAT Total"].iloc[0]:
        if row["GPA"] > dataDataFrame["GPA"].iloc[0]:
            return True
    elif row["ACT Total"] > dataDataFrame["ACT Total"].iloc[0]:
        if row["GPA"] > dataDataFrame["GPA"].iloc[0]:
            return True
    else:
        return False

In [144]:
def boulderAdmissionGraderHelper(row, dataDataFrame):
    if row["SAT Total"] > dataDataFrame["SAT Total"].iloc[1]:
        if row["GPA"] > dataDataFrame["GPA"].iloc[1]:
            return True
    elif row["ACT Total"] > dataDataFrame["ACT Total"].iloc[1]:
        if row["GPA"] > dataDataFrame["GPA"].iloc[1]:
            return True
    else:
        return False

In [145]:
def berkeleyAdmissionGraderHelper(row, dataDataFrame):
    if row["SAT Total"] > dataDataFrame["SAT Total"].iloc[2]:
        if row["GPA"] > dataDataFrame["GPA"].iloc[2]:
            return True
    elif row["ACT Total"] > dataDataFrame["ACT Total"].iloc[2]:
        if row["GPA"] > dataDataFrame["GPA"].iloc[2]:
            return True
    else:
        return False

In [146]:
def harvardAdmissionGraderHelper(row, dataDataFrame):
    if row["SAT Total"] > dataDataFrame["SAT Total"].iloc[3]:
        if row["GPA"] > dataDataFrame["GPA"].iloc[3]:
            return True
    elif row["ACT Total"] > dataDataFrame["ACT Total"].iloc[3]:
        if row["GPA"] > dataDataFrame["GPA"].iloc[3]:
            return True
    else:
        return False

In [147]:
def admissionGrader(inputStudentList, dataDataFrame) -> pd.DataFrame:
    studentList = []
    for student in inputStudentList:
        studentList.append({"FirstName": student.studentFirstName, "LastName": student.studentLastName, "SAT Total": student.sTotal, "ACT Total": student.actTotal, "GPA": student.gradePointAverage})
    studentDf = pd.DataFrame(studentList)
    studentDf["DeVry Admission"] = studentDf.apply(lambda row: devryAdmissionGraderHelper(row, dataDataFrame), axis=1)
    studentDf["Boulder Admission"] = studentDf.apply(lambda row: boulderAdmissionGraderHelper(row, dataDataFrame), axis=1)
    studentDf["Berkeley Admission"] = studentDf.apply(lambda row: berkeleyAdmissionGraderHelper(row, dataDataFrame), axis=1)
    studentDf["Harvard Admission"] = studentDf.apply(lambda row: harvardAdmissionGraderHelper(row, dataDataFrame), axis=1)

    return(studentDf)

In [148]:
def boulderAdmitted(inputStudentDf, inputStudentList) -> pd.DataFrame:
    studentList = []
    for student in inputStudentList:
        studentList.append({"FirstName": student.studentFirstName, "LastName": student.studentLastName, "Admission Type": student.admissionType, "School Type": student.schoolType})
    boulderStudentDf = pd.DataFrame(studentList)
    mergedDf = pd.merge(boulderStudentDf, inputStudentDf[["Boulder Admission", "FirstName", "LastName"]], on=["FirstName","LastName"])
    mergedDf = mergedDf[mergedDf["Boulder Admission"] == True]
    return mergedDf

In [149]:
# def schoolToTierHelper(row):
#     inputSchoolType = row["School Type"]
    
#     match inputSchoolType:
        
#         case "Arts and Sciences":
#             return "Tier 1"
#         case "Engineering":
#             return "Tier 3"
#         case "Business":
#             return "Tier 4"
#         case "Exploratory Studies":
#             return "Tier 1"
#         case "Media, Communication, and Information":
#             return "Tier 2"
#         case "Environmental Design":
#             return "Tier 2"
#         case "Education":
#             return "Tier 1"
#         case "Law":
#             return "Tier 3"
#         case "Music":
#             return "Tier 1"
#         case "Multi-Disciplinary":
#             return "Tier 3"        

# Appears that my version of python does not support match statements. Had to switch to if / elif / else instead

In [150]:
def schoolToTierHelper(row):
    inputSchoolType = row["School Type"]
    
    if inputSchoolType == "Arts and Sciences":
        return "Tier 1"
    elif inputSchoolType == "Engineering":
        return "Tier 3"
    elif inputSchoolType == "Business":
        return "Tier 4"
    elif inputSchoolType == "Exploratory Studies":
        return "Tier 1"
    elif inputSchoolType == "Media, Communication, and Information":
        return "Tier 2"
    elif inputSchoolType == "Environmental Design":
        return "Tier 2"
    elif inputSchoolType == "Education":
        return "Tier 1"
    elif inputSchoolType == "Law":
        return "Tier 3"
    elif inputSchoolType == "Music":
        return "Tier 1"
    elif inputSchoolType == "Multi-Disciplinary":
        return "Tier 3"

In [151]:
def tuitionCalculatorHelper(row, tuitionCostDf):
    admission_type = row["Admission Type"]
    tier = row["Tier"]
    if admission_type == "In State":
        if tier == "Tier 1":
            return tuitionCostDf["Tier One"].iloc[0]
        elif tier == "Tier 2":
            return tuitionCostDf["Tier Two"].iloc[0]
        elif tier == "Tier 3":
            return tuitionCostDf["Tier Three"].iloc[0]
        elif tier == "Tier 4":
            return tuitionCostDf["Tier Four"].iloc[0]
    elif admission_type == "Out of State":
        if tier == "Tier 1":
            return tuitionCostDf["Tier One"].iloc[1]
        elif tier == "Tier 2":
            return tuitionCostDf["Tier Two"].iloc[1]
        elif tier == "Tier 3":
            return tuitionCostDf["Tier Three"].iloc[1]
        elif tier == "Tier 4":
            return tuitionCostDf["Tier Four"].iloc[1]
    elif admission_type == "International":
        if tier == "Tier 1":
            return tuitionCostDf["Tier One"].iloc[2]
        elif tier == "Tier 2":
            return tuitionCostDf["Tier Two"].iloc[2]
        elif tier == "Tier 3":
            return tuitionCostDf["Tier Three"].iloc[2]
        elif tier == "Tier 4":
            return tuitionCostDf["Tier Four"].iloc[2]

In [152]:
def totalBudgetCalculator(boulderDataFrame, tuitionCostDf) -> Tuple[pd.DataFrame, int]:
    boulderDataFrame = boulderDataFrame.sample(7106, random_state=42)
    boulderDataFrame["Tier"] = boulderDataFrame.apply(schoolToTierHelper, axis=1)
    boulderDataFrame["Cost"] = boulderDataFrame.apply(lambda row: tuitionCalculatorHelper(row, tuitionCostDf), axis=1)
    return ((boulderDataFrame, boulderDataFrame["Cost"].sum()))

In [154]:
def main():
    dataDataFrameList = importExcel()
    studentList = createApplicantList()
    studentDataFrame = admissionGrader(studentList, dataDataFrameList[0])
    boulderDataFrame = boulderAdmitted(studentDataFrame, studentList)
    budgetUnpack = totalBudgetCalculator(boulderDataFrame, dataDataFrameList[1])
    print(budgetUnpack[1])

main()

164774093
