# Project CAPES: Combining Datasets

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [2]:
# Make an array of department names of files in CAPES_CSV
Dept_Array = np.array([])
with open('Capes_Dept.txt','r') as f:
    for dept in f:
        Dept_Array = np.append(Dept_Array,dept[:-1])
        
Dept_Array

array(['RELI', 'LIT', 'PHIL', 'FPMU', 'ELWR', 'MUIR', 'TWS', 'COMM',
       'HDP', 'ENVR', 'CAT', 'STPA', 'INTL', 'CONT', 'BENG', 'PSYC',
       'LAWS', 'LATI', 'WARR', 'ERC', 'CGS', 'ECON', 'BIOL', 'HIST',
       'HUM', 'PHYS', 'REV', 'ANTH', 'VIS', 'CSE', 'SE', 'MGT', 'THEA',
       'ICAM', 'SXTH', 'EDS', 'DOC', 'COGS', 'JAPN', 'SOCI', 'NANO',
       'SIO', 'HMNR', 'JUDA', 'USP', 'ETHN', 'ENG', 'MUS', 'FILM', 'MMW',
       'CENG', 'MATH', 'CHEM', 'TMC', 'CHIN', 'WCWP', 'POLI', 'MAE',
       'ECE', 'ESYS', 'LIGN', 'DSC', 'PHYS', 'SYN'], dtype='<U32')

In [3]:
# Takes in a department and formats it into a proper file
def Dept_to_Filename(dept):
    return "CAPES_CSV/CAPES_"+dept+".csv"
Dept_to_Filename_v = np.vectorize(Dept_to_Filename)

# Convert the department array into their proper filenames
Filename_Array = Dept_to_Filename_v(Dept_Array)
Filename_Array

array(['CAPES_CSV/CAPES_RELI.csv', 'CAPES_CSV/CAPES_LIT.csv',
       'CAPES_CSV/CAPES_PHIL.csv', 'CAPES_CSV/CAPES_FPMU.csv',
       'CAPES_CSV/CAPES_ELWR.csv', 'CAPES_CSV/CAPES_MUIR.csv',
       'CAPES_CSV/CAPES_TWS.csv', 'CAPES_CSV/CAPES_COMM.csv',
       'CAPES_CSV/CAPES_HDP.csv', 'CAPES_CSV/CAPES_ENVR.csv',
       'CAPES_CSV/CAPES_CAT.csv', 'CAPES_CSV/CAPES_STPA.csv',
       'CAPES_CSV/CAPES_INTL.csv', 'CAPES_CSV/CAPES_CONT.csv',
       'CAPES_CSV/CAPES_BENG.csv', 'CAPES_CSV/CAPES_PSYC.csv',
       'CAPES_CSV/CAPES_LAWS.csv', 'CAPES_CSV/CAPES_LATI.csv',
       'CAPES_CSV/CAPES_WARR.csv', 'CAPES_CSV/CAPES_ERC.csv',
       'CAPES_CSV/CAPES_CGS.csv', 'CAPES_CSV/CAPES_ECON.csv',
       'CAPES_CSV/CAPES_BIOL.csv', 'CAPES_CSV/CAPES_HIST.csv',
       'CAPES_CSV/CAPES_HUM.csv', 'CAPES_CSV/CAPES_PHYS.csv',
       'CAPES_CSV/CAPES_REV.csv', 'CAPES_CSV/CAPES_ANTH.csv',
       'CAPES_CSV/CAPES_VIS.csv', 'CAPES_CSV/CAPES_CSE.csv',
       'CAPES_CSV/CAPES_SE.csv', 'CAPES_CSV/CAPES_MGT.csv',
     

In [4]:
Capes = [pd.read_csv(filename) for filename in Filename_Array]
Capes = pd.concat(Capes, ignore_index=True)
Capes

Unnamed: 0,Instructor,Course,Term,Enroll,Evals Made,Rcmnd Class,Rcmnd Instr,Study Hrs/wk,Avg Grade Expected,Avg Grade Received
0,"Kalleres, Dayna",RELI 1 - Introduction to Religion (A),FA22,37,11,90.9%,100.0%,2.50,B (3.20),A (4.00)
1,"Rahimi, Babak",RELI 149 - Islam in America (A),S122,40,5,100.0%,100.0%,3.70,A- (3.80),A- (3.98)
2,"Rahimi, Babak",RELI 101 - Tools&Methods/Studyof Religion (A),WI22,39,10,90.0%,90.0%,3.61,B+ (3.67),A- (3.73)
3,"Rahimi, Babak",RELI 188 - Special Topics in Religion (A),WI22,34,11,80.0%,90.0%,3.90,A- (3.90),A- (3.81)
4,"Kalleres, Dayna",RELI 1 - Introduction to Religion (A),FA21,37,18,81.3%,75.0%,2.50,A- (3.85),A- (3.74)
...,...,...,...,...,...,...,...,...,...,...
61301,"Gladstein, Jill M",SYN 1 - Perspectives/Changing Planet (0),WI22,404,243,90.9%,93.4%,3.95,A- (3.94),A- (3.90)
61302,"Gladstein, Jill M",SYN 2 - Explorations/Changing Planet (0),WI22,191,93,77.0%,88.5%,4.18,A- (3.79),A- (3.89)
61303,"Gladstein, Jill M",SYN 2 - Explorations/Changing Planet (0),FA21,319,135,68.0%,74.4%,3.89,A- (3.93),A- (3.88)
61304,"Gladstein, Jill M",SYN 1 - Perspectives/Changing Planet (0),SP21,235,107,90.6%,95.3%,5.15,A- (3.85),A- (3.88)


We save this pristine raw data as a `csv` file.

In [5]:
Capes.to_csv('Capes_raw.csv')