In [2]:
# Dependencies
from pathlib import Path

import pandas as pd

In [3]:
# Relative path to the 2016 freeCodeCamp New Coders Survey CSV
coders = (
    "Resources/2016-FCC-New-Coders-Survey-Data.csv"
)

In [4]:
# Load the survey into a DataFrame.
# low_memory=False makes pandas read the file in one pass instead of in chunks,
# which avoids the mixed-dtype DtypeWarning on columns holding several value types.
# NOTE(review): later output shows "Academia de CÃ³digo", which looks like UTF-8 text
# decoded as Latin-1 -- confirm whether this file should be read with encoding="utf-8".
coders_df = pd.read_csv(
    coders,
    encoding="iso-8859-1",
    low_memory=False,
)
coders_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampMonthsAgo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,...,ResourceSoloLearn,ResourceStackOverflow,ResourceTreehouse,ResourceUdacity,ResourceUdemy,ResourceW3Schools,ResourceYouTube,SchoolDegree,SchoolMajor,StudentDebtOwe
0,28.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,20000.0
1,22.0,0.0,,,,,,,,,...,,,,,1.0,,,"some college credit, no degree",,
2,19.0,0.0,,,,,,,,,...,,,,,,,,high school diploma or equivalent (GED),,
3,26.0,0.0,,,,,,,,,...,,,,,,,,bachelor's degree,Cinematography And Film,7000.0
4,20.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,


In [5]:
# Keep only the demographic, bootcamp, employment and education columns used below.
# Columns are selected by name rather than by position so the cell documents itself
# and keeps working if the column order of the CSV ever changes.
# BootcampName is not included here; later cells read it from coders_df directly.
columns_to_keep = [
    "Age", "AttendedBootcamp", "BootcampFinish", "BootcampFullJobAfter",
    "BootcampLoanYesNo", "BootcampPostSalary", "BootcampRecommend",
    "ChildrenNumber", "CityPopulation", "CodeEventBootcamp", "CountryLive",
    "EmploymentField", "EmploymentStatus", "Gender", "HasChildren",
    "HoursLearning", "Income", "JobRoleInterest", "SchoolDegree",
    "SchoolMajor",
]
new_df = coders_df[columns_to_keep]
new_df.columns

Index(['Age', 'AttendedBootcamp', 'BootcampFinish', 'BootcampFullJobAfter',
       'BootcampLoanYesNo', 'BootcampPostSalary', 'BootcampRecommend',
       'ChildrenNumber', 'CityPopulation', 'CodeEventBootcamp', 'CountryLive',
       'EmploymentField', 'EmploymentStatus', 'Gender', 'HasChildren',
       'HoursLearning', 'Income', 'JobRoleInterest', 'SchoolDegree',
       'SchoolMajor'],
      dtype='object')

In [17]:
# Translate the 0/1 survey flags into "No"/"Yes" labels.
# The replacement is limited to the flag columns: a frame-wide replace would also
# rewrite genuine numeric values such as ChildrenNumber == 1, HoursLearning == 1
# or Income == 0 into "Yes"/"No".
# NOTE(review): CodeEventBootcamp is assumed to be a 0/1 flag like the others -- confirm.
yes_no_columns = [
    "AttendedBootcamp", "BootcampFinish", "BootcampFullJobAfter",
    "BootcampLoanYesNo", "BootcampRecommend", "CodeEventBootcamp",
    "HasChildren",
]
# Nested-dict form of replace: {column: {old_value: new_value}}
yesno_df = new_df.replace({col: {0: "No", 1: "Yes"} for col in yes_no_columns})

yesno_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,CodeEventBootcamp,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,No,,,,,,,"between 100,000 and 1 million",,United States of America,office and administrative support,Employed for wages,male,No,30,32000.0,,"some college credit, no degree",
1,22.0,No,,,,,,,"between 100,000 and 1 million",,United States of America,food and beverage,Employed for wages,male,,30,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,No,,,,,,,more than 1 million,,United States of America,finance,Employed for wages,male,,20,48000.0,,high school diploma or equivalent (GED),
3,26.0,No,,,,,,,more than 1 million,,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,No,,,,,,,"between 100,000 and 1 million",,United States of America,education,Employed for wages,female,,25,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [18]:
# Keep only the respondents whose AttendedBootcamp flag reads "Yes"
attended = yesno_df[yesno_df["AttendedBootcamp"].eq("Yes")]
attended.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,CodeEventBootcamp,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
93,32.0,Yes,Yes,No,No,,No,,"between 100,000 and 1 million",,United States of America,"arts, entertainment, sports, or media",Self-employed business owner,male,,20,67000.0,,bachelor's degree,Biology
97,26.0,Yes,Yes,Yes,No,45000.0,No,,more than 1 million,,United States of America,software development,Employed for wages,male,No,10,40000.0,,master's degree (non-professional),Music
130,41.0,Yes,Yes,Yes,Yes,75000.0,Yes,3.0,"less than 100,000",,United States of America,software development,Employed for wages,male,Yes,30,75000.0,,"some college credit, no degree",
159,26.0,Yes,Yes,No,No,,No,,"between 100,000 and 1 million",,United States of America,,Not working and not looking for work,female,,30,,Full-Stack Web Developer,"some college credit, no degree",
188,24.0,Yes,No,,Yes,,No,,"between 100,000 and 1 million",,Canada,,Not working but looking for work,female,,60,,,"some college credit, no degree",


In [8]:
# Tally respondents per bootcamp (most attended first), then keep only the
# bootcamps with at least 10 attendees.
bootcamps = (
    coders_df["BootcampName"]
    .value_counts()
    .rename_axis("BootcampName")
    .reset_index(name="Number of Attendees")
)
bootcamps_df = bootcamps[bootcamps["Number of Attendees"] >= 10]
bootcamps_df.head()

Unnamed: 0,BootcampName,Number of Attendees
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30


In [9]:
# Count how many attendees of each bootcamp would recommend the bootcamp.
# BootcampRecommend is a numeric 0/1 flag in the raw survey frame, so summing it
# per bootcamp gives the number of recommenders directly; no Yes/No recoding is needed.
recommenders_df = (
    coders_df.groupby("BootcampName")["BootcampRecommend"]
    .sum()
    .reset_index(name="Number of Recommenders")
)
recommenders_df.head()

Unnamed: 0,BootcampName,Number of Recommenders
0,10x.org.il,3.0
1,4Geeks Academy,4.0
2,AcadGild,2.0
3,Academia de CÃ³digo,4.0
4,Academic Work Academy,0.0


In [19]:
# Join attendee counts with recommender counts; the inner join on BootcampName
# keeps only bootcamps present in both frames.
df_merged = bootcamps_df.merge(
    recommenders_df,
    how="inner",
    on="BootcampName",
)
df_merged.head()

Unnamed: 0,BootcampName,Number of Attendees,Number of Recommenders
0,General Assembly,90,70.0
1,Flatiron School,54,50.0
2,Dev Bootcamp,48,41.0
3,The Iron Yard,40,31.0
4,Prime Digital Academy,30,25.0


In [16]:
# Share of each bootcamp's attendees who would recommend it, stored as a
# display string such as "77.78%".
# The ratio is formatted once, straight from the unrounded value: rounding to
# 3 decimals first and then formatting to 2 would round twice and can shift the last digit.
recommend_pct = df_merged["Number of Recommenders"] / df_merged["Number of Attendees"] * 100
df_merged["Percentage of Recommenders"] = recommend_pct.map("{:.2f}%".format)
df_merged.head()

Unnamed: 0,BootcampName,Number of Attendees,Number of Recommenders,Percentage of Recommended,Percentage of Recommenders
0,General Assembly,90,70.0,77.778,77.78%
1,Flatiron School,54,50.0,92.593,92.59%
2,Dev Bootcamp,48,41.0,85.417,85.42%
3,The Iron Yard,40,31.0,77.5,77.50%
4,Prime Digital Academy,30,25.0,83.333,83.33%


In [12]:
# Export the merged summary to CSV (not Excel), leaving out the DataFrame index.
# The target folder is created first because to_csv fails when the directory is missing,
# which would break a fresh Restart & Run All.
output_path = Path("output/parttwo_output.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df_merged.to_csv(output_path, index=False, header=True)