In [1]:
# Dependencies
import pandas as pd, os, numpy as np

In [2]:
# Location of the raw survey export, relative to this notebook
path = (
    "../Resources/"
    "2016-FCC-New-Coders-Survey-Data.csv"
)

In [3]:
# Load the full survey into a DataFrame.
# low_memory=False stops pandas from parsing the file in chunks, which is what
# produces the mixed-dtype warning on this CSV.
survey_df = pd.read_csv(
    path,
    low_memory=False,
    encoding="utf-8",
)
survey_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampMonthsAgo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,...,ResourceSoloLearn,ResourceStackOverflow,ResourceTreehouse,ResourceUdacity,ResourceUdemy,ResourceW3Schools,ResourceYouTube,SchoolDegree,SchoolMajor,StudentDebtOwe
0,28.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,20000.0
1,22.0,0.0,,,,,,,,,...,,,,,1.0,,,"some college credit, no degree",,
2,19.0,0.0,,,,,,,,,...,,,,,,,,high school diploma or equivalent (GED),,
3,26.0,0.0,,,,,,,,,...,,,,,,,,bachelor's degree,Cinematography And Film,7000.0
4,20.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,


In [4]:
# Positions (0-based) of the survey columns kept for this analysis
kept_column_positions = [
    0, 1, 2, 3, 4,
    6, 7, 8, 9, 10, 11,
    29, 30, 32, 36, 37,
    45, 48, 56,
    110, 111,
]
# Work on an independent copy so later edits never touch survey_df
coders_df = survey_df.iloc[:, kept_column_positions].copy()
coders_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [5]:
# Quick look at the trimmed frame's dimensions as (rows, columns)
frame_dimensions = coders_df.shape
print(frame_dimensions)

(15620, 21)


In [6]:
# Change 0 and 1 to "No" and "Yes" in the survey's flag columns only.
# A frame-wide replace would also rewrite genuine numeric answers (a respondent
# with 1 child, 1 hour of learning, or 0 income would become "Yes"/"No") and
# turn those numeric columns into mixed-type object columns.
# A column counts as a flag when it has at least one answer and every non-null
# value is 0 or 1.
yes_no_labels = {0.0: "No", 1.0: "Yes"}
flag_columns = [
    column
    for column in coders_df.columns
    if coders_df[column].notna().any()
    and coders_df[column].dropna().isin([0, 1]).all()
]
for column in flag_columns:
    # Missing answers stay NaN; only 0/1 are present otherwise, so map is total
    coders_df[column] = coders_df[column].map(yes_no_labels)
coders_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,No,30,32000.0,,"some college credit, no degree",
1,22.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,No,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20,48000.0,,high school diploma or equivalent (GED),
3,26.0,No,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [7]:
# Keep only the respondents who reported attending a bootcamp
attended_mask = coders_df["AttendedBootcamp"].eq("Yes")
bootcamp_df = coders_df[attended_mask]
bootcamp_df.shape

(953, 21)

In [8]:
# Tally respondents per bootcamp (most attended first) as a two-column frame:
# the bootcamp name and the number of respondents who went to it.
bootcamp_attendees = (
    bootcamp_df["BootcampName"]
    .value_counts()
    .rename_axis("BootcampName")
    .reset_index(name="Attendee Count")
)
bootcamp_attendees.head()

Unnamed: 0,BootcampName,Attendee Count
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30


In [9]:
# Tally, per bootcamp, the attendees who said they would recommend it
is_attendee = bootcamp_df["AttendedBootcamp"] == "Yes"
is_recommender = bootcamp_df["BootcampRecommend"] == "Yes"
bootcamp_recommendees = (
    bootcamp_df.loc[is_attendee & is_recommender, "BootcampName"]
    .value_counts()
    .rename_axis("BootcampName")
    .reset_index(name="Recommendee Count")
)
bootcamp_recommendees.head(10)

Unnamed: 0,BootcampName,Recommendee Count
0,General Assembly,70
1,Flatiron School,50
2,Dev Bootcamp,41
3,The Iron Yard,31
4,Hack Reactor,27
5,Turing,26
6,Prime Digital Academy,25
7,App Academy,20
8,Hackbright Academy,19
9,MakerSquare,18


In [10]:
# Merge the two created data frames on the name of the bootcamp.
# A left merge keeps every bootcamp that has attendees. The default inner merge
# silently drops bootcamps that nobody recommended, when they should be
# reported with a recommender count of 0.
bootcamp_merged = pd.merge(
    bootcamp_attendees,
    bootcamp_recommendees,
    on="BootcampName",
    how="left",
)
# Bootcamps missing from the recommender table come back as NaN, which also
# forces the column to float; restore an integer count of zero.
bootcamp_merged["Recommendee Count"] = (
    bootcamp_merged["Recommendee Count"].fillna(0).astype(int)
)
bootcamp_merged.head()

Unnamed: 0,BootcampName,Attendee Count,Recommendee Count
0,General Assembly,90,70
1,Flatiron School,54,50
2,Dev Bootcamp,48,41
3,The Iron Yard,40,31
4,Prime Digital Academy,30,25


In [11]:
# Calculate the percentage of each bootcamp's students who are recommenders
recommender_share = (
    bootcamp_merged["Recommendee Count"] / bootcamp_merged["Attendee Count"]
)
# Store the percentage as display text with two decimals, e.g. "77.78%"
bootcamp_merged["percent_recommenders"] = (100 * recommender_share).map(
    "{0:,.2f}%".format
)
# Only the last expression of a cell is displayed, so the shape is shown here
bootcamp_merged.shape

(116, 4)

In [12]:
# Export to Excel without the index column.
output_path = "../Output/BootcampOutputPart2.xlsx"
# Create the output folder first so the export does not fail when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
bootcamp_merged.to_excel(output_path, index=False)