In [1]:
# Dependencies
import os
import pandas as pd
import csv

In [2]:
# load CSV
survey_data = "Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [3]:
# Read with pandas--low_memory required to suppress errors about mixed data types
csv_df = pd.read_csv(survey_data, encoding = "iso-8859-1", low_memory = False)

In [4]:
# Take only columns 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111
reduced_df = csv_df.iloc[:, [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]]
reduced_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [5]:
# Change reading 0 and 1 to No and Yes, respectively
reducedyn_df = reduced_df.replace({0.0:"No", 1.0:"Yes"})
reducedyn_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,No,30,32000.0,,"some college credit, no degree",
1,22.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,No,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20,48000.0,,high school diploma or equivalent (GED),
3,26.0,No,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [6]:
# Extract rows for only those who attended a bootcamp
attended_camp_df = reducedyn_df.loc[reducedyn_df["AttendedBootcamp"] == "Yes", :]
attended_camp_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
93,32.0,Yes,Yes,No,No,Codify Academy,,No,,"between 100,000 and 1 million",...,United States of America,"arts, entertainment, sports, or media",Self-employed business owner,male,,20,67000.0,,bachelor's degree,Biology
97,26.0,Yes,Yes,Yes,No,DaVinci Coders,45000.0,No,,more than 1 million,...,United States of America,software development,Employed for wages,male,No,10,40000.0,,master's degree (non-professional),Music
130,41.0,Yes,Yes,Yes,Yes,Coder Foundry,75000.0,Yes,3.0,"less than 100,000",...,United States of America,software development,Employed for wages,male,Yes,30,75000.0,,"some college credit, no degree",
159,26.0,Yes,Yes,No,No,General Assembly,,No,,"between 100,000 and 1 million",...,United States of America,,Not working and not looking for work,female,,30,,Full-Stack Web Developer,"some college credit, no degree",
188,24.0,Yes,No,,Yes,,,No,,"between 100,000 and 1 million",...,Canada,,Not working but looking for work,female,,60,,,"some college credit, no degree",


In [7]:
# Create DataFrame of the different boot camps ##########################################################################that had a significant number of attendees
bootcamp_names_df = pd.DataFrame(reduced_df["BootcampName"].value_counts())
bootcamp_names_df.reset_index(inplace=True)
bootcamp_names_df.columns = ["BootcampName","# of Attendees"]
bootcamp_names_df.head()

Unnamed: 0,BootcampName,# of Attendees
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30


In [9]:
#count how many attendees of each bootcamp would recommend the bootcamp 
attended_camp2_df = attended_camp_df.replace({"Yes":1,"No":0})
recommend_bootcamp = pd.DataFrame(attended_camp2_df.groupby("BootcampName")["BootcampRecommend"].sum())
recommend_bootcamp.head()

Unnamed: 0_level_0,BootcampRecommend
BootcampName,Unnamed: 1_level_1
10x.org.il,3.0
4Geeks Academy,4.0
AcadGild,2.0
Academia de CÃ³digo,4.0
Academic Work Academy,0.0


In [10]:
# Merge the two created data frames on the name of tbe bootcamps
merged_df = pd.merge(bootcamp_names_df, recommend_bootcamp, on = ["BootcampName"])
merged_df.head()

Unnamed: 0,BootcampName,# of Attendees,BootcampRecommend
0,General Assembly,90,70.0
1,Flatiron School,54,50.0
2,Dev Bootcamp,48,41.0
3,The Iron Yard,40,31.0
4,Prime Digital Academy,30,25.0


In [11]:
# Calculate percentage of each bootcamp's students who are recommenders
merged_df["% Recommend"] = merged_df["BootcampRecommend"] / merged_df["# of Attendees"] * 100
mergesort_df = merged_df.sort_values(by="% Recommend", ascending=False)
mergesort_df.head()

Unnamed: 0,BootcampName,# of Attendees,BootcampRecommend,% Recommend
127,Ladies Learning Code,1,1.0,100.0
106,Coder's Lab,1,1.0,100.0
35,Fullstack Academy,6,6.0,100.0
104,Ruby On The Beach,1,1.0,100.0
103,Code Union,1,1.0,100.0


In [13]:
# Export to excel and remove index
mergesort_df.to_excel("Bootcamp_Summary.xlsx", index=False)