In [1]:
# Dependencies
import pandas as pd

In [2]:
# load CSV

coders_csv= ("Resources/2016-FCC-New-Coders-Survey-Data.csv")

In [3]:
# Read with pandas--low_memory required to suppress errors about mixed data types 

coders_df = pd.read_csv(coders_csv, encoding='iso-8859-1', low_memory=False)

In [4]:
# Take only columns 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111

coders_data= coders_df.iloc[:, [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]]

coders_data.head()


Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [5]:
# Change reading 0 and 1 to No and Yes, respectively 

coders_data = coders_data.replace({0.0:"No", 1.0:"Yes"})
coders_data.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,No,30,32000.0,,"some college credit, no degree",
1,22.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,No,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20,48000.0,,high school diploma or equivalent (GED),
3,26.0,No,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [6]:
# Extract rows for only those who attended a bootcamp 

final_coder_data = coders_data.loc[coders_data["AttendedBootcamp"] == "Yes", :]
final_coder_data.count()

Age                     818
AttendedBootcamp        953
BootcampFinish          933
BootcampFullJobAfter    635
BootcampLoanYesNo       934
BootcampName            895
BootcampPostSalary      330
BootcampRecommend       937
ChildrenNumber          148
CityPopulation          823
CodeEventBootcamp        27
CountryLive             825
EmploymentField         476
EmploymentStatus        792
Gender                  833
HasChildren             211
HoursLearning           925
Income                  430
JobRoleInterest         402
SchoolDegree            832
SchoolMajor             622
dtype: int64

In [7]:
 # Create DataFrame of the different boot camps that had a significant number of attendees

bootcamp_name = pd.DataFrame(final_coder_data["BootcampName"].value_counts())

bootcamp_name.reset_index(inplace=True)
bootcamp_name.columns = ["BootcampName", "Count"]

bootcamp_name.head()

Unnamed: 0,BootcampName,Count
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30


In [8]:
# Count how many attendees of each bootcamp would recommend the bootcamp 

bootcamp_recommend = final_coder_data.replace({"Yes": 1, "No": 0})
bootcamp_recommend = pd.DataFrame(bootcamp_recommend.groupby("BootcampName")["BootcampRecommend"].sum())

bootcamp_recommend.reset_index(inplace=True)
bootcamp_recommend.columns = ["BootcampName", "Recommend"]

bootcamp_recommend.head ()

Unnamed: 0,BootcampName,Recommend
0,10x.org.il,3.0
1,4Geeks Academy,4.0
2,AcadGild,2.0
3,Academia de CÃ³digo,4.0
4,Academic Work Academy,0.0


In [9]:
# Merge the two created data frames on the name of the bootcamp 
# Should I use inner or outer merge? Does it matter since its only three columns here and they should all contain all values

bootcamp_merge = pd.merge(bootcamp_name, bootcamp_recommend, on="BootcampName")
bootcamp_merge.head()

Unnamed: 0,BootcampName,Count,Recommend
0,General Assembly,90,70.0
1,Flatiron School,54,50.0
2,Dev Bootcamp,48,41.0
3,The Iron Yard,40,31.0
4,Prime Digital Academy,30,25.0


In [12]:
 # Calculate percentage of each bootcamp's students who are recommenders

Recommend_Percent = (bootcamp_merge["Recommend"] / bootcamp_merge["Count"]) * 100
Recommend_Percent 
bootcamp_merge = pd.Recommend_Percent.sort_values('self'                                          ascending=False,
                                          inplace=False)

                                          

#bootcamp_merge["Recommend_Percent"] = bootcamp_breakdown["Recommend_Percent"].map("{0:,.2f}%".format

SyntaxError: positional argument follows keyword argument (<ipython-input-12-d59faf553eb5>, line 7)

In [None]:
# Export to excel and remove index

bootcamp_merge.to_excel("Output/BootCampPart2.xlsx", index=False)