In [1]:
# Dependencies
from pathlib import Path

import pandas as pd

In [2]:
# Location of the survey CSV. The path is relative, so it is resolved against
# the directory the notebook kernel is running in.
csvfile = "Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [3]:
# Load the survey into a DataFrame.
# low_memory=False makes pandas read the whole file before inferring column
# dtypes instead of inferring them chunk by chunk, which avoids the
# mixed-dtype DtypeWarning this file otherwise triggers.
df = pd.read_csv(csvfile, low_memory=False)

In [4]:
# Keep a subset of 21 survey columns, selected by their position in the raw file.
# NOTE: this overwrites df, so the cell cannot be re-run on its own -- positions
# such as 110 no longer exist once the frame has been cut down to 21 columns.
# Re-run the read_csv cell first.
columns_to_keep = [
    0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11,
    29, 30, 32, 36, 37, 45, 48, 56,
    110, 111,
]
df = df.iloc[:, columns_to_keep]

In [5]:
# Recode the survey's 0/1 answers as "No"/"Yes".
# NOTE(review): replace() is applied to the whole frame, so a 0 or 1 in ANY
# retained column is rewritten, not only in the yes/no questions -- confirm
# that no numeric column is affected in a way that matters downstream.
df = df.replace(0, "No").replace(1, "Yes")

In [6]:
# Keep only the respondents who answered "Yes" to having attended a bootcamp
attended_mask = df["AttendedBootcamp"].eq("Yes")
attended_df = df.loc[attended_mask]

In [7]:
# Build a two-column table: the bootcamp name and the number of respondents
# who attended it. value_counts() already orders the bootcamps from most to
# fewest respondents.
respondent_counts = attended_df["BootcampName"].value_counts()
bootcamp_name = (
    respondent_counts
    .rename_axis("BootcampName")
    .reset_index(name="Number of Respondents")
)
bootcamp_name

Unnamed: 0,BootcampName,Number of Respondents
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30
5,Hack Reactor,29
6,Turing,27
7,App Academy,22
8,Hackbright Academy,22
9,Code Fellows,21


In [8]:
# Respondent count per bootcamp, most-attended first.
# NOTE(review): this cell was meant to keep only bootcamps with a significant
# number of attendees, but no threshold is applied here -- it rebuilds the full
# per-bootcamp count table from attended_df.
respondent_counts = attended_df["BootcampName"].value_counts()
bootcamp_name = (
    respondent_counts
    .rename_axis("BootcampName")
    .reset_index(name="Number of Respondents")
)
bootcamp_name

Unnamed: 0,BootcampName,Number of Respondents
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30
5,Hack Reactor,29
6,Turing,27
7,App Academy,22
8,Hackbright Academy,22
9,Code Fellows,21


In [9]:
# Count, per bootcamp, how many respondents said they would recommend it.
# The filter runs on df (all respondents), not on attended_df.
would_recommend = df["BootcampRecommend"].eq("Yes")
recommended_df = df.loc[would_recommend]

recommend_bootcamp = (
    recommended_df["BootcampName"]
    .value_counts()
    .rename_axis("BootcampName")
    .reset_index(name="Number of Recommenders")
)
recommend_bootcamp

Unnamed: 0,BootcampName,Number of Recommenders
0,General Assembly,70
1,Flatiron School,50
2,Dev Bootcamp,41
3,The Iron Yard,31
4,Hack Reactor,27
5,Turing,26
6,Prime Digital Academy,25
7,App Academy,20
8,Hackbright Academy,19
9,MakerSquare,18


In [10]:
# Merge the two count tables on the name of the bootcamp.
# A left join keeps every bootcamp that has respondents. A bootcamp with no
# recommenders never appears in recommend_bootcamp, so the default inner join
# would drop it and it could never be reported with 0% recommended.
merged_df = pd.merge(bootcamp_name, recommend_bootcamp, on="BootcampName", how="left")
# Bootcamps without a match come back as NaN from the join: they have zero
# recommenders. Cast back to integers because NaN forces the column to float.
merged_df["Number of Recommenders"] = (
    merged_df["Number of Recommenders"].fillna(0).astype("int64")
)
merged_df.head()

Unnamed: 0,BootcampName,Number of Respondents,Number of Recommenders
0,General Assembly,90,70
1,Flatiron School,54,50
2,Dev Bootcamp,48,41
3,The Iron Yard,40,31
4,Prime Digital Academy,30,25


In [13]:
# Share of each bootcamp's respondents who would recommend it, as a percentage
recommend_ratio = merged_df["Number of Recommenders"] / merged_df["Number of Respondents"]
merged_df["% Recommended"] = recommend_ratio * 100

# Order the bootcamps from highest to lowest share and round to two decimals
merged_df = merged_df.sort_values(by="% Recommended", ascending=False).round(decimals=2)

# Render the share as text such as "77.78%"; from here on the column holds
# strings rather than numbers.
merged_df["% Recommended"] = merged_df["% Recommended"].map(lambda pct: f"{pct:,.2f}%")
merged_df

Unnamed: 0,BootcampName,Number of Respondents,Number of Recommenders,% Recommended
58,Dev League,3,3,100.00%
49,devCodeCamp,4,4,100.00%
92,Founders & Coders,1,1,100.00%
91,Operation Spark,1,1,100.00%
90,Byte Academy,1,1,100.00%
89,Coder's Lab,1,1,100.00%
88,AcadGild,2,2,100.00%
87,EHD Academy,2,2,100.00%
86,Omaha Code School,2,2,100.00%
85,Eleven Fifty Academy,2,2,100.00%


In [12]:
# Export to Excel without the index column.
# to_excel does not create missing parent directories, so make sure "output/"
# exists first; otherwise the export fails on a fresh checkout.
output_file = "output/Bootcamppart2.xlsx"
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
merged_df.to_excel(output_file, index=False)