### Import Modules

In [1]:
import scipy
import sklearn.metrics as metrics
import sklearn.preprocessing as pp
import sklearn.ensemble as ensemble
import sklearn.cluster as cluster
import sklearn.linear_model as linear_model

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

import sklearn.tree as tree

import pandas as pd
import numpy as np


In [2]:
pd.options.display.max_columns = 120

### Load the Data and view

In [3]:
schema = pd.read_csv("Data/Survey_results_schema.csv")

In [4]:
schema.set_index('Column', inplace=True)

In [5]:
schema.loc['Hobbyist'].QuestionText

'Do you code as a hobby?'

In [6]:
df = pd.read_csv("Data/Survey_results_public.csv")

In [7]:
df.set_index("Respondent", inplace=True)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 64461 entries, 1 to 65112
Data columns (total 60 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   MainBranch                    64162 non-null  object 
 1   Hobbyist                      64416 non-null  object 
 2   Age                           45446 non-null  float64
 3   Age1stCode                    57900 non-null  object 
 4   CompFreq                      40069 non-null  object 
 5   CompTotal                     34826 non-null  float64
 6   ConvertedComp                 34756 non-null  float64
 7   Country                       64072 non-null  object 
 8   CurrencyDesc                  45472 non-null  object 
 9   CurrencySymbol                45472 non-null  object 
 10  DatabaseDesireNextYear        44070 non-null  object 
 11  DatabaseWorkedWith            49537 non-null  object 
 12  DevType                       49370 non-null  object 
 13  E

In [9]:
df["Hobbyist"].head()

Respondent
1    Yes
2     No
3    Yes
4    Yes
5    Yes
Name: Hobbyist, dtype: object

# Data Cleaning

In [10]:
# make hobbyist data frame; drop columns where hobbyists 
hobbyist_df = df.dropna(subset=["Hobbyist"]) 


In [11]:
#see numeric variables
hobbyist_df.describe()

Unnamed: 0,Age,CompTotal,ConvertedComp,WorkWeekHrs
count,45446.0,34826.0,34756.0,41151.0
mean,30.834111,3.190464e+242,103756.1,40.782174
std,9.585392,inf,226885.3,17.816383
min,1.0,0.0,0.0,1.0
25%,24.0,20000.0,24648.0,40.0
50%,29.0,63000.0,54049.0,40.0
75%,35.0,125000.0,95000.0,44.0
max,279.0,1.1111110000000001e+247,2000000.0,475.0


In [12]:
hobbyist_df = hobbyist_df.drop("MainBranch", axis=1)

In [13]:
#convert string types that should be numeric to numeric types
to_numeric = ['Age1stCode', 'YearsCode', 'YearsCodePro']

In [14]:
#Warning with replace on one of my columns

In [15]:
hobbyist_df.replace({'Younger than 5 years' : '5', 'Older than 85': '85'}, inplace=True)

In [16]:
hobbyist_df.replace({'Less than 1 year' : '0', 'More than 50 years': '50'}, inplace=True)

In [17]:
hobbyist_df.replace({'Less than 1 year' : '0', 'More than 50 years': '50'}, inplace=True)

## making Numeric columns

In [18]:
hobbyist_df.loc[:,"Age1stCode"] = pd.to_numeric(hobbyist_df.Age1stCode)

In [19]:
hobbyist_df.loc[:,"YearsCode"] = pd.to_numeric(hobbyist_df.YearsCode)
hobbyist_df.loc[:,"YearsCodePro"] = pd.to_numeric(hobbyist_df.YearsCodePro)

In [20]:

hobbyist_df.WorkWeekHrs.apply(lambda x: x if x <= 150 else np.nan)

Respondent
1        50.0
2         NaN
3         NaN
4        40.0
5         NaN
         ... 
64858     NaN
64867     NaN
64898     NaN
64925     NaN
65112     NaN
Name: WorkWeekHrs, Length: 64416, dtype: float64

In [21]:
#Create IsNa for all numeric columns

In [22]:
numeric_df = hobbyist_df.select_dtypes(include=["int", "float"])
numeric_columns = numeric_df.columns
numeric_columns

Index(['Age', 'Age1stCode', 'CompTotal', 'ConvertedComp', 'WorkWeekHrs',
       'YearsCode', 'YearsCodePro'],
      dtype='object')

In [23]:
hobbyist_df.WorkWeekHrs.isna()

Respondent
1        False
2         True
3         True
4        False
5         True
         ...  
64858     True
64867     True
64898     True
64925     True
65112     True
Name: WorkWeekHrs, Length: 64416, dtype: bool

In [24]:
def create_isNa(columns, dFrame):
    for i in columns:
        label = i + "_isNaN"
        series = dFrame[i].isna()
        dFrame.loc[:,label] = series.apply(lambda x: 1 if x else 0)
        

In [25]:
create_isNa(numeric_columns, hobbyist_df)

### Creating Dummy Variables out of Categorical Columns

In [26]:
cat_columns = hobbyist_df.select_dtypes(include=["object"]).columns
cat_columns

Index(['Hobbyist', 'CompFreq', 'Country', 'CurrencyDesc', 'CurrencySymbol',
       'DatabaseDesireNextYear', 'DatabaseWorkedWith', 'DevType', 'EdLevel',
       'Employment', 'Ethnicity', 'Gender', 'JobFactors', 'JobSat', 'JobSeek',
       'LanguageDesireNextYear', 'LanguageWorkedWith',
       'MiscTechDesireNextYear', 'MiscTechWorkedWith',
       'NEWCollabToolsDesireNextYear', 'NEWCollabToolsWorkedWith', 'NEWDevOps',
       'NEWDevOpsImpt', 'NEWEdImpt', 'NEWJobHunt', 'NEWJobHuntResearch',
       'NEWLearn', 'NEWOffTopic', 'NEWOnboardGood', 'NEWOtherComms',
       'NEWOvertime', 'NEWPurchaseResearch', 'NEWPurpleLink', 'NEWSOSites',
       'NEWStuck', 'OpSys', 'OrgSize', 'PlatformDesireNextYear',
       'PlatformWorkedWith', 'PurchaseWhat', 'Sexuality', 'SOAccount',
       'SOComm', 'SOPartFreq', 'SOVisitFreq', 'SurveyEase', 'SurveyLength',
       'Trans', 'UndergradMajor', 'WebframeDesireNextYear',
       'WebframeWorkedWith', 'WelcomeChange'],
      dtype='object')

### Find categorical columns with more than X categories

In [27]:
def find_many_categories(columns, df, minimum, critical):
    
    #make dictionary with the usefull columns and the not usefull columns
    useful_columns = dict()
    not_useful_columns = dict()
    
    for i in columns:
        
        count = len(df[i].unique())
        if minimum:
            if count > critical:
                useful_columns[i] = count
            else:
                not_useful_columns[i] = count
                
                
        else:
            if count < critical:
                useful_columns[i] = count
            else:
                not_useful_columns[i] = count
    
    
    useful_df = df.loc[:,useful_columns.keys()]
    non_useful_df = df.loc[:,not_useful_columns.keys()]
        
    
    return (useful_df, non_useful_df)

In [28]:
hobbyist_ready_df, hobbyist_not_ready = find_many_categories(cat_columns, hobbyist_df, False, 8)
hobbyist_ready_df.fillna("isNaN", inplace=True)

In [29]:
hobbyist_df_ready = pd.concat([hobbyist_ready_df, hobbyist_df.select_dtypes(include=["int", "float"])], axis=1)

### Lets Get the variables in hobbyist_not_ready prepared for get_dummies method

In [30]:
#same information as country column
hobbyist_not_ready.drop(columns=["CurrencyDesc", "CurrencySymbol"], inplace=True)

In [31]:
hobbyist_not_ready.head(10)

Unnamed: 0_level_0,Country,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWJobHunt,NEWJobHuntResearch,NEWPurchaseResearch,NEWSOSites,NEWStuck,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,Sexuality,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
1,Germany,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,"Languages, frameworks, and other technologies ...",C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Microsoft Teams;Microsoft Azure;Trello,Confluence;Jira;Slack;Microsoft Azure;Trello,,,Start a free trial;Ask developers I know/work ...,Stack Overflow (public Q&A for anyone who codes),Visit Stack Overflow;Go for a walk or other ph...,2 to 9 employees,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,Straight / Heterosexual,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core
2,United Kingdom,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,Python;Swift,JavaScript;Swift,React Native;TensorFlow;Unity 3D,React Native,Github;Slack,Confluence;Jira;Github;Gitlab;Slack,,,,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,"1,000 to 4,999 employees",iOS;Kubernetes;Linux;MacOS,iOS,,"Computer science, computer engineering, or sof...",,
3,Russian Federation,,,,,,,,,Objective-C;Python;Swift,Objective-C;Python;Swift,,,,,,,,Stack Overflow (public Q&A for anyone who codes),,,,,,,,
4,Albania,,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",,White or of European descent,Man,Flex time or a flexible schedule;Office enviro...,,,,,,,Curious about other opportunities;Wanting to w...,,,Stack Overflow (public Q&A for anyone who code...,,20 to 99 employees,,,Straight / Heterosexual,"Computer science, computer engineering, or sof...",,
5,United States,MySQL;PostgreSQL,MySQL;PostgreSQL;Redis;SQLite,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,White or of European descent,Man,,Java;Ruby;Scala,HTML/CSS;Ruby;SQL,Ansible;Chef,Ansible,"Github;Google Suite (Docs, Meet, etc)",Confluence;Jira;Github;Slack;Google Suite (Doc...,,,Start a free trial;Ask developers I know/work ...,Stack Overflow (public Q&A for anyone who code...,Call a coworker or friend;Visit Stack Overflow...,,Docker;Google Cloud Platform;Heroku;Linux;Windows,AWS;Docker;Linux;MacOS;Windows,Straight / Heterosexual,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails
6,Germany,,,"Designer;Developer, front-end;Developer, mobile","Secondary school (e.g. American high school, G...",Employed full-time,White or of European descent,Man,Diversity of the company or organization;Langu...,HTML/CSS;Java;JavaScript,HTML/CSS;Java;JavaScript,,,Github;Slack,Confluence;Github;Slack;Trello,,,Ask developers I know/work with;Read ratings o...,Stack Overflow (public Q&A for anyone who code...,Play games;Visit Stack Overflow;Watch help / t...,,Android,Android;Docker;WordPress,Straight / Heterosexual,,React.js,
7,India,,,"Developer, back-end;Developer, front-end;Devel...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,C#;HTML/CSS;PHP,C#;HTML/CSS;PHP,,,,,Better compensation;Growth or leadership oppor...,"Read company media, such as employee blogs or ...",Start a free trial;Ask developers I know/work ...,Stack Overflow (public Q&A for anyone who codes),,20 to 99 employees,,,,"Computer science, computer engineering, or sof...",,
8,United States,MongoDB,MariaDB;MySQL;Redis,"Developer, back-end;Developer, desktop or ente...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,White or of European descent,Man,Remote work options;Opportunities for professi...,JavaScript,Python;SQL,Unity 3D,Ansible,,Confluence;Jira;Github;Slack;Microsoft Teams;G...,Curious about other opportunities;Better compe...,"Read company media, such as employee blogs or ...",Start a free trial;Ask developers I know/work ...,Stack Overflow (public Q&A for anyone who code...,Play games;Call a coworker or friend;Visit Sta...,20 to 99 employees,iOS;Slack Apps and Integrations,Docker,Straight / Heterosexual,"Computer science, computer engineering, or sof...",Django;React.js;Vue.js,Flask
9,Tunisia,,,"Developer, full-stack","Professional degree (JD, MD, etc.)","Independent contractor, freelancer, or self-em...",,Man,Diversity of the company or organization;Remot...,Python;Rust,HTML/CSS;JavaScript;PHP,,,,"Github;Slack;Trello;Google Suite (Docs, Meet, ...",Curious about other opportunities;Growth or le...,,Start a free trial,Stack Overflow (public Q&A for anyone who code...,Meditate,"Just me - I am a freelancer, sole proprietor, ...",,WordPress,Straight / Heterosexual,"Computer science, computer engineering, or sof...",Angular.js,jQuery
10,United Kingdom,Microsoft SQL Server,Microsoft SQL Server,"Database administrator;Developer, full-stack;D...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed full-time,White or of European descent,Man,Flex time or a flexible schedule;How widely us...,HTML/CSS;Java;JavaScript;Python;R;SQL,HTML/CSS;Java;JavaScript;Python;SQL,Pandas;TensorFlow,Pandas,Github;Microsoft Teams,Github;Microsoft Teams,Curious about other opportunities;Better compe...,Company reviews from third party sites (e.g. G...,Start a free trial;Ask developers I know/work ...,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow,2 to 9 employees,Android;Linux;Raspberry Pi;Windows,Android;Linux;Raspberry Pi;Windows,Straight / Heterosexual,Mathematics or statistics,Flask;jQuery,Flask;jQuery


In [32]:
#fill na categories with isNaN
hobbyist_not_ready.fillna("isNaN", inplace=True)

In [33]:
#separate between columns that need to be split into lists and columns that are already
#ready for get dummies to be applied
ready_list = ["EdLevel", "Employment", "OrgSize", "Sexuality", "UndergradMajor"]

delimeter_df = hobbyist_not_ready.drop(ready_list, axis=1)

ready_for_dummies = hobbyist_not_ready.loc[:, ready_list]

Country = delimeter_df.drop("Country", axis=1)
delimeter_df.drop("Country", axis=1, inplace=True)


In [34]:
#create df1 with get_dummies on the columns that have small number of categories
df1 = pd.get_dummies(ready_for_dummies, drop_first=True)
df1.head(3)

Unnamed: 0_level_0,"EdLevel_Bachelor’s degree (B.A., B.S., B.Eng., etc.)",EdLevel_I never completed any formal education,"EdLevel_Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","EdLevel_Other doctoral degree (Ph.D., Ed.D., etc.)",EdLevel_Primary/elementary school,"EdLevel_Professional degree (JD, MD, etc.)","EdLevel_Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",EdLevel_Some college/university study without earning a degree,EdLevel_isNaN,Employment_Employed part-time,"Employment_Independent contractor, freelancer, or self-employed","Employment_Not employed, and not looking for work","Employment_Not employed, but looking for work",Employment_Retired,Employment_Student,Employment_isNaN,OrgSize_10 to 19 employees,"OrgSize_10,000 or more employees",OrgSize_100 to 499 employees,OrgSize_2 to 9 employees,OrgSize_20 to 99 employees,"OrgSize_5,000 to 9,999 employees",OrgSize_500 to 999 employees,"OrgSize_Just me - I am a freelancer, sole proprietor, etc.",OrgSize_isNaN,Sexuality_Bisexual;Gay or Lesbian,Sexuality_Bisexual;Gay or Lesbian;Queer,Sexuality_Bisexual;Gay or Lesbian;Straight / Heterosexual,Sexuality_Bisexual;Gay or Lesbian;Straight / Heterosexual;Queer,Sexuality_Bisexual;Queer,Sexuality_Bisexual;Straight / Heterosexual,Sexuality_Bisexual;Straight / Heterosexual;Queer,Sexuality_Gay or Lesbian,Sexuality_Gay or Lesbian;Queer,Sexuality_Gay or Lesbian;Straight / Heterosexual,Sexuality_Queer,Sexuality_Straight / Heterosexual,Sexuality_Straight / Heterosexual;Queer,Sexuality_isNaN,"UndergradMajor_A health science (such as nursing, pharmacy, radiology, etc.)","UndergradMajor_A humanities discipline (such as literature, history, philosophy, etc.)","UndergradMajor_A natural science (such as biology, chemistry, physics, etc.)","UndergradMajor_A social science (such as anthropology, psychology, political science, etc.)","UndergradMajor_Another engineering discipline (such as civil, electrical, mechanical, etc.)","UndergradMajor_Computer science, computer engineering, or software engineering","UndergradMajor_Fine arts or performing arts (such as graphic design, music, studio art, etc.)",UndergradMajor_I never declared a major,"UndergradMajor_Information systems, information technology, or system administration",UndergradMajor_Mathematics or statistics,UndergradMajor_Web development or web design,UndergradMajor_isNaN
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1


In [35]:
#flatten a series of lists into one series
def to_1D(series):
    return pd.Series([x for _list in series for x in _list])

In [36]:
#split by ; delimeter 
d_cols = delimeter_df.columns
for i in d_cols:
    delimeter_df.loc[:,i] = delimeter_df[i].str.split(";")


In [37]:
#get rid of special characters like ++
#for i in d_cols:
def replace_special(x):
    if "++" in x:
        x = x.replace("++", " Plus Plus")
        return x
    return x

In [38]:
#Expand all columns in delimeter_df and grab the value counts of each
dataFramesList = []

for i in d_cols:
    expanded = to_1D(delimeter_df[i])
    
    new_col_names = set(expanded)
    new_col_names = map(replace_special, new_col_names) 
    
    local_df = pd.DataFrame()
    
    for j in new_col_names:
        series_name = i + "_is_" + j
        series = hobbyist_not_ready[i].str.contains(j).rename(series_name)
        local_df = pd.concat([local_df,series], axis=1)
    dataFramesList.append(local_df)       

  return func(self, *args, **kwargs)


In [39]:
df2 = pd.concat(dataFramesList, axis=1)

In [40]:
df2 = df2.apply(lambda x: x*1)

In [41]:
df3 = pd.get_dummies(hobbyist_df_ready.drop('Hobbyist', axis=1), drop_first=True)

In [42]:
df2

Unnamed: 0,DatabaseDesireNextYear_is_MariaDB,DatabaseDesireNextYear_is_Cassandra,DatabaseDesireNextYear_is_Elasticsearch,DatabaseDesireNextYear_is_Microsoft SQL Server,DatabaseDesireNextYear_is_MySQL,DatabaseDesireNextYear_is_Couchbase,DatabaseDesireNextYear_is_Firebase,DatabaseDesireNextYear_is_isNaN,DatabaseDesireNextYear_is_IBM DB2,DatabaseDesireNextYear_is_PostgreSQL,DatabaseDesireNextYear_is_DynamoDB,DatabaseDesireNextYear_is_MongoDB,DatabaseDesireNextYear_is_Redis,DatabaseDesireNextYear_is_SQLite,DatabaseDesireNextYear_is_Oracle,DatabaseWorkedWith_is_MariaDB,DatabaseWorkedWith_is_Elasticsearch,DatabaseWorkedWith_is_Cassandra,DatabaseWorkedWith_is_Microsoft SQL Server,DatabaseWorkedWith_is_MySQL,DatabaseWorkedWith_is_Couchbase,DatabaseWorkedWith_is_Firebase,DatabaseWorkedWith_is_isNaN,DatabaseWorkedWith_is_IBM DB2,DatabaseWorkedWith_is_PostgreSQL,DatabaseWorkedWith_is_DynamoDB,DatabaseWorkedWith_is_MongoDB,DatabaseWorkedWith_is_Redis,DatabaseWorkedWith_is_SQLite,DatabaseWorkedWith_is_Oracle,DevType_is_Designer,DevType_is_Academic researcher,"DevType_is_Developer, game or graphics","DevType_is_Developer, embedded applications or devices",DevType_is_Data scientist or machine learning specialist,DevType_is_Database administrator,DevType_is_Data or business analyst,DevType_is_System administrator,DevType_is_Scientist,DevType_is_Senior executive/VP,DevType_is_Product manager,"DevType_is_Engineer, data",DevType_is_Educator,DevType_is_Marketing or sales professional,"DevType_is_Developer, front-end","DevType_is_Engineer, site reliability",DevType_is_Engineering manager,"DevType_is_Developer, desktop or enterprise applications",DevType_is_DevOps specialist,"DevType_is_Developer, mobile",DevType_is_isNaN,"DevType_is_Developer, back-end","DevType_is_Developer, full-stack","DevType_is_Developer, QA or test",Ethnicity_is_South Asian,Ethnicity_is_Multiracial,Ethnicity_is_Southeast Asian,Ethnicity_is_White or of European descent,Ethnicity_is_Biracial,Ethnicity_is_Black or of African descent,...,PlatformDesireNextYear_is_AWS,PlatformDesireNextYear_is_Android,PlatformDesireNextYear_is_isNaN,PlatformDesireNextYear_is_WordPress,PlatformDesireNextYear_is_Arduino,PlatformDesireNextYear_is_Windows,PlatformDesireNextYear_is_Slack Apps and Integrations,PlatformDesireNextYear_is_Kubernetes,PlatformDesireNextYear_is_IBM Cloud or Watson,PlatformWorkedWith_is_Docker,PlatformWorkedWith_is_iOS,PlatformWorkedWith_is_MacOS,PlatformWorkedWith_is_AWS,PlatformWorkedWith_is_Linux,PlatformWorkedWith_is_Raspberry Pi,PlatformWorkedWith_is_Heroku,PlatformWorkedWith_is_Google Cloud Platform,PlatformWorkedWith_is_Microsoft Azure,PlatformWorkedWith_is_Android,PlatformWorkedWith_is_isNaN,PlatformWorkedWith_is_WordPress,PlatformWorkedWith_is_Arduino,PlatformWorkedWith_is_Windows,PlatformWorkedWith_is_Slack Apps and Integrations,PlatformWorkedWith_is_Kubernetes,PlatformWorkedWith_is_IBM Cloud or Watson,WebframeDesireNextYear_is_Spring,WebframeDesireNextYear_is_Ruby on Rails,WebframeDesireNextYear_is_Django,WebframeDesireNextYear_is_Angular.js,WebframeDesireNextYear_is_Flask,WebframeDesireNextYear_is_Express,WebframeDesireNextYear_is_ASP.NET,WebframeDesireNextYear_is_jQuery,WebframeDesireNextYear_is_Vue.js,WebframeDesireNextYear_is_Gatsby,WebframeDesireNextYear_is_React.js,WebframeDesireNextYear_is_isNaN,WebframeDesireNextYear_is_Symfony,WebframeDesireNextYear_is_Angular,WebframeDesireNextYear_is_Laravel,WebframeDesireNextYear_is_Drupal,WebframeDesireNextYear_is_ASP.NET Core,WebframeWorkedWith_is_Spring,WebframeWorkedWith_is_Ruby on Rails,WebframeWorkedWith_is_Angular.js,WebframeWorkedWith_is_Django,WebframeWorkedWith_is_Flask,WebframeWorkedWith_is_Express,WebframeWorkedWith_is_ASP.NET,WebframeWorkedWith_is_jQuery,WebframeWorkedWith_is_Vue.js,WebframeWorkedWith_is_Gatsby,WebframeWorkedWith_is_isNaN,WebframeWorkedWith_is_React.js,WebframeWorkedWith_is_Symfony,WebframeWorkedWith_is_Angular,WebframeWorkedWith_is_Laravel,WebframeWorkedWith_is_Drupal,WebframeWorkedWith_is_ASP.NET Core
1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,...,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
5,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64858,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
64867,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
64898,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
64925,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [43]:
df1

Unnamed: 0_level_0,"EdLevel_Bachelor’s degree (B.A., B.S., B.Eng., etc.)",EdLevel_I never completed any formal education,"EdLevel_Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","EdLevel_Other doctoral degree (Ph.D., Ed.D., etc.)",EdLevel_Primary/elementary school,"EdLevel_Professional degree (JD, MD, etc.)","EdLevel_Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",EdLevel_Some college/university study without earning a degree,EdLevel_isNaN,Employment_Employed part-time,"Employment_Independent contractor, freelancer, or self-employed","Employment_Not employed, and not looking for work","Employment_Not employed, but looking for work",Employment_Retired,Employment_Student,Employment_isNaN,OrgSize_10 to 19 employees,"OrgSize_10,000 or more employees",OrgSize_100 to 499 employees,OrgSize_2 to 9 employees,OrgSize_20 to 99 employees,"OrgSize_5,000 to 9,999 employees",OrgSize_500 to 999 employees,"OrgSize_Just me - I am a freelancer, sole proprietor, etc.",OrgSize_isNaN,Sexuality_Bisexual;Gay or Lesbian,Sexuality_Bisexual;Gay or Lesbian;Queer,Sexuality_Bisexual;Gay or Lesbian;Straight / Heterosexual,Sexuality_Bisexual;Gay or Lesbian;Straight / Heterosexual;Queer,Sexuality_Bisexual;Queer,Sexuality_Bisexual;Straight / Heterosexual,Sexuality_Bisexual;Straight / Heterosexual;Queer,Sexuality_Gay or Lesbian,Sexuality_Gay or Lesbian;Queer,Sexuality_Gay or Lesbian;Straight / Heterosexual,Sexuality_Queer,Sexuality_Straight / Heterosexual,Sexuality_Straight / Heterosexual;Queer,Sexuality_isNaN,"UndergradMajor_A health science (such as nursing, pharmacy, radiology, etc.)","UndergradMajor_A humanities discipline (such as literature, history, philosophy, etc.)","UndergradMajor_A natural science (such as biology, chemistry, physics, etc.)","UndergradMajor_A social science (such as anthropology, psychology, political science, etc.)","UndergradMajor_Another engineering discipline (such as civil, electrical, mechanical, etc.)","UndergradMajor_Computer science, computer engineering, or software engineering","UndergradMajor_Fine arts or performing arts (such as graphic design, music, studio art, etc.)",UndergradMajor_I never declared a major,"UndergradMajor_Information systems, information technology, or system administration",UndergradMajor_Mathematics or statistics,UndergradMajor_Web development or web design,UndergradMajor_isNaN
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64858,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
64867,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
64898,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
64925,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1


In [44]:
df3

Unnamed: 0_level_0,Age,Age1stCode,CompTotal,ConvertedComp,WorkWeekHrs,YearsCode,YearsCodePro,Age_isNaN,Age1stCode_isNaN,CompTotal_isNaN,ConvertedComp_isNaN,WorkWeekHrs_isNaN,YearsCode_isNaN,YearsCodePro_isNaN,CompFreq_Weekly,CompFreq_Yearly,CompFreq_isNaN,JobSat_Slightly dissatisfied,JobSat_Slightly satisfied,JobSat_Very dissatisfied,JobSat_Very satisfied,JobSat_isNaN,JobSeek_I am not interested in new job opportunities,"JobSeek_I’m not actively looking, but I am open to new opportunities",JobSeek_isNaN,NEWDevOps_Not sure,NEWDevOps_Yes,NEWDevOps_isNaN,NEWDevOpsImpt_Neutral,NEWDevOpsImpt_Not at all important,NEWDevOpsImpt_Not very important,NEWDevOpsImpt_Somewhat important,NEWDevOpsImpt_isNaN,NEWEdImpt_Fairly important,NEWEdImpt_Not at all important/not necessary,NEWEdImpt_Somewhat important,NEWEdImpt_Very important,NEWEdImpt_isNaN,NEWLearn_Once a decade,NEWLearn_Once a year,NEWLearn_Once every few years,NEWLearn_isNaN,NEWOffTopic_Not sure,NEWOffTopic_Yes,NEWOffTopic_isNaN,NEWOnboardGood_Onboarding? What onboarding?,NEWOnboardGood_Yes,NEWOnboardGood_isNaN,NEWOtherComms_Yes,NEWOtherComms_isNaN,NEWOvertime_Occasionally: 1-2 days per quarter but less than monthly,NEWOvertime_Often: 1-2 days per week or more,NEWOvertime_Rarely: 1-2 days per year or less,NEWOvertime_Sometimes: 1-2 days per month but less than weekly,NEWOvertime_isNaN,NEWPurpleLink_Annoyed,"NEWPurpleLink_Hello, old friend",NEWPurpleLink_Indifferent,NEWPurpleLink_isNaN,OpSys_Linux-based,OpSys_MacOS,OpSys_Windows,OpSys_isNaN,PurchaseWhat_I have little or no influence,PurchaseWhat_I have some influence,PurchaseWhat_isNaN,SOAccount_Not sure/can't remember,SOAccount_Yes,SOAccount_isNaN,"SOComm_No, not at all","SOComm_No, not really",SOComm_Not sure,"SOComm_Yes, definitely","SOComm_Yes, somewhat",SOComm_isNaN,SOPartFreq_A few times per week,SOPartFreq_Daily or almost daily,SOPartFreq_I have never participated in Q&A on Stack Overflow,SOPartFreq_Less than once per month or monthly,SOPartFreq_Multiple times per day,SOPartFreq_isNaN,SOVisitFreq_A few times per week,SOVisitFreq_Daily or almost daily,SOVisitFreq_I have never visited Stack Overflow (before today),SOVisitFreq_Less than once per month or monthly,SOVisitFreq_Multiple times per day,SOVisitFreq_isNaN,SurveyEase_Easy,SurveyEase_Neither easy nor difficult,SurveyEase_isNaN,SurveyLength_Too long,SurveyLength_Too short,SurveyLength_isNaN,Trans_Yes,Trans_isNaN,WelcomeChange_A lot more welcome now than last year,WelcomeChange_Just as welcome now as I felt last year,WelcomeChange_Not applicable - I did not use Stack Overflow last year,WelcomeChange_Somewhat less welcome now than last year,WelcomeChange_Somewhat more welcome now than last year,WelcomeChange_isNaN
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
1,,13.0,,,50.0,36.0,27.0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
2,,19.0,,,,7.0,4.0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0
3,,15.0,,,,4.0,,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0
4,25.0,18.0,,,40.0,7.0,4.0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0
5,31.0,16.0,,,,15.0,8.0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64858,,16.0,,,,10.0,0.0,1,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1
64867,,,,,,,,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1
64898,,,,,,,,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1
64925,,,,,,,,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1


In [45]:
pd.concat([df3, hobbyist_df_ready["Hobbyist"]]).head(10)

Unnamed: 0_level_0,0,Age,Age1stCode,Age1stCode_isNaN,Age_isNaN,CompFreq_Weekly,CompFreq_Yearly,CompFreq_isNaN,CompTotal,CompTotal_isNaN,ConvertedComp,ConvertedComp_isNaN,JobSat_Slightly dissatisfied,JobSat_Slightly satisfied,JobSat_Very dissatisfied,JobSat_Very satisfied,JobSat_isNaN,JobSeek_I am not interested in new job opportunities,"JobSeek_I’m not actively looking, but I am open to new opportunities",JobSeek_isNaN,NEWDevOpsImpt_Neutral,NEWDevOpsImpt_Not at all important,NEWDevOpsImpt_Not very important,NEWDevOpsImpt_Somewhat important,NEWDevOpsImpt_isNaN,NEWDevOps_Not sure,NEWDevOps_Yes,NEWDevOps_isNaN,NEWEdImpt_Fairly important,NEWEdImpt_Not at all important/not necessary,NEWEdImpt_Somewhat important,NEWEdImpt_Very important,NEWEdImpt_isNaN,NEWLearn_Once a decade,NEWLearn_Once a year,NEWLearn_Once every few years,NEWLearn_isNaN,NEWOffTopic_Not sure,NEWOffTopic_Yes,NEWOffTopic_isNaN,NEWOnboardGood_Onboarding? What onboarding?,NEWOnboardGood_Yes,NEWOnboardGood_isNaN,NEWOtherComms_Yes,NEWOtherComms_isNaN,NEWOvertime_Occasionally: 1-2 days per quarter but less than monthly,NEWOvertime_Often: 1-2 days per week or more,NEWOvertime_Rarely: 1-2 days per year or less,NEWOvertime_Sometimes: 1-2 days per month but less than weekly,NEWOvertime_isNaN,NEWPurpleLink_Annoyed,"NEWPurpleLink_Hello, old friend",NEWPurpleLink_Indifferent,NEWPurpleLink_isNaN,OpSys_Linux-based,OpSys_MacOS,OpSys_Windows,OpSys_isNaN,PurchaseWhat_I have little or no influence,PurchaseWhat_I have some influence,PurchaseWhat_isNaN,SOAccount_Not sure/can't remember,SOAccount_Yes,SOAccount_isNaN,"SOComm_No, not at all","SOComm_No, not really",SOComm_Not sure,"SOComm_Yes, definitely","SOComm_Yes, somewhat",SOComm_isNaN,SOPartFreq_A few times per week,SOPartFreq_Daily or almost daily,SOPartFreq_I have never participated in Q&A on Stack Overflow,SOPartFreq_Less than once per month or monthly,SOPartFreq_Multiple times per day,SOPartFreq_isNaN,SOVisitFreq_A few times per week,SOVisitFreq_Daily or almost daily,SOVisitFreq_I have never visited Stack Overflow (before today),SOVisitFreq_Less than once per month or monthly,SOVisitFreq_Multiple times per day,SOVisitFreq_isNaN,SurveyEase_Easy,SurveyEase_Neither easy nor difficult,SurveyEase_isNaN,SurveyLength_Too long,SurveyLength_Too short,SurveyLength_isNaN,Trans_Yes,Trans_isNaN,WelcomeChange_A lot more welcome now than last year,WelcomeChange_Just as welcome now as I felt last year,WelcomeChange_Not applicable - I did not use Stack Overflow last year,WelcomeChange_Somewhat less welcome now than last year,WelcomeChange_Somewhat more welcome now than last year,WelcomeChange_isNaN,WorkWeekHrs,WorkWeekHrs_isNaN,YearsCode,YearsCodePro,YearsCodePro_isNaN,YearsCode_isNaN
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1
1,,,13.0,0.0,1.0,0.0,0.0,0.0,,1.0,,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,50.0,0.0,36.0,27.0,0.0,0.0
2,,,19.0,0.0,1.0,0.0,0.0,1.0,,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,,1.0,7.0,4.0,0.0,0.0
3,,,15.0,0.0,1.0,0.0,0.0,1.0,,1.0,,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,,1.0,4.0,,1.0,0.0
4,,25.0,18.0,0.0,0.0,0.0,0.0,1.0,,1.0,,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,40.0,0.0,7.0,4.0,0.0,0.0
5,,31.0,16.0,0.0,0.0,0.0,0.0,1.0,,1.0,,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,1.0,15.0,8.0,0.0,0.0
6,,,14.0,0.0,1.0,0.0,0.0,1.0,,1.0,,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,,1.0,6.0,4.0,0.0,0.0
7,,,18.0,0.0,1.0,0.0,0.0,0.0,,1.0,,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,,1.0,6.0,4.0,0.0,0.0
8,,36.0,12.0,0.0,0.0,0.0,1.0,0.0,116000.0,0.0,116000.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,39.0,0.0,17.0,13.0,0.0,0.0
9,,30.0,20.0,0.0,0.0,0.0,0.0,1.0,,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,50.0,0.0,6.0,4.0,0.0,0.0
10,,22.0,14.0,0.0,0.0,0.0,1.0,0.0,25000.0,0.0,32315.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,36.0,0.0,8.0,4.0,0.0,0.0


In [46]:
final_df = pd.concat([df1, df2, df3], axis=1)

In [47]:
final_df["Hobbyist"] = hobbyist_df["Hobbyist"]

In [48]:
final_df

Unnamed: 0,"EdLevel_Bachelor’s degree (B.A., B.S., B.Eng., etc.)",EdLevel_I never completed any formal education,"EdLevel_Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","EdLevel_Other doctoral degree (Ph.D., Ed.D., etc.)",EdLevel_Primary/elementary school,"EdLevel_Professional degree (JD, MD, etc.)","EdLevel_Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",EdLevel_Some college/university study without earning a degree,EdLevel_isNaN,Employment_Employed part-time,"Employment_Independent contractor, freelancer, or self-employed","Employment_Not employed, and not looking for work","Employment_Not employed, but looking for work",Employment_Retired,Employment_Student,Employment_isNaN,OrgSize_10 to 19 employees,"OrgSize_10,000 or more employees",OrgSize_100 to 499 employees,OrgSize_2 to 9 employees,OrgSize_20 to 99 employees,"OrgSize_5,000 to 9,999 employees",OrgSize_500 to 999 employees,"OrgSize_Just me - I am a freelancer, sole proprietor, etc.",OrgSize_isNaN,Sexuality_Bisexual;Gay or Lesbian,Sexuality_Bisexual;Gay or Lesbian;Queer,Sexuality_Bisexual;Gay or Lesbian;Straight / Heterosexual,Sexuality_Bisexual;Gay or Lesbian;Straight / Heterosexual;Queer,Sexuality_Bisexual;Queer,Sexuality_Bisexual;Straight / Heterosexual,Sexuality_Bisexual;Straight / Heterosexual;Queer,Sexuality_Gay or Lesbian,Sexuality_Gay or Lesbian;Queer,Sexuality_Gay or Lesbian;Straight / Heterosexual,Sexuality_Queer,Sexuality_Straight / Heterosexual,Sexuality_Straight / Heterosexual;Queer,Sexuality_isNaN,"UndergradMajor_A health science (such as nursing, pharmacy, radiology, etc.)","UndergradMajor_A humanities discipline (such as literature, history, philosophy, etc.)","UndergradMajor_A natural science (such as biology, chemistry, physics, etc.)","UndergradMajor_A social science (such as anthropology, psychology, political science, etc.)","UndergradMajor_Another engineering discipline (such as civil, electrical, mechanical, etc.)","UndergradMajor_Computer science, computer engineering, or software engineering","UndergradMajor_Fine arts or performing arts (such as graphic design, music, studio art, etc.)",UndergradMajor_I never declared a major,"UndergradMajor_Information systems, information technology, or system administration",UndergradMajor_Mathematics or statistics,UndergradMajor_Web development or web design,UndergradMajor_isNaN,DatabaseDesireNextYear_is_MariaDB,DatabaseDesireNextYear_is_Cassandra,DatabaseDesireNextYear_is_Elasticsearch,DatabaseDesireNextYear_is_Microsoft SQL Server,DatabaseDesireNextYear_is_MySQL,DatabaseDesireNextYear_is_Couchbase,DatabaseDesireNextYear_is_Firebase,DatabaseDesireNextYear_is_isNaN,DatabaseDesireNextYear_is_IBM DB2,...,NEWOffTopic_Not sure,NEWOffTopic_Yes,NEWOffTopic_isNaN,NEWOnboardGood_Onboarding? What onboarding?,NEWOnboardGood_Yes,NEWOnboardGood_isNaN,NEWOtherComms_Yes,NEWOtherComms_isNaN,NEWOvertime_Occasionally: 1-2 days per quarter but less than monthly,NEWOvertime_Often: 1-2 days per week or more,NEWOvertime_Rarely: 1-2 days per year or less,NEWOvertime_Sometimes: 1-2 days per month but less than weekly,NEWOvertime_isNaN,NEWPurpleLink_Annoyed,"NEWPurpleLink_Hello, old friend",NEWPurpleLink_Indifferent,NEWPurpleLink_isNaN,OpSys_Linux-based,OpSys_MacOS,OpSys_Windows,OpSys_isNaN,PurchaseWhat_I have little or no influence,PurchaseWhat_I have some influence,PurchaseWhat_isNaN,SOAccount_Not sure/can't remember,SOAccount_Yes,SOAccount_isNaN,"SOComm_No, not at all","SOComm_No, not really",SOComm_Not sure,"SOComm_Yes, definitely","SOComm_Yes, somewhat",SOComm_isNaN,SOPartFreq_A few times per week,SOPartFreq_Daily or almost daily,SOPartFreq_I have never participated in Q&A on Stack Overflow,SOPartFreq_Less than once per month or monthly,SOPartFreq_Multiple times per day,SOPartFreq_isNaN,SOVisitFreq_A few times per week,SOVisitFreq_Daily or almost daily,SOVisitFreq_I have never visited Stack Overflow (before today),SOVisitFreq_Less than once per month or monthly,SOVisitFreq_Multiple times per day,SOVisitFreq_isNaN,SurveyEase_Easy,SurveyEase_Neither easy nor difficult,SurveyEase_isNaN,SurveyLength_Too long,SurveyLength_Too short,SurveyLength_isNaN,Trans_Yes,Trans_isNaN,WelcomeChange_A lot more welcome now than last year,WelcomeChange_Just as welcome now as I felt last year,WelcomeChange_Not applicable - I did not use Stack Overflow last year,WelcomeChange_Somewhat less welcome now than last year,WelcomeChange_Somewhat more welcome now than last year,WelcomeChange_isNaN,Hobbyist
1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,Yes
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,No
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,...,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,Yes
4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,Yes
5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64858,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,Yes
64867,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,...,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,Yes
64898,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,...,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,Yes
64925,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,...,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,Yes


In [49]:
final_df.to_csv("data/cleaned.csv")

In [50]:
final_df["JobSat_isNaN"]

1        0
2        0
3        1
4        0
5        1
        ..
64858    1
64867    1
64898    1
64925    1
65112    1
Name: JobSat_isNaN, Length: 64416, dtype: uint8