In [15]:
import pandas as pd
import numpy as np
import os
from io import StringIO

import plotly.figure_factory as ff
import matplotlib.pyplot as plt
import graphviz

from sklearn import preprocessing
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

import pydotplus

%matplotlib inline
plt.rcParams['figure.figsize'] = [20, 10]

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 400)

os.getcwd()



'/Users/lubagloukhov/Documents/Consulting/PiqueSolutions/JavaSE_202007/scripts'

In [2]:
# Raw (de-duplicated) survey export; the path is relative to the working directory.
survey_csv_path = '../data/input/SurveyReport-rawdata_20200702_deduped.csv'
raw_df = pd.read_csv(survey_csv_path)

# 1. Create: <br> &emsp; - short_name for columns

In [3]:
# Display the original (long) survey column headers as a plain list.
list(raw_df.columns)

['Response ID',
 'IP Address',
 'Timestamp (mm/dd/yyyy)',
 'Duplicate',
 'Time Taken to Complete (Seconds)',
 'Seq. Number',
 'External Reference',
 'Custom Variable 1',
 'Custom Variable 2',
 'Custom Variable 3',
 'Custom Variable 4',
 'Custom Variable 5',
 'Respondent Email',
 'Email List',
 'Country Code',
 'Region',
 'Pique Solutions, an independent analyst firm, is conducting research sponsored by Oracle among selected IT professionals at leading organizations. The objective of this survey is to gather your feedback related to your Oracle Java SE Subscription experience in order to improve the service based on your feedback.Your input will be shared with Oracle in an aggregate format, and neither your name nor your company’s name will be associated with your comments without your explicit consent. We would like to talk with you about the benefits and shortcomings of your Oracle Java SE Subscription, and the considerations behind your selection of Oracle Java SE.First, please respo

In [25]:
# Short, underscore-separated replacement names for the survey columns.
# The list is positional: entry i is the short name for raw_df.columns[i],
# so the order and the number of entries (162) must match the raw export.
#
# Fixed relative to the earlier version of this list:
#   * stray ']' removed from 'Company_Industry_Other' and
#     'OJavaSESubscription_Alts_Top3_EasierUse'
#   * trailing space removed from 'OJavaSESubscription_Alts_Top3_CostManagement'
#   * 'First Name' / 'Last Name' now use underscores like every other entry
# NOTE(review): the 'RepsonseTime' spelling is kept as-is; rename it together
# with any later cell that refers to it.
short_names = [
    # --- survey-platform metadata ---
    'Response_ID',
    'IP_Address',
    'Timestamp',
    'Duplicate',
    'Time_Complete_s',
    'Seq._Number',
    'External_Reference',
    'Custom_Variable_1',
    'Custom_Variable_2',
    'Custom_Variable_3',
    'Custom_Variable_4',
    'Custom_Variable_5',
    'Respondent_Email',
    'Email_List',
    'Country_Code',
    'Region',
    # --- qualification / respondent profile ---
    'Survey_Pre_Qual',
    'Job_Title',
    'Job_Title_Other',
    'Company_Industry',
    'Company_Industry_Other',
    'Company_Revenue',
    'OracleJavaSESubscription_Org_Duration',
    'Java_App_Org_Deploy_2020',
    'OracleJavaSESubscription_Ind_Role',
    'OracleJavaSESubscription_Ind_Role_Other',
    'Survey_Consent',
    'Government_Org_Ind_Employee',
    'Gift_Card_Confirm',
    'Company_Name',
    'Thank_You',
    'Title',
    'Country',
    'State',
    # --- development organisation and platforms ---
    'App_Dev_Outsource',
    'Java_Dev_Org',
    'Java_Dev_Dist_Org',
    'Java_Dev_Dist_JVM',
    'Java_Dev_Dist_Python',
    'Java_Dev_Dist_JavaScript',
    'Platform_Dev_Org_Primary',
    'Platform_Dev_Org_Comment',
    'Platform_Dev_Org_LRHEL_P',
    'Platform_Dev_Org_LOracle_P',
    'Platform_Dev_Org_LUbuntu_P',
    'Platform_Dev_Org_LSUSE_P',
    'Platform_Dev_Org_LinuxOther_P',
    'Platform_Dev_Org_MacOS_P',
    'Platform_Dev_Org_Solaris_P',
    'Platform_Dev_Org_Windows_P',
    'Platform_Dev_Org_LinuxOther_Comment',
    # --- IDE usage / satisfaction ---
    'Use_Org_EclipseIDE',
    'Sat_Org_EclipseIDE',
    'Use_Org_IntelliJIDE',
    'Sat_Org_IntelliJIDE',
    'Use_Org_JDeveloper',
    'Sat_Org_JDeveloper',
    'Use_Org_NetBeansIDE',
    'Sat_Org_NetBeansIDE',
    'Use_Org_RationalAppDev',
    'Sat_Org_RationalAppDev',
    'Use_Org_VisualStudio',
    'Sat_Org_VisualStudio',
    'Use_Org_Other',
    'Sat_Org_Other',
    'Use_Sat_Org_Other_Comment',
    # --- Java SE tool usage / satisfaction ---
    'Use_Org_JavaSEAMC',
    'Sat_Org_JavaSEAMC',
    'Use_Org_JavaSEAMC_UsageLog',
    'Sat_Org_JavaSEAMC_UsageLog',
    'Use_Org_JavaSEDepRuleSet',
    'Sat_Org_JavaSEDepRuleSet',
    'Use_Org_JavaSEMSICustTools',
    'Sat_Org_JavaSEMSICustTools',
    'Use_Org_JavaSEFlightRec',
    'Sat_Org_JavaSEFlightRec',
    # --- awareness / information channels ---
    'JavaSESub_PrePurchasePeriod',
    'Information_Channel_Imp_OWebsites',
    'Information_Channel_Imp_OSalesPros',
    'Information_Channel_Imp_OMarketing',
    'Information_Channel_Imp_Blogs',
    'Information_Channel_Imp_ExtConsultants',
    'Information_Channel_Imp_SocialMedia',
    'Information_Channel_Imp_UserComm',
    'Information_Channel_Imp_IndustryPubs',
    'Information_Channel_Imp_PeerRec',
    'Information_Channel_Imp_Other',
    'Information_Channel_Imp_Other_Comment',
    # --- subscription elements: importance / satisfaction ---
    'OJavaSESubscription_Elem_Imp_Org_ContinuedInvest',
    'OJavaSESubscription_Elem_Sat_Org_ContinuedInvest',
    'OJavaSESubscription_Elem_Imp_Org_PatchesOlderV',
    'OJavaSESubscription_Elem_Sat_Org_PatchesOlderV',
    'OJavaSESubscription_Elem_Imp_Org_JavaWebStartUpdates',
    'OJavaSESubscription_Elem_Sat_Org_JavaWebStartUpdates',
    'OJavaSESubscription_Elem_Imp_Org_DesktopManageUpdates',
    'OJavaSESubscription_Elem_Sat_Org_DesktopManageUpdates',
    'OJavaSESubscription_Elem_Imp_Org_AutoUpdateTools',
    'OJavaSESubscription_Elem_Sat_Org_AutoUpdateTools',
    'OJavaSESubscription_Elem_Imp_Org_Monitoring',
    'OJavaSESubscription_Elem_Sat_Org_Monitoring',
    'OJavaSESubscription_Elem_Imp_Org_MyOSupport',
    'OJavaSESubscription_Elem_Sat_Org_MyOSupport',
    'OJavaSESubscription_Elem_Imp_Org_AccessCloud',
    'OJavaSESubscription_Elem_Sat_Org_AccessCloud',
    'OJavaSESubscription_Elem_Imp_Org_FlexLicensing',
    'OJavaSESubscription_Elem_Sat_Org_FlexLicensing',
    'OJavaSESubscription_Elem_Imp_Org_LegacyJava',
    'OJavaSESubscription_Elem_Sat_Org_LegacyJava',
    'OJavaSESubscription_Elem_Imp_Org_Other',
    'OJavaSESubscription_Elem_Sat_Org_Other',
    'OJavaSESubscription_Elem_Imp_Sat_Org_Other_Comment',
    # --- support attributes: importance / satisfaction ---
    'OJavaSESubscription_Attr_Imp_CommMethods',
    'OJavaSESubscription_Attr_Sat_CommMethods',
    'OJavaSESubscription_Attr_Imp_PhoneSupport',
    'OJavaSESubscription_Attr_Sat_PhoneSupport',
    'OJavaSESubscription_Attr_Imp_SSKnowledge',
    'OJavaSESubscription_Attr_Sat_SSKnowledge',
    'OJavaSESubscription_Attr_Imp_CoNeedsUnd',
    'OJavaSESubscription_Attr_Sat_CoNeedsUnd',
    'OJavaSESubscription_Attr_Imp_TrainingQual',
    'OJavaSESubscription_Attr_Sat_TrainingQual',
    'OJavaSESubscription_Attr_Imp_RepsonseTime',
    'OJavaSESubscription_Attr_Sat_RepsonseTime',
    # --- purchase drivers ---
    'OJavaSESubscription_OrgPurch_Imp_StaffExp',
    'OJavaSESubscription_OrgPurch_Imp_PrPerRatio',
    'OJavaSESubscription_OrgPurch_Imp_TotalCost',
    'OJavaSESubscription_OrgPurch_Imp_LargeEcosystem',
    'OJavaSESubscription_OrgPurch_Imp_VendorSupport',
    'OJavaSESubscription_OrgPurch_Imp_InnovationPotential',
    'OJavaSESubscription_OrgPurch_Imp_LowerRisk',
    'OJavaSESubscription_OrgPurch_Imp_RegulatoryCompliance',
    'OJavaSESubscription_OrgPurch_Imp_MarketTrends',
    'OJavaSESubscription_OrgPurch_Imp_Competition',
    'OJavaSESubscription_OrgPurch_Imp_HigherAgility',
    'OJavaSESubscription_OrgPurch_Imp_StrategicImperative',
    'OJavaSESubscription_OrgPurch_Imp_ThoughtLeadership',
    'OJavaSESubscription_OrgPurch_Imp_CompanyPolitics',
    'OJavaSESubscription_OrgPurch_Imp_InnovationContribution',
    'OJavaSESubscription_OrgPurch_Imp_Other',
    'OJavaSESubscription_OrgPurch_Imp_Other_comment',
    # --- alternatives (top-3 reasons) ---
    'OJavaSESubscription_Alts',
    'OJavaSESubscription_Alts_Top3_MaintenanceTools',
    'OJavaSESubscription_Alts_Top3_UpdatesTools',
    'OJavaSESubscription_Alts_Top3_CostManagement',
    'OJavaSESubscription_Alts_Top3_Flexibility',
    'OJavaSESubscription_Alts_Top3_Familiarity',
    'OJavaSESubscription_Alts_Top3_EasierUse',
    'OJavaSESubscription_Alts_Top3_OpenSourceTools',
    'OJavaSESubscription_Alts_Top3_Support',
    'OJavaSESubscription_Alts_Top3_AppSecurity',
    'OJavaSESubscription_Alts_Top3_DevResources',
    'OJavaSESubscription_Alts_Top3_RiskReg',
    'OJavaSESubscription_Alts_Top3_RiskAppDowntime',
    'OJavaSESubscription_Alts_Top3_Other',
    # --- follow-up contact / gift card ---
    'ThanksPhone1',
    'ThanksPhone2',
    'First_Name',
    'Last_Name',
    'Phone',
    'Email',
    'GiftCard_Select',
    'GiftCard_Email',
]

In [26]:
# Sanity check: number of short names; compare against len(raw_df.columns).
len(short_names)

162

In [12]:
# Spot-check a single entry by position (index 21) while aligning the list.
short_names[21]

'Company_Revenue'

In [9]:
# Number of columns in the raw export (second element of the frame's shape).
raw_df.shape[1]

162

In [22]:
# Side-by-side view of each short name ('new') next to the raw header at the
# same position ('org'), for eyeballing that the two are aligned.
name_columns = {'new': short_names, 'org': raw_df.columns}
pd.DataFrame(name_columns)

Unnamed: 0,new,org
0,Response_ID,Response ID
1,IP_Address,IP Address
2,Timestamp,Timestamp (mm/dd/yyyy)
3,Duplicate,Duplicate
4,Time_Complete_s,Time Taken to Complete (Seconds)
5,Seq._Number,Seq. Number
6,External_Reference,External Reference
7,Custom_Variable_1,Custom Variable 1
8,Custom_Variable_2,Custom Variable 2
9,Custom_Variable_3,Custom Variable 3


# 2. Convert: <br> &emsp; - continuous variables to int/float, <br> &emsp; - categorical to dummy

In [92]:
# Raw header of one of the "top 3 reasons" checkbox columns, inspected in the
# next cells. The leading space is part of the header and must be kept.
col = ' From the list below, please check the top 3 reasons for which you would consider alternatives to Oracle Java SE Subscription. [Better tools for maintenance and patching]'

In [93]:
# Count the missing answers in the selected column, then preview its first rows.
col_values = raw_df[col]
print(col_values.isna().sum())
col_values.head()

186


0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
Name:  From the list below, please check the top 3 reasons for which you would consider alternatives to Oracle Java SE Subscription. [Better tools for maintenance and patching], dtype: float64

In [94]:
# Frequency of each non-missing answer in the selected column
# (value_counts() leaves NaN out by default).
raw_df[col].value_counts()

1.0    42
Name:  From the list below, please check the top 3 reasons for which you would consider alternatives to Oracle Java SE Subscription. [Better tools for maintenance and patching], dtype: int64

In [98]:
# Raw headers of the columns treated as continuous (counts, percentages and
# 0-10 ratings). Headers are assembled as "<question stem><option>"; every
# space and '\xa0' inside these strings is part of the exported header and
# must be preserved exactly, including the irregular spacing in some options.
# NOTE(review): the two 'Other * [Can you please specify?]' entries look like
# free-text follow-ups rather than ratings -- confirm they belong here.

def _paired_columns(names, first_suffix, second_suffix):
    """Return [name + first_suffix, name + second_suffix] for each name, in order."""
    return [name + suffix
            for name in names
            for suffix in (first_suffix, second_suffix)]


_dev_dist_stem = "What is the approximate distribution of developers primarily developing on the following development platforms (in percentage)? Note: total %'s can be >100 since many orgs use multiple platforms"
_platform_stem = 'Please provide approximate percentage(s) for each of the development platforms listed below.  '
_channel_stem = 'On a scale from 0 to 10 with 0 = “Not at all important” and 10 = “Extremely important”, please rate the importance of the information channels that increased your awareness of Oracle Java SE Subscription.  '
_purchase_stem = 'Please rate the importance of the following reasons driving your organization’s decision to purchase Oracle Java SE Subscription. Please rate each item on a scale from 0 to 10 with 0 = “Not at all important” and 10 = “Extremely important”.  '

cont_list = (
    # Head-count of Java developers.
    ['Approximately how many Java developers are in your organization?\xa0']
    # Share of developers per language family (spacing differs per option).
    + [_dev_dist_stem + tail for tail in (
        ' % developing on Java',
        ' % developing on JVM languages besides Java',
        '       % developing on Python',
        '   % developing on JavaScript',
    )]
    # Share of development per platform.
    + [_platform_stem + platform for platform in (
        'Linux (RHEL)',
        'Linux (Oracle)',
        'Linux Ubuntu',
        'Linux (SUSE)',
        'Linux (Other) *',
        'MacOS',
        'Solaris',
        'Windows',
    )]
    # IDEs: level of use followed by level of satisfaction.
    + _paired_columns(
        ('Eclipse IDE',
         'IntelliJ IDE',
         'JDeveloper',
         'NetBeans IDE',
         'Rational Application Developer',
         'Visual Studio',
         'Other *'),
        ' Level of use (0-10)\xa0[ , ]',
        ' Level of satisfaction (0-10)[ , ]',
    )
    # Java SE tools: usage followed by satisfaction.
    + _paired_columns(
        ('Oracle Java SE Advanced Management Console (AMC)',
         'Java SE Usage Logging in AMC',
         'The Java SE Deployment Rule Set',
         'Java SE MSI Customizations Tools ',
         'Java SE Flight Recorder'),
        ' Usage (0-10)[\xa0,\xa0]',
        ' Satisfaction (0-10)[\xa0,\xa0]',
    )
    # Importance of each information channel.
    + [_channel_stem + channel for channel in (
        'Oracle websites',
        'Oracle Sales professionals',
        'Oracle marketing materials',
        'Blogs',
        'External consultants',
        'Social Media outlets (e.g. Twitter, LinkedIn, Facebook, etc.)',
        'User communities',
        'Industry publications',
        'Peers’ recommendations',
        'Other *',
        'Other * [Can you please specify?]',
    )]
    # Subscription elements: importance followed by satisfaction.
    + _paired_columns(
        ('Oracle’s continued investment in the Java Ecosystem and Technology',
         'Patches and updates for older versions of Java',
         'Tools and updates for desktop management specifically using Java Web Start',
         'Tools and updates for desktop management independently of Java Web Start / Browsers',
         'Access to tools for automated updates and upgrade, desktop deployment rulesets and tools for Java version management ',
         'Monitoring tools',
         'MOS (My Oracle Support)',
         'Access Cloud Workload and On-premise',
         'Flexible licensing terms ',
         'Legacy Java application support',
         'Other *'),
        ' Importance (0-10)[ , ]',
        ' Satisfaction (0-10)[ , ]',
    )
    # Support attributes: importance followed by satisfaction.
    + _paired_columns(
        ('Methods of communication',
         'Quality of phone support',
         'Knowledge of support staff',
         'Oracles understanding of your companys needs',
         'Quality of training provided to your staff',
         'Response time to support requests'),
        ' Importance (0-10)[\xa0,\xa0]',
        ' Satisfaction (0-10)[\xa0,\xa0]',
    )
    # Importance of each purchase driver.
    + [_purchase_stem + reason for reason in (
        'Staff expertise and availability of skills',
        'Price/Performance Ratio',
        'Total cost of ownership',
        'Large ecosystem of partners ',
        'Vendor support',
        'Innovation potential',
        'Lower risk',
        'Regulatory compliance',
        'Market trends',
        'Competition ',
        'Higher agility',
        'Strategic imperative',
        'Thought leadership and innovation',
        'Company politics',
        'Ability to contribute to ongoing innovation of Java platform',
        'Other *',
        'Other * [Can you please specify?]',
    )]
)

# Raw headers of the single-choice (categorical) columns. Trailing spaces are
# part of the exported headers and are kept on purpose.
catg_list = [
    # Respondent / company profile
    'Country Code',
    'Which one of the below best describes your company’s industry grouping? ',
    'What is your job title? ',
    'What is the approximate annual revenue of your company? ',
    # Subscription history and role
    'How long has your organization been using Oracle Java SE Subscription for managing your Java applications? ',
    'How many Java applications does your organization plan to build or deploy in 2020? ',
    'Which of the following statements most closely describes your role in the selection and use of Oracle Java SE Subscription? (Select one)',
    'Are you an employee or official of a government organization (e.g., federal province, state, locality, public higher education institution, etc.), a government owned or controlled entity (e.g., government-owned utilities, etc.), a public international organization, or a political party or candidate for political office?',
    # Development practice
    'Do you outsource a significant portion of your organization’s application development function? ',
    'Which platform does your organization primarily develop for?',
    'Approximately how long was the period between the time you first became aware of Java SE Subscription to the time that you purchased it? ',
]

# Checkbox columns that are already one-hot encoded; they only need NA -> 0.
# Each raw header is "<stem>[<option>]". The leading space of the stem and the
# trailing space inside one option are part of the exported headers.
_alts_stem = ' From the list below, please check the top 3 reasons for which you would consider alternatives to Oracle Java SE Subscription. '
_alts_options = (
    'Better tools for maintenance and patching',
    'Better tools for automated updates and upgrades ',
    'More cost-effective management and monitoring of Java applications',
    'Greater flexibility',
    'Familiarity with platform and development tools',
    'Easier to use',
    'Access to open source tools',
    'Better support provided by vendor, including legacy Java support',
    'Greater application security',
    'Access to skilled development resources',
    'Lower risk of regulatory non-compliance',
    'Lower risk of application downtime',
    'Other',
)
dummy0_list = [_alts_stem + '[' + option + ']' for option in _alts_options]
# Raw headers of the columns that belong to none of the other three groups
# (platform metadata, consent / thank-you text, free-text answers and
# contact details). Whitespace and '\xa0' are part of the exported headers.
# NOTE(review): the name reads like a typo of "excl_list"; it is kept because
# the size-check cell below refers to exlc_list.
exlc_list = [
    # Survey-platform metadata
    'Response ID',
    'IP Address',
    'Timestamp (mm/dd/yyyy)',
    'Duplicate',
    'Time Taken to Complete (Seconds)',
    'Seq. Number',
    'External Reference',
    'Custom Variable 1',
    'Custom Variable 2',
    'Custom Variable 3',
    'Custom Variable 4',
    'Custom Variable 5',
    'Respondent Email',
    'Email List',
    'Region',
    # Intro text, "other" free-text answers, consent and respondent details
    'Pique Solutions, an independent analyst firm, is conducting research sponsored by Oracle among selected IT professionals at leading organizations. The objective of this survey is to gather your feedback related to your Oracle Java SE Subscription experience in order to improve the service based on your feedback.Your input will be shared with Oracle in an aggregate format, and neither your name nor your company’s name will be associated with your comments without your explicit consent. We would like to talk with you about the benefits and shortcomings of your Oracle Java SE Subscription, and the considerations behind your selection of Oracle Java SE.First, please respond to a few questions to determine if you qualify:',
    'What is your job title? [Other (Please specify)]',
    'Which one of the below best describes your company’s industry grouping? [Other]',
    'Which of the following statements most closely describes your role in the selection and use of Oracle Java SE Subscription? (Select one) [Other]',
    'Would you be interested in providing input into the Java SE Subscription offering by participating in an online study about the benefits of Oracle Java SE Subscription to your organization and about your satisfaction with the Subscription?\xa0 ',
    '  You confirm that you may accept the gift card as a thank you for completing the survey and that acceptance is: (1) permissible under your employer’s internal policies; (2) permissible under the laws of your home country and any other law relevant to your employer; and (3) known to and approved by your employer’s management.\xa0Oracle reserves the right to limit attendance accordingly and pursuant to Oracle policy. \xa0 \xa0',
    'Company Name',
    'Thank you for your willingness to participate in this study.',
    'Title',
    'Country',
    'State',
    # Free-text comments
    'Which platform does your organization primarily develop for? [Dynamic Comment]',
    'Can you please specify Linux (Other)?',
    'Can you please specify?',
    'Can you please specify?.1',
    'Alternatives to Oracle Java SE Subscription',
    # Closing text, follow-up contact and gift card
    'Thank you very much for your participation! We appreciate your time and input. We would love to learn more about your Java development support needs and objectives and your experience with Oracle Java SE.\xa0 Please let us know if you would be interested in a followup phone discussion. As a token of appreciation for your spending 20-30 minutes with us on the phone, we would like to offer you a $100 gift card of your choice.',
    'Thank you very much for your participation! We appreciate your time and input. We would love to learn more about your Java development support needs and objectives and your experience with Oracle Java SE.\xa0 Please let us know if you would be interested in a followup phone discussion.\xa0',
    'First Name',
    'Last Name',
    'Phone',
    'Email Address',
    'Please select a gift card you wish to receive for participation in the survey.',
    'Please provide your email address so that we can send you an electronic gift card. Please use the email address to which you received the survey invitation.\xa0',
]


In [99]:
# Print the size of each column group, then their total; the total is meant
# to be compared with the number of columns in raw_df.
group_sizes = [len(group) for group in (cont_list, catg_list, dummy0_list, exlc_list)]
for group_size in group_sizes:
    print(group_size)
print(sum(group_sizes))

99
11
13
39
162


In [100]:
# Number of columns in the raw export, for comparison with the group total above.
print(raw_df.shape[1])

162
