# Analysis of JSON data

In [1]:
import pandas as pd

In [2]:
# Toy dataset: one key per column, one list entry per student.
# All three lists have the same length so they line up row by row.
students = {
    "firstName": [
        'sabita',
        'bhawana',
        'puja',
        'sangita',
    ],
    "lastName": [
        'rajbanshi',
        'singh',
        'oshin',
        'magar',
    ],
    "email": [
        'sabita@gmail.com',
        'bhawana@email.com',
        'puja@email.com',
        'sangita@yahoo.co.uk',
    ],
}

In [3]:
# Build a DataFrame from the dict: each key becomes a column and each list
# supplies that column's values; rows get a default 0..n-1 integer index.
df = pd.DataFrame(students)
# Preview the first rows (this frame only has four).
df.head()

Unnamed: 0,firstName,lastName,email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [4]:
# Select the 'email' column; a single column comes back as a pandas Series.
df['email']

0       sabita@gmail.com
1      bhawana@email.com
2         puja@email.com
3    sangita@yahoo.co.uk
Name: email, dtype: object

In [5]:
# Display the whole frame (four rows) with its default integer index.
df

Unnamed: 0,firstName,lastName,email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [6]:
# Make 'email' the row index so rows can be looked up by address with .loc.
# The result is assigned back instead of using inplace=True, and the guard
# keeps this cell safe to re-run: once 'email' has become the index it is no
# longer a column, so a second set_index('email') would raise KeyError.
if 'email' in df.columns:
    df = df.set_index('email')

In [7]:
# Display the frame again: 'email' is now the row index, not a regular column.
df

Unnamed: 0_level_0,firstName,lastName
email,Unnamed: 1_level_1,Unnamed: 2_level_1
sabita@gmail.com,sabita,rajbanshi
bhawana@email.com,bhawana,singh
puja@email.com,puja,oshin
sangita@yahoo.co.uk,sangita,magar


In [8]:
# Inspect the index object; it holds the email addresses and is named 'email'.
df.index

Index(['sabita@gmail.com', 'bhawana@email.com', 'puja@email.com',
       'sangita@yahoo.co.uk'],
      dtype='object', name='email')

In [9]:
# .loc selects by index *label*: fetch the row whose email is
# 'sabita@gmail.com'. A single row comes back as a Series.
df.loc['sabita@gmail.com']

firstName       sabita
lastName     rajbanshi
Name: sabita@gmail.com, dtype: object

In [10]:
# .loc[row_label, column_label] returns the single value in that cell.
df.loc['sabita@gmail.com', 'lastName']

'rajbanshi'

In [11]:
# .iloc selects by integer *position*: 0 is the first row, whatever its label.
df.iloc[0]

firstName       sabita
lastName     rajbanshi
Name: sabita@gmail.com, dtype: object

In [12]:
# Move 'email' out of the index and back into a regular column, restoring the
# default 0..n-1 integer index.
# The result is assigned back instead of using inplace=True, and the guard
# keeps this cell safe to re-run: an unconditional second reset_index would
# add a spurious 'index' column holding the old integer index.
if df.index.name == 'email':
    df = df.reset_index()
df

Unnamed: 0,email,firstName,lastName
0,sabita@gmail.com,sabita,rajbanshi
1,bhawana@email.com,bhawana,singh
2,puja@email.com,puja,oshin
3,sangita@yahoo.co.uk,sangita,magar


# Analysis of the Stack Overflow Developer Survey

In [13]:
# Load the survey responses and the schema file that maps each column name to
# its question text. Paths are relative to the notebook's working directory.
# NOTE: this rebinds `df`, replacing the students frame from the first section.
df = pd.read_csv('data/survey_results_public.csv')
schema_df = pd.read_csv('data/survey_results_schema.csv')

In [14]:
# Preview the first five responses; 'Respondent' is still an ordinary column.
df.head()

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27.0
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4.0
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4.0
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8.0


## Data cleaning
* The "Respondent" column holds a unique ID for each row, so use it as the DataFrame index.

In [15]:
# Re-read the responses with 'Respondent' as the row index (index_col),
# replacing the default integer index. Loading from the file again keeps this
# cell re-runnable.
df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent')
df.head()

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27.0
2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4.0
3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4.0
5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8.0


In [16]:
# List the column labels; 'Respondent' is now the index, so the listing
# starts at 'MainBranch'.
df.columns

Index(['MainBranch', 'Hobbyist', 'Age', 'Age1stCode', 'CompFreq', 'CompTotal',
       'ConvertedComp', 'Country', 'CurrencyDesc', 'CurrencySymbol',
       'DatabaseDesireNextYear', 'DatabaseWorkedWith', 'DevType', 'EdLevel',
       'Employment', 'Ethnicity', 'Gender', 'JobFactors', 'JobSat', 'JobSeek',
       'LanguageDesireNextYear', 'LanguageWorkedWith',
       'MiscTechDesireNextYear', 'MiscTechWorkedWith',
       'NEWCollabToolsDesireNextYear', 'NEWCollabToolsWorkedWith', 'NEWDevOps',
       'NEWDevOpsImpt', 'NEWEdImpt', 'NEWJobHunt', 'NEWJobHuntResearch',
       'NEWLearn', 'NEWOffTopic', 'NEWOnboardGood', 'NEWOtherComms',
       'NEWOvertime', 'NEWPurchaseResearch', 'NEWPurpleLink', 'NEWSOSites',
       'NEWStuck', 'OpSys', 'OrgSize', 'PlatformDesireNextYear',
       'PlatformWorkedWith', 'PurchaseWhat', 'Sexuality', 'SOAccount',
       'SOComm', 'SOPartFreq', 'SOVisitFreq', 'SurveyEase', 'SurveyLength',
       'Trans', 'UndergradMajor', 'WebframeDesireNextYear',
       'Webframe

In [17]:
# Preview the schema: one row per survey column, with its question text.
schema_df.head()

Unnamed: 0,Column,QuestionText
0,Respondent,Randomized respondent ID number (not in order ...
1,MainBranch,Which of the following options best describes ...
2,Hobbyist,Do you code as a hobby?
3,Age,What is your age (in years)? If you prefer not...
4,Age1stCode,At what age did you write your first line of c...


In [18]:
# Re-read the schema with 'Column' as the row index so a question can be
# looked up by survey column name via .loc.
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='Column')
schema_df.head()

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Respondent,Randomized respondent ID number (not in order ...
MainBranch,Which of the following options best describes ...
Hobbyist,Do you code as a hobby?
Age,What is your age (in years)? If you prefer not...
Age1stCode,At what age did you write your first line of c...


In [19]:
# Look up the schema row for the 'Hobbyist' column (returned as a Series).
schema_df.loc['Hobbyist']

QuestionText    Do you code as a hobby?
Name: Hobbyist, dtype: object

In [20]:
# Same label lookup for the 'Country' column.
schema_df.loc['Country']

QuestionText    Where do you live?
Name: Country, dtype: object

In [21]:
# Row label plus column label returns the bare question string, not a Series.
schema_df.loc['Hobbyist', 'QuestionText']

'Do you code as a hobby?'

In [22]:
# Sort rows alphabetically by index label (ascending is the default).
# This returns a new sorted frame for display; schema_df is not reassigned.
schema_df.sort_index()

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Age,What is your age (in years)? If you prefer not...
Age1stCode,At what age did you write your first line of c...
CompFreq,"Is that compensation weekly, monthly, or yearly?"
CompTotal,What is your current total compensation (salar...
ConvertedComp,Salary converted to annual USD salaries using ...
...,...
WebframeWorkedWith,Which web frameworks have you done extensive d...
WelcomeChange,"Compared to last year, how welcome do you feel..."
WorkWeekHrs,"On average, how many hours per week do you wor..."
YearsCode,"Including any education, how many years have y..."


In [23]:
# Sort rows by index label in descending order (also returns a new frame).
schema_df.sort_index(ascending=False)

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
YearsCodePro,"NOT including education, how many years have y..."
YearsCode,"Including any education, how many years have y..."
WorkWeekHrs,"On average, how many hours per week do you wor..."
WelcomeChange,"Compared to last year, how welcome do you feel..."
WebframeWorkedWith,Which web frameworks have you done extensive d...
...,...
ConvertedComp,Salary converted to annual USD salaries using ...
CompTotal,What is your current total compensation (salar...
CompFreq,"Is that compensation weekly, monthly, or yearly?"
Age1stCode,At what age did you write your first line of c...


In [24]:
# Keep the alphabetical ordering for the rest of the notebook by rebinding
# schema_df to the sorted frame. Plain assignment is preferred over
# inplace=True: it yields the same sorted frame and keeps the call chainable.
schema_df = schema_df.sort_index()
schema_df

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Age,What is your age (in years)? If you prefer not...
Age1stCode,At what age did you write your first line of c...
CompFreq,"Is that compensation weekly, monthly, or yearly?"
CompTotal,What is your current total compensation (salar...
ConvertedComp,Salary converted to annual USD salaries using ...
...,...
WebframeWorkedWith,Which web frameworks have you done extensive d...
WelcomeChange,"Compared to last year, how welcome do you feel..."
WorkWeekHrs,"On average, how many hours per week do you wor..."
YearsCode,"Including any education, how many years have y..."
