# Analysis of JSON-like data

In [1]:
# Column-oriented sample data: every key is a field name and every list
# holds one value per student, in the same order across all three lists.
students = {
    "firstName": [
        "sabita",
        "bhawana",
        "puja",
        "sangita",
    ],
    "lastName": [
        "rajbanshi",
        "singh",
        "oshin",
        "magar",
    ],
    "email": [
        "sabita@gmail.com",
        "bhawana@email.com",
        "puja@email.com",
        "sangita@yahoo.co.uk",
    ],
}

In [2]:
# index the dict by key to get the list of email addresses
students['email']

['sabita@gmail.com',
 'bhawana@email.com',
 'puja@email.com',
 'sangita@yahoo.co.uk']

## Create a DataFrame from the JSON-like data above
* DataFrame: a two-dimensional table made of rows and columns
* Series: a single column of values, one entry per row

In [3]:
#importing pandas
import pandas as pd

In [4]:
# build a DataFrame from the dict: each key becomes a column label and
# each list supplies that column's values, one per row
df = pd.DataFrame(students)

In [5]:
# a bare expression on a cell's last line is displayed as the cell output
df

Unnamed: 0,firstName,lastName,email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [6]:
# select one column by its label; the result is a Series named after the column
df['email']

0       sabita@gmail.com
1      bhawana@email.com
2         puja@email.com
3    sangita@yahoo.co.uk
Name: email, dtype: object

In [7]:
# attribute-style access to the same column; equivalent to df['email'] here
# NOTE: this form only works when the column label is a valid Python
# identifier that does not clash with an existing DataFrame attribute
df.email

0       sabita@gmail.com
1      bhawana@email.com
2         puja@email.com
3    sangita@yahoo.co.uk
Name: email, dtype: object

In [8]:
# a single selected column is a pandas Series
type(df['email'])

pandas.core.series.Series

In [9]:
# the full table is a pandas DataFrame
type(df)

pandas.core.frame.DataFrame

In [10]:
# select several columns by passing a list of labels inside the brackets;
# the result is a DataFrame (not a list) with the columns in the listed order
df[['lastName','email']]

Unnamed: 0,lastName,email
0,rajbanshi,sabita@gmail.com
1,singh,bhawana@email.com
2,oshin,puja@email.com
3,magar,sangita@yahoo.co.uk


In [11]:
# column labels of the DataFrame, returned as an Index object
df.columns

Index(['firstName', 'lastName', 'email'], dtype='object')

In [12]:
# rows are selected with the iloc and loc indexers
# iloc takes an integer position: 0 is the first row, returned as a Series
df.iloc[0]

firstName              sabita
lastName            rajbanshi
email        sabita@gmail.com
Name: 0, dtype: object

In [13]:
# pass a list of positions to get several rows (here the first two) as a DataFrame
df.iloc[[0, 1]]

Unnamed: 0,firstName,lastName,email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com


In [14]:
# iloc takes the row selection first and the column selection second, both by position
# rows 0 and 1 of column position 2 ('email'), returned as a Series
df.iloc[[0, 1], 2]

0     sabita@gmail.com
1    bhawana@email.com
Name: email, dtype: object

In [15]:
# loc selects rows by index label; the labels 0 and 1 match the positions here
# because this DataFrame uses the default integer index
df.loc[[0, 1]]

Unnamed: 0,firstName,lastName,email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com


In [16]:
# loc with a list of row labels and a list of column labels;
# the columns come back in the order they are listed
df.loc[[0, 1], ['email', 'lastName']]

Unnamed: 0,email,lastName
0,sabita@gmail.com,rajbanshi
1,bhawana@email.com,singh


# Analysis of the Stack Overflow Developer Survey

In [17]:
# load the survey responses and the schema that describes each survey column;
# both paths are relative, so the data/ folder must sit in the working directory
# NOTE(review): this rebinds df, replacing the students DataFrame built earlier
df = pd.read_csv('data/survey_results_public.csv')
schema_df = pd.read_csv('data/survey_results_schema.csv')

In [18]:
# preview the first five rows; the display elides the middle columns with '...'
df.head()

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27.0
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4.0
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4.0
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8.0


In [19]:
# preview the schema: each row pairs a survey column name with its question text
schema_df.head()

Unnamed: 0,Column,QuestionText
0,Respondent,Randomized respondent ID number (not in order ...
1,MainBranch,Which of the following options best describes ...
2,Hobbyist,Do you code as a hobby?
3,Age,What is your age (in years)? If you prefer not...
4,Age1stCode,At what age did you write your first line of c...


### Shape of the DataFrame (rows, columns)

In [20]:
# (rows, columns) of the survey responses
df.shape

(64461, 61)

In [21]:
# (rows, columns) of the schema; its row count matches the number of survey columns
schema_df.shape

(61, 2)

### All columns of the DataFrame

In [22]:
# labels of every column in the survey responses
df.columns

Index(['Respondent', 'MainBranch', 'Hobbyist', 'Age', 'Age1stCode', 'CompFreq',
       'CompTotal', 'ConvertedComp', 'Country', 'CurrencyDesc',
       'CurrencySymbol', 'DatabaseDesireNextYear', 'DatabaseWorkedWith',
       'DevType', 'EdLevel', 'Employment', 'Ethnicity', 'Gender', 'JobFactors',
       'JobSat', 'JobSeek', 'LanguageDesireNextYear', 'LanguageWorkedWith',
       'MiscTechDesireNextYear', 'MiscTechWorkedWith',
       'NEWCollabToolsDesireNextYear', 'NEWCollabToolsWorkedWith', 'NEWDevOps',
       'NEWDevOpsImpt', 'NEWEdImpt', 'NEWJobHunt', 'NEWJobHuntResearch',
       'NEWLearn', 'NEWOffTopic', 'NEWOnboardGood', 'NEWOtherComms',
       'NEWOvertime', 'NEWPurchaseResearch', 'NEWPurpleLink', 'NEWSOSites',
       'NEWStuck', 'OpSys', 'OrgSize', 'PlatformDesireNextYear',
       'PlatformWorkedWith', 'PurchaseWhat', 'Sexuality', 'SOAccount',
       'SOComm', 'SOPartFreq', 'SOVisitFreq', 'SurveyEase', 'SurveyLength',
       'Trans', 'UndergradMajor', 'WebframeDesireNextYear',
  

In [23]:
# labels of the schema's two columns
schema_df.columns

Index(['Column', 'QuestionText'], dtype='object')

In [24]:
# select the Hobbyist column as a Series; long output is truncated in the middle
df['Hobbyist']

0        Yes
1         No
2        Yes
3        Yes
4        Yes
        ... 
64456    Yes
64457    Yes
64458    Yes
64459    Yes
64460    Yes
Name: Hobbyist, Length: 64461, dtype: object

In [25]:
# count how many times each distinct value occurs, most frequent first
# NOTE: missing values are left out by default, which is why the counts sum
# to fewer than the number of rows; pass dropna=False to include them
df['Hobbyist'].value_counts()

Yes    50388
No     14028
Name: Hobbyist, dtype: int64

In [26]:
# label-based slice of rows 0 through 2 for the Hobbyist column;
# unlike a Python list slice, a loc slice includes its end label
df.loc[0:2, 'Hobbyist']

0    Yes
1     No
2    Yes
Name: Hobbyist, dtype: object

In [27]:
# slice rows 0 through 2 and every column from Hobbyist through Employment;
# both ends of each loc slice are included
df.loc[0:2, 'Hobbyist':'Employment']

Unnamed: 0,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment
0,Yes,,13,Monthly,,,Germany,European Euro,EUR,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em..."
1,No,,19,,,,United Kingdom,Pound sterling,GBP,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time
2,Yes,,15,,,,Russian Federation,,,,,,,
