# Analysis of json data

In [1]:
students = {
    "firstName": ['sabita', 'bhawana', 'puja', 'sangita'],
    "lastName": ['rajbanshi', 'singh', 'oshin', 'magar'],
    "email": ['sabita@gmail.com', 'bhawana@email.com', 'puja@email.com', 'sangita@yahoo.co.uk']
    
} 

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(students)

In [4]:
df.head()

Unnamed: 0,firstName,lastName,email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [5]:
df.columns

Index(['firstName', 'lastName', 'email'], dtype='object')

In [6]:
#alter the column names firstName to first and lastName to last
df.columns = ['first', 'last', 'email']
df

Unnamed: 0,first,last,email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [7]:
#update column names with uppercase letter
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST,LAST,EMAIL
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [8]:
#update column names with lowercase letter
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first,last,email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [9]:
#rename column names
df.rename(columns={'first': 'FirstName', 'last': 'LastName', 'email': 'Email'}, inplace=True)
df

Unnamed: 0,FirstName,LastName,Email
0,sabita,rajbanshi,sabita@gmail.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [10]:
#replace values in index location
df.loc[0] = ['Sasang', 'Singh', 'sasa@email.com']
df

Unnamed: 0,FirstName,LastName,Email
0,Sasang,Singh,sasa@email.com
1,bhawana,singh,bhawana@email.com
2,puja,oshin,puja@email.com
3,sangita,magar,sangita@yahoo.co.uk


In [11]:
#update values in index location 2
df.loc[2, ['LastName', 'Email']] = ['Rajbanshi', 'oshin@gmail.com']
df

Unnamed: 0,FirstName,LastName,Email
0,Sasang,Singh,sasa@email.com
1,bhawana,singh,bhawana@email.com
2,puja,Rajbanshi,oshin@gmail.com
3,sangita,magar,sangita@yahoo.co.uk


In [12]:
#using .at
df.at[2, 'FirstName'] = 'Oshin'
df

Unnamed: 0,FirstName,LastName,Email
0,Sasang,Singh,sasa@email.com
1,bhawana,singh,bhawana@email.com
2,Oshin,Rajbanshi,oshin@gmail.com
3,sangita,magar,sangita@yahoo.co.uk


In [13]:
#filter email
filt = (df['Email'] == 'sasa@email.com')
filt

0     True
1    False
2    False
3    False
Name: Email, dtype: bool

In [14]:
#update lastname
df.loc[filt, 'LastName'] = 'Chettri'
df

Unnamed: 0,FirstName,LastName,Email
0,Sasang,Chettri,sasa@email.com
1,bhawana,singh,bhawana@email.com
2,Oshin,Rajbanshi,oshin@gmail.com
3,sangita,magar,sangita@yahoo.co.uk


In [15]:
#update column firstname with uppercase
df['FirstName'] = df['FirstName'].str.upper()
df

Unnamed: 0,FirstName,LastName,Email
0,SASANG,Chettri,sasa@email.com
1,BHAWANA,singh,bhawana@email.com
2,OSHIN,Rajbanshi,oshin@gmail.com
3,SANGITA,magar,sangita@yahoo.co.uk


## Methods
* apply
* map
* applymap
* replace

In [16]:
#find length of email
df['Email'].apply(len)

0    14
1    17
2    15
3    19
Name: Email, dtype: int64

In [17]:
#function to convert lastname into uppercase
def update_Email(Email):
    return Email.upper()

In [18]:
df['Email'].apply(update_Email)

0         SASA@EMAIL.COM
1      BHAWANA@EMAIL.COM
2        OSHIN@GMAIL.COM
3    SANGITA@YAHOO.CO.UK
Name: Email, dtype: object

In [19]:
#apply function to the dataframe
df['Email'] = df['Email'].apply(update_Email)
df

Unnamed: 0,FirstName,LastName,Email
0,SASANG,Chettri,SASA@EMAIL.COM
1,BHAWANA,singh,BHAWANA@EMAIL.COM
2,OSHIN,Rajbanshi,OSHIN@GMAIL.COM
3,SANGITA,magar,SANGITA@YAHOO.CO.UK


In [20]:
#again apply lowercase method using lambda
df['Email'] = df['Email'].apply(lambda x: x.lower())
df

Unnamed: 0,FirstName,LastName,Email
0,SASANG,Chettri,sasa@email.com
1,BHAWANA,singh,bhawana@email.com
2,OSHIN,Rajbanshi,oshin@gmail.com
3,SANGITA,magar,sangita@yahoo.co.uk


In [21]:
#length of email column
df['Email'].apply(len)

0    14
1    17
2    15
3    19
Name: Email, dtype: int64

In [22]:
#length of all columns
df.apply(len)

FirstName    4
LastName     4
Email        4
dtype: int64

In [23]:
df.apply(len, axis='columns')

0    3
1    3
2    3
3    3
dtype: int64

In [24]:
#get minimum values from each column
df.apply(pd.Series.min)

FirstName              BHAWANA
LastName               Chettri
Email        bhawana@email.com
dtype: object

In [25]:
df.apply(lambda x: x.min())

FirstName              BHAWANA
LastName               Chettri
Email        bhawana@email.com
dtype: object

In [26]:
#get length of each values of dataframe
df.applymap(len)

Unnamed: 0,FirstName,LastName,Email
0,6,7,14
1,7,5,17
2,5,9,15
3,7,5,19


In [27]:
#get lowercase of each values in dataframe
df.applymap(str.lower)

Unnamed: 0,FirstName,LastName,Email
0,sasang,chettri,sasa@email.com
1,bhawana,singh,bhawana@email.com
2,oshin,rajbanshi,oshin@gmail.com
3,sangita,magar,sangita@yahoo.co.uk


In [28]:
df['FirstName'].map({'sasang': 'SASA', 'sangita': 'Gharti'})

0    NaN
1    NaN
2    NaN
3    NaN
Name: FirstName, dtype: object

# Analysis of stack overflow developer survey

In [29]:
df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent')
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='Column')

In [30]:
df.head()

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27.0
2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4.0
3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4.0
5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8.0


In [31]:
#rename column name
df.rename(columns={'ConvertedComp': 'USDSalary'}, inplace=True)
df

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,USDSalary,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64858,,Yes,,16,,,,United States,,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64867,,Yes,,,,,,Morocco,,,...,,,,,,,,,,
64898,,Yes,,,,,,Viet Nam,,,...,,,,,,,,,,
64925,,Yes,,,,,,Poland,,,...,,,,,Angular;Angular.js;React.js,,,,,


In [32]:
df['USDSalary']

Respondent
1       NaN
2       NaN
3       NaN
4       NaN
5       NaN
         ..
64858   NaN
64867   NaN
64898   NaN
64925   NaN
65112   NaN
Name: USDSalary, Length: 64461, dtype: float64

In [33]:
df['Hobbyist']

Respondent
1        Yes
2         No
3        Yes
4        Yes
5        Yes
        ... 
64858    Yes
64867    Yes
64898    Yes
64925    Yes
65112    Yes
Name: Hobbyist, Length: 64461, dtype: object

In [34]:
#map all yes/no into True/False
df['Hobbyist'].map({'Yes':True, 'No':False})

Respondent
1         True
2        False
3         True
4         True
5         True
         ...  
64858     True
64867     True
64898     True
64925     True
65112     True
Name: Hobbyist, Length: 64461, dtype: object

In [35]:
df['Hobbyist'] = df['Hobbyist'].map({'Yes':True, 'No':False})
df

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,USDSalary,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,I am a developer by profession,True,,13,Monthly,,,Germany,European Euro,EUR,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
2,I am a developer by profession,False,,19,,,,United Kingdom,Pound sterling,GBP,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
3,I code primarily as a hobby,True,,15,,,,Russian Federation,,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
4,I am a developer by profession,True,25.0,18,,,,Albania,Albanian lek,ALL,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
5,"I used to be a developer by profession, but no...",True,31.0,16,,,,United States,,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64858,,True,,16,,,,United States,,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64867,,True,,,,,,Morocco,,,...,,,,,,,,,,
64898,,True,,,,,,Viet Nam,,,...,,,,,,,,,,
64925,,True,,,,,,Poland,,,...,,,,,Angular;Angular.js;React.js,,,,,
