# Data Wrangling
### Creating a DataFrame

In [2]:
import pandas as pd
# create an empty DataFrame
# NOTE(review): `dataframe` is not referenced again in the visible cells; they
# all use `df`, which is never assigned in the visible source - confirm where
# `df` is loaded so the notebook survives Restart & Run All.
dataframe = pd.DataFrame()

### Navigating DataFrames

In [10]:
# first row (position 0), returned as a Series
print(df.iloc[0, :])

# rows at positions 1, 2 and 3 (the end of the slice is exclusive)
print(df.iloc[1:4, :])

# the first four rows, i.e. positions 0 through 3
print(df.iloc[0:4, :])

Name        Allen, Miss Elisabeth Walton
PClass                               1st
Age                                   29
Sex                               female
Survived                               1
SexCode                                1
Name: 0, dtype: object
                                            Name PClass   Age     Sex  \
1                    Allison, Miss Helen Loraine    1st   2.0  female   
2            Allison, Mr Hudson Joshua Creighton    1st  30.0    male   
3  Allison, Mrs Hudson JC (Bessie Waldo Daniels)    1st  25.0  female   

   Survived  SexCode  
1         0        1  
2         0        0  
3         0        1  
                                            Name PClass   Age     Sex  \
0                   Allen, Miss Elisabeth Walton    1st  29.0  female   
1                    Allison, Miss Helen Loraine    1st   2.0  female   
2            Allison, Mr Hudson Joshua Creighton    1st  30.0    male   
3  Allison, Mrs Hudson JC (Bessie Waldo Daniels)    1s

In [13]:
# index the rows by passenger name; drop=False keeps Name as a regular
# column as well, so later cells can still read df['Name']
df = df.set_index('Name', drop=False)

# look a row up by its index label
df.loc['Allen, Miss Elisabeth Walton']

Name        Allen, Miss Elisabeth Walton
PClass                               1st
Age                                   29
Sex                               female
Survived                               1
SexCode                                1
Name: Allen, Miss Elisabeth Walton, dtype: object

##  Replacing Values

In [16]:
# map every 'female' entry in the Sex column to 'Woman' and show the first two
# rows of the result (df itself is left unchanged)
df['Sex'].replace({'female': 'Woman'}).head(2)

Name
Allen, Miss Elisabeth Walton    Woman
Allison, Miss Helen Loraine     Woman
Name: Sex, dtype: object

In [18]:
# replace several values at once: 'female' becomes 'Woman' and 'male' becomes
# 'Man' (the two lists are paired up position by position)
df['Sex'].replace(['female', 'male'], ['Woman', 'Man']).head(5)

Name
Allen, Miss Elisabeth Walton                     Woman
Allison, Miss Helen Loraine                      Woman
Allison, Mr Hudson Joshua Creighton                Man
Allison, Mrs Hudson JC (Bessie Waldo Daniels)    Woman
Allison, Master Hudson Trevor                      Man
Name: Sex, dtype: object

##  Finding the Min, Max, Sum, Average, and Count

In [24]:
# summary statistics for the Age column
age_column = df['Age']
print('Maximum: {}'.format(age_column.max()))
print('Minimum: {}'.format(age_column.min()))
print('Mean: {}'.format(age_column.mean()))
print('Sum: {}'.format(age_column.sum()))
print('Count: {}'.format(age_column.count()))

Maximum: 71.0
Minimum: 0.17
Mean: 30.397989417989418
Sum: 22980.88
Count: 756


In [27]:
# Spread and shape statistics for every numeric column of df.
# numeric_only=True is passed explicitly because df also holds text columns
# (Name, PClass, Sex): since pandas 2.0 these reductions default to
# numeric_only=False and raise a TypeError on such columns, whereas older
# releases silently skipped them. Being explicit yields the same result on both.
print("Variance: {}".format(df.var(numeric_only=True)))
print("Standard Deviation: {}".format(df.std(numeric_only=True)))
print("Kurtosis: {}".format(df.kurt(numeric_only=True)))
print("Skewness: {}".format(df.skew(numeric_only=True)))


Variance: Age         203.320470
Survived      0.225437
SexCode       0.228230
dtype: float64
Standard Deviation: Age         14.259049
Survived     0.474802
SexCode      0.477734
dtype: float64
Kurtosis: Age        -0.036536
Survived   -1.562162
SexCode    -1.616702
dtype: float64
Skewness: Age         0.368511
Survived    0.663491
SexCode     0.621098
dtype: float64


##  Finding Unique Values

In [29]:
# unique() returns an array holding each distinct value of the column once
df['Sex'].unique()

array(['female', 'male'], dtype=object)

In [31]:
# value_counts() lists each distinct value of the column together with the
# number of rows in which it appears
df['Sex'].value_counts()

male      851
female    462
Name: Sex, dtype: int64

##  Grouping Rows by Values

In [40]:
# group the rows by their Survived value and count the non-missing Name
# entries in each group
df.groupby('Survived')['Name'].count()

Survived
0    863
1    450
Name: Name, dtype: int64

In [41]:
# group by two keys at once: the result has one mean Age per
# (Sex, Survived) combination
df.groupby(['Sex', 'Survived'])['Age'].mean()

Sex     Survived
female  0           24.901408
        1           30.867143
male    0           32.320780
        1           25.951875
Name: Age, dtype: float64

##  Looping Over a Column

In [43]:
# plain for loop over the first two names, printing each in upper case
for passenger_name in df['Name'].iloc[:2]:
    print(passenger_name.upper())

ALLEN, MISS ELISABETH WALTON
ALLISON, MISS HELEN LORAINE


In [45]:
# the same transformation written as a list comprehension (more "pythonic")
[passenger_name.upper() for passenger_name in df['Name'].iloc[:2]]

['ALLEN, MISS ELISABETH WALTON', 'ALLISON, MISS HELEN LORAINE']

##  Applying a Function Over All Elements in a Column

In [46]:
def uppercase(x):
    """Return the upper-cased form of ``x``.

    ``x`` is expected to provide an ``upper()`` method (for example a ``str``);
    whatever that method returns is passed straight back to the caller.
    """
    upper_cased = x.upper()
    return upper_cased

# run uppercase on every value of the Name column and show the first two results
df['Name'].apply(uppercase).iloc[:2]

Name
Allen, Miss Elisabeth Walton    ALLEN, MISS ELISABETH WALTON
Allison, Miss Helen Loraine      ALLISON, MISS HELEN LORAINE
Name: Name, dtype: object