# Pandas Tutorial

## Import Requirements

In [1]:
import numpy as np
import pandas as pd

### Series

In [2]:
# Example 1: create an empty Series
a = pd.Series()
a

  a = pd.Series()


Series([], dtype: float64)

In [3]:
# Example 2: Create a Series from an ndarray
data = np.array(['a', 'b', 'c', 'd'])
b = pd.Series(data)  # Since we didn't pass an index, the default index in zero based integers
print(data)
print("")
print(b)

['a' 'b' 'c' 'd']

0    a
1    b
2    c
3    d
dtype: object


In [4]:
# Example 3: Create a Series from a dictionary, dictionary keys are used as indices
data = {'a':0, 'b':1, 'c':2}
c = pd.Series(data)
print(data)
print("")
print(c)

{'a': 0, 'b': 1, 'c': 2}

a    0
b    1
c    2
dtype: int64


In [5]:
# Example 4: Create a Series from a dictionary and apply your own indices, notice the unrecognized index is added but its value is NaN
data = {'a':0, 'b':1, 'c':2}
d = pd.Series(data, index=['b', 'c', 'd', 'a'])
print(data)
print("")
print(d)

{'a': 0, 'b': 1, 'c': 2}

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64


In [6]:
# Example 5: Create a Series from a scalar
scalar = 5
e = pd.Series(scalar, index=['0', '1', '2', '3'])
e

0    5
1    5
2    5
3    5
dtype: int64

In [7]:
# Example 6: Accessing data from a Series with it's position, select first value
f = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(f)
print("")
f[0]

a    1
b    2
c    3
d    4
e    5
dtype: int64



1

In [8]:
# Example 7: Accessing data from a Series with it's position, select first 3 values
f = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(f)
print("")
f[:3]

a    1
b    2
c    3
d    4
e    5
dtype: int64



a    1
b    2
c    3
dtype: int64

In [9]:
# Example 8: Accessing data from a Series with it's position
f = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(f)
print("")
f[-3:]

a    1
b    2
c    3
d    4
e    5
dtype: int64



c    3
d    4
e    5
dtype: int64

In [10]:
# Example 9: Accessing data from a Series with it's index (label)
f = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(f)
print("")
f['a']

a    1
b    2
c    3
d    4
e    5
dtype: int64



1

In [11]:
# Example 10: Accessing data from a Series with it's index (label), multiple elements
f = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(f)
print("")
f[['a', 'b', 'c']]

a    1
b    2
c    3
d    4
e    5
dtype: int64



a    1
b    2
c    3
dtype: int64

In [12]:
# Example 9: Accessing data from a Series with it's index (label), if a label is non-existent an error is raised
f = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(f)
print("")
f['h']

a    1
b    2
c    3
d    4
e    5
dtype: int64



KeyError: 'h'

### DataFrame

DataFrame properties:
- Columns can be different dtypes
- Size is mutable
- Has labeled axes (rows and columns)
- Operations can be performaed accross rows and columns (uses the vectorized operations of NumPy)
- Created from lists, dictonaries, Series, Numpy arrays, or other DataFrames

In [13]:
# Example 1: Create an empty DataFrame
df = pd.DataFrame()
df

In [14]:
# Example 2: Create a DataFrame from lists
data = [1, 2, 3, 4, 5]
df = pd.DataFrame(data)
df

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


In [15]:
# Example 3: Create a DataFrame from a list of lists and specify column headers
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
df = pd.DataFrame(data, columns=['Name', 'Age'])
df

Unnamed: 0,Name,Age
0,Alex,10
1,Bob,12
2,Clarke,13


In [16]:
# Example 4: Create a DataFrame from a list of lists, specify column headers, and specify dtype
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
df = pd.DataFrame(data, columns=['Name', 'Age'], dtype=float)
df

Unnamed: 0,Name,Age
0,Alex,10.0
1,Bob,12.0
2,Clarke,13.0


In [17]:
# Example 5: Create a DataFrame from a dictionary of ndarrays/lists
data = {'Name':['Tom', 'Jack', 'Steve', 'Ricky'], 'Age':[28, 34, 29, 42]}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age
0,Tom,28
1,Jack,34
2,Steve,29
3,Ricky,42


In [18]:
# Example 6: Create an indexed DataFrame using arrays
data = {'Name':['Tom', 'Jack', 'Steve', 'Ricky'], 'Age':[28, 34, 29, 42]}
df = pd.DataFrame(data, index=['rank1', 'rank2', 'rank3', 'rank4'])
df

Unnamed: 0,Name,Age
rank1,Tom,28
rank2,Jack,34
rank3,Steve,29
rank4,Ricky,42


In [19]:
# Example 7: Create a DataFrame from a dictionary of Series
data = {'one':pd.Series([1, 2, 3], index=['a', 'b', 'c']),
       'two':pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
df = pd.DataFrame(data)
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [20]:
# Example 8: Column selection
data = {'one':pd.Series([1, 2, 3], index=['a', 'b', 'c']),
       'two':pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
df = pd.DataFrame(data)
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [21]:
# Example 9: Adding new columns and arithmetic across columns
data = {'one':pd.Series([1, 2, 3], index=['a', 'b', 'c']),
       'two':pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
df = pd.DataFrame(data)
print(df)
print("")

# Add new column
df['three'] = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
print(df)
print("")

# Create new column using arithmetic on existing columns
df['four'] = df['one'] + df['three']
print(df)

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4

   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN

   one  two  three  four
a  1.0    1   10.0  11.0
b  2.0    2   20.0  22.0
c  3.0    3   30.0  33.0
d  NaN    4    NaN   NaN


In [22]:
# Example 10: Deleting columns
data = {'one':pd.Series([1, 2, 3], index=['a', 'b', 'c']),
       'two':pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
       'three':pd.Series([10, 20, 30], index=['a', 'b', 'c'])}
df = pd.DataFrame(data)
print(df)
print("")

# Delete a column
df.pop('two')
print(df)

   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN

   one  three
a  1.0   10.0
b  2.0   20.0
c  3.0   30.0
d  NaN    NaN


In [23]:
# Example 11: Row selection, addition, and deletion
data = {'one':pd.Series([1, 2, 3], index=['a', 'b', 'c']),
       'two':pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}

df = pd.DataFrame(data)
print(df)
print("")

# Select specific row, result is a Series with the column names as labels
print(df.loc['b'])
print("")

# Selection by integer location, result is a Series with the column names as labels
print(df.iloc[2])

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4

one    2.0
two    2.0
Name: b, dtype: float64

one    3.0
two    3.0
Name: c, dtype: float64


In [24]:
# Example 12: Slice rows
data = {'one':pd.Series([1, 2, 3], index=['a', 'b', 'c']),
       'two':pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
df = pd.DataFrame(data)

df[2:4]

Unnamed: 0,one,two
c,3.0,3
d,,4


In [25]:
# Example 13: Addition of rows from another DataFrame
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['a', 'b'])

df = df.append(df2)
df

Unnamed: 0,a,b
0,1,2
1,3,4
0,5,6
1,7,8


In [26]:
# Example 14: Deleting rows
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['a', 'b'])

df = df.append(df2)
print(df)
print("")

df = df.drop(0)  # Drop all rows with index 0, in this case it happens to be multiple rows because there was an index 0 in each DataFrame we appended to each other
print(df)

   a  b
0  1  2
1  3  4
0  5  6
1  7  8

   a  b
1  3  4
1  7  8


### Series Basic Functionality

In [27]:
# Example 1: Check if a Series is empty
g = pd.Series(np.random.randn(4))
print(g)
print("")

g.empty

0   -0.736508
1   -0.674250
2    2.069293
3    1.451867
dtype: float64



False

In [28]:
# Example 2: Find the number of dimensions of the object
g.ndim

1

In [29]:
# Example 3: Find the size of the object, for a Series that's only going to return the length because all Series are 1D
g.size

4

In [30]:
# Example 4: Return all the values of the Series as an array
g.values

array([-0.73650786, -0.67425016,  2.06929307,  1.45186706])

In [31]:
# Example 5: View the top n rows or bottom n rows of a Series
print(g)
print("")

print(g.head(2))
print("")

print(g.tail(2))

0   -0.736508
1   -0.674250
2    2.069293
3    1.451867
dtype: float64

0   -0.736508
1   -0.674250
dtype: float64

2    2.069293
3    1.451867
dtype: float64


### DataFrame Basic Functionality

In [32]:
# Example 1: Transpose the DataFrame
data = {'Name':pd.Series(['Tom','James','Ricky','Vin','Steve','Smith','Jack']),
        'Age':pd.Series([25,26,25,23,30,29,23]),
        'Rating':pd.Series([4.23,3.24,3.98,2.56,3.20,4.6,3.8])}
df = pd.DataFrame(data)
print(df)
print("")

print(df.T)

    Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24
2  Ricky   25    3.98
3    Vin   23    2.56
4  Steve   30    3.20
5  Smith   29    4.60
6   Jack   23    3.80

           0      1      2     3      4      5     6
Name     Tom  James  Ricky   Vin  Steve  Smith  Jack
Age       25     26     25    23     30     29    23
Rating  4.23   3.24   3.98  2.56    3.2    4.6   3.8


In [33]:
# Example 2: Find the DataFrame dtypes
df.dtypes

Name       object
Age         int64
Rating    float64
dtype: object

In [34]:
# Example 3: Check whether the DataFrame object is empty or not
df.empty

False

In [35]:
# Example 4: check for the number of dimensions of the DataFrame
df.ndim

2

In [36]:
# Example 5: Check for the shape of the DataFrame
df.shape

(7, 3)

In [37]:
# Example 6: Check for the size of the DataFrame
df.size  # Returns the number of all values in the DataFrame

21

In [38]:
# Example 7: Return the data in the DataFrame as an ndarray
df.values

array([['Tom', 25, 4.23],
       ['James', 26, 3.24],
       ['Ricky', 25, 3.98],
       ['Vin', 23, 2.56],
       ['Steve', 30, 3.2],
       ['Smith', 29, 4.6],
       ['Jack', 23, 3.8]], dtype=object)

In [39]:
# Example 8: View the top rows and the bottom rows of the DataFrame
# Top 2 rows
print(df.head(2))
print("")

# Bottom 2 rows
print(df.tail(2))

    Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24

    Name  Age  Rating
5  Smith   29     4.6
6   Jack   23     3.8
