# Pandas - Data Structures

In [5]:
# import pandas library
import pandas as pd
import numpy as np

## Pandas - Series
### pandas.Series(data, index, dtype, copy)

In [11]:
# Create an empty Series.
# The dtype is given explicitly: the default dtype of an empty Series is
# version-dependent (older pandas emits a warning, newer pandas picks object),
# and float64 matches the output shown below.
s = pd.Series(dtype='float64')
print(s)

Series([], dtype: float64)


  s = pd.Series()


In [12]:
# Build a Series from a NumPy ndarray.
data = np.array(list('abc'))

# No index supplied: pandas labels the values with positions 0..n-1.
s = pd.Series(data)
print(s, end='\n\n')

# Same values, this time labelled with an explicit index.
custom_labels = [100, 200, 300]
s = pd.Series(data, index=custom_labels)
print(s)

0    a
1    b
2    c
dtype: object

100    a
200    b
300    c
dtype: object


In [18]:
# Build a Series from a dict: the keys become the index labels.
data = dict(a=1, b=2, c=3)
s = pd.Series(data)
print(s, '\n')

# With an explicit index, values are looked up by key. The label 'd' has no
# matching key, so it is filled with NaN and the dtype becomes float64.
s = pd.Series(data, index=list('abcd'))
print(s, '\n')

# The index also decides the order of the result, regardless of dict order.
s = pd.Series(data, index=list('bcda'))
print(s)

a    1
b    2
c    3
dtype: int64 

a    1.0
b    2.0
c    3.0
d    NaN
dtype: float64 

b    2.0
c    3.0
d    NaN
a    1.0
dtype: float64


In [23]:
# A scalar is repeated once for every label in the supplied index.
scalar_labels = list(range(4))
s = pd.Series(5, index=scalar_labels)
print(s)

0    5
1    5
2    5
3    5
dtype: int64


In [35]:
# Access data from a Series.
s = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(s)

# Positional access goes through .iloc. A bare integer key such as s[0] on a
# string-labelled Series relies on a deprecated positional fallback, so the
# position-based lookups are written explicitly.
print(s.iloc[0], '\n')     # first element
print(s.iloc[:3], '\n')    # first three elements
print(s.iloc[-3:], '\n')   # last three elements

# Retrieve a single element by its index label.
print(s['a'], '\n')

# Retrieve several elements by passing a list of labels.
print(s[['a', 'c', 'd', 'e']])

a    1
b    2
c    3
d    4
e    5
dtype: int64
1 

a    1
b    2
c    3
dtype: int64 

c    3
d    4
e    5
dtype: int64 

1 

a    1
c    3
d    4
e    5
dtype: int64


## Pandas - DataFrame
### pandas.DataFrame(data, index, columns, dtype, copy)

In [40]:
# A DataFrame built with no arguments has neither rows nor columns.
df = pd.DataFrame()
print(df)

Empty DataFrame
Columns: []
Index: []


In [46]:
# Create a DataFrame from a list.

# Example 1: a flat list becomes a single column labelled 0.
data = [1, 2, 3, 4]
df = pd.DataFrame(data)
print(df)

# Example 2: a list of rows with explicit column names.
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
df = pd.DataFrame(data, columns=['Name', 'Age'])
print(df)

# Example 3: custom row labels, with the numeric column stored as float.
# dtype=float is not passed to the constructor because it applies to every
# column, and the string 'Name' column cannot be converted (recent pandas
# raises ValueError). Only 'Age' is cast, after construction.
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
df = pd.DataFrame(data, index=['a', 'b', 'C'], columns=['Name', 'Age'])
df = df.astype({'Age': float})
print(df)

   0
0  1
1  2
2  3
3  4
     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13
     Name   Age
a    Alex  10.0
b     Bob  12.0
C  Clarke  13.0


In [49]:
# Build a DataFrame from a dict of equal-length lists: each key is a column.
data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricy'],
    Age=[28, 34, 29, 42],
)

# Example 1: default integer row labels.
df = pd.DataFrame(data)
print(df)

# Example 2: the same columns with custom row labels.
rank_labels = ['rank1', 'rank2', 'rank3', 'rank4']
df = pd.DataFrame(data, index=rank_labels)
print(df)

    Name  Age
0    Tom   28
1   Jack   34
2  Steve   29
3   Ricy   42
        Name  Age
rank1    Tom   28
rank2   Jack   34
rank3  Steve   29
rank4   Ricy   42


In [52]:
# Build a DataFrame from a list of dicts: each dict is one row, and the union
# of the keys forms the columns. A key missing from a row shows up as NaN.
data = [
    dict(a=1, b=2),
    dict(a=5, b=10, c=20),
]

# Example 1: default integer row labels.
df = pd.DataFrame(data)
print(df, end='\n\n')

# Example 2: the same rows with custom row labels.
row_names = ['first', 'second']
df = pd.DataFrame(data, index=row_names)
print(df)

   a   b     c
0  1   2   NaN
1  5  10  20.0

        a   b     c
first   1   2   NaN
second  5  10  20.0


In [59]:
# Column addition and deletion on a DataFrame built from a dict of Series.
# The two Series have different lengths; the frame's index is the union of
# their labels, so column 'one' gets NaN for the label it lacks ('d').
row_labels = ['a', 'b', 'c', 'd']
data = {
    'one': pd.Series([1, 2, 3], index=row_labels[:3]),
    'two': pd.Series([1, 2, 3, 4], index=row_labels),
}

df = pd.DataFrame(data)
print(df)

# Add a column from a plain list (one value per row).
df['three'] = [10, 20, 30, 40]
print(df)

# Add a column computed element-wise from two existing columns.
df['four'] = df['two'].add(df['three'])
print(df)

# Remove a column in place (df.pop('one') would also work).
print('\nDeleting the first column:')
del df['one']
print(df)

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4
   one  two  three
a  1.0    1     10
b  2.0    2     20
c  3.0    3     30
d  NaN    4     40
   one  two  three  four
a  1.0    1     10    11
b  2.0    2     20    22
c  3.0    3     30    33
d  NaN    4     40    44

Deleting the first column:
   two  three  four
a    1     10    11
b    2     20    22
c    3     30    33
d    4     40    44


In [69]:
# Row selection, using the DataFrame `df` left by the previous cell.
print('Our Data Set:\n', df)
print()

# Label-based lookup: .loc returns the row labelled 'b' as a Series.
row_b = df.loc['b']
print(row_b)

# Position-based lookup: .iloc[2] is the third row.
third_row = df.iloc[2]
print(third_row)
print()

# An integer slice on the frame selects rows by position (end excluded).
print(df[1:4])

Our Data Set:
    two  three  four
a    1     10    11
b    2     20    22
c    3     30    33
d    4     40    44

two       2
three    20
four     22
Name: b, dtype: int64
two       3
three    30
four     33
Name: c, dtype: int64

   two  three  four
b    2     20    22
c    3     30    33
d    4     40    44


In [73]:
# Add new rows by stacking a second frame under the first.
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['a', 'b'])

# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
# Each frame keeps its own row labels, so 0 and 1 both appear twice.
df = pd.concat([df, df2])
print(df, '\n')

# Delete rows by label: drop(0) removes every row labelled 0, i.e. one row
# from each of the two original frames.
df = df.drop(0)
print(df)

   a  b
0  1  2
1  3  4
0  5  6
1  7  8 

   a  b
1  3  4
1  7  8
