## Create an empty Series
### The most basic Series that can be created is an empty Series.

In [2]:
# Import the pandas library under its conventional alias `pd`.
import pandas as pd

# Build a Series with no elements. The dtype is given explicitly because
# calling pd.Series() without one emits a DeprecationWarning on pandas 1.x
# and produces an object-dtype Series on pandas 2.x; float64 keeps the
# result consistent across versions.
s = pd.Series(dtype='float64')
print(s)

Series([], dtype: float64)


## Create a Series from ndarray

In [6]:
# Bring in pandas (as pd) and NumPy (as np).
import pandas as pd
import numpy as np

# Wrap a four-element NumPy string array in a Series; since no index is
# supplied, pandas labels the values 0..3.
data = np.array(list('abcd'))
s = pd.Series(data=data)
print(s)

0    a
1    b
2    c
3    d
dtype: object


In [7]:
# Same array as before, but this time with explicit index labels 100..103.
data = np.array(list('abcd'))
s = pd.Series(data=data, index=list(range(100, 104)))
print(s)

100    a
101    b
102    c
103    d
dtype: object


## Create a Series from dict

In [10]:
# With a dict as input, the keys become the index labels of the Series.
data = dict(a=0., b=1., c=2.)
s = pd.Series(data=data)
s

a    0.0
b    1.0
c    2.0
dtype: float64

In [12]:
# The order of the passed index is kept; a label with no matching dict key
# ('d' here) is filled with NaN (Not a Number).
data = dict(a=0., b=1., c=2.)
s = pd.Series(data=data, index=list('bcda'))
print(s)

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64


## Create a Series from Scalar

In [13]:
# A scalar needs an accompanying index; the scalar is repeated once for
# every index label.
s = pd.Series(data=5, index=list(range(4)))
print(s)

0    5
1    5
2    5
3    5
dtype: int64


## Accessing Data from Series with Position

In [26]:
# Data in a Series can be accessed by position, much like an ndarray.
# Positional access goes through .iloc: on a Series with string labels,
# plain s[0] relies on a deprecated fallback that treats the integer as a
# position, which newer pandas versions warn about and then remove.
s = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])

# Retrieve the first element.
first_element = s.iloc[0]
print (first_element)
print ("linebreak")

# Retrieve the first three elements.
head_three = s.iloc[:3]
print (head_three)
print ("linebreak")

# Retrieve the last three elements.
tail_three = s.iloc[-3:]
print (tail_three)

1
linebreak
a    1
b    2
c    3
dtype: int64
linebreak
c    3
d    4
e    5
dtype: int64


## Retrieve Data Using Label (Index)

In [31]:
# Look values up by index label instead of by position.

# A single label yields the value stored under it.
print(s['a'])
print("linebreak")

# A list of labels yields a sub-Series in the requested label order.
print(s[['a', 'c', 'd']])

1
linebreak
a    1
c    3
d    4
dtype: int64


In [33]:
# If a label is not contained in the index, a KeyError is raised.
# The error is caught and displayed so that the demonstration does not abort
# a full "Restart Kernel -> Run All" of the notebook.
print ("linebreak")
try:
    print (s['f'])
except KeyError as err:
    print ("KeyError: {}".format(err))

linebreak


KeyError: 'f'

## pandas.DataFrame

## Create an Empty DataFrame

In [34]:
# Import pandas under the alias pd.
import pandas as pd

# A DataFrame constructed with no arguments has no rows and no columns.
df = pd.DataFrame()
print(df)

Empty DataFrame
Columns: []
Index: []


In [35]:
# Build a DataFrame from a flat list: the result has a single column
# labelled 0 and one row per list element.
data = list(range(1, 6))
df = pd.DataFrame(data=data)
print(df)

   0
0  1
1  2
2  3
3  4
4  5


In [36]:
# Each inner list becomes one row; `columns` names the two fields.
data = [
    ['Alex', 10],
    ['Bob', 12],
    ['Clarke', 13],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age'])
print(df)

     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13


In [37]:
# Same rows as before, but with Age stored as float.
# Only the Age column is cast: passing dtype=float to the constructor asks
# pandas to convert every column, and the string Name column cannot be
# converted (older pandas silently fell back with a deprecation warning,
# newer pandas raises an error).
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
df = pd.DataFrame(data, columns=['Name', 'Age']).astype({'Age': float})
print (df)

     Name   Age
0    Alex  10.0
1     Bob  12.0
2  Clarke  13.0


In [39]:
# Build a DataFrame from a dict of equal-length lists: keys become column
# names. No index is passed, so the rows get the default integer labels
# 0, 1, 2, 3 (i.e. range(n)).
data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricky'],
    Age=[28, 34, 29, 42],
)
df = pd.DataFrame(data=data)
print(df)

    Name  Age
0    Tom   28
1   Jack   34
2  Steve   29
3  Ricky   42


In [41]:
# Same dict of lists, now with custom row labels rank1..rank4 instead of
# the default integer index.
data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricky'],
    Age=[28, 34, 29, 42],
)
df = pd.DataFrame(data=data, index=['rank{}'.format(i) for i in range(1, 5)])
print(df)

        Name  Age
rank1    Tom   28
rank2   Jack   34
rank3  Steve   29
rank4  Ricky   42


In [43]:
# Each dict is one row and its keys are the columns. A key missing from a
# row ('c' in the first dict) shows up as NaN (Not a Number).
data = [
    dict(a=1, b=2),
    dict(a=5, b=10, c=20),
]
df = pd.DataFrame(data=data)
print(df)

   a   b     c
0  1   2   NaN
1  5  10  20.0


In [45]:
# Reuse the list of dicts from the previous cell, labelling its two rows.
df = pd.DataFrame(data=data, index=['first', 'second'])
print(df)

        a   b     c
first   1   2   NaN
second  5  10  20.0


In [46]:
# Both column names match dictionary keys, so both columns carry values.
df1 = pd.DataFrame(
    data=data,
    index=['first', 'second'],
    columns=['a', 'b'],
)

# 'b1' matches no dictionary key, so that column is filled with NaN.
df2 = pd.DataFrame(
    data=data,
    index=['first', 'second'],
    columns=['a', 'b1'],
)
print(df1)
print(df2)

        a   b
first   1   2
second  5  10
        a  b1
first   1 NaN
second  5 NaN


In [49]:
# Build a DataFrame from a dict of Series. The row index is the union of
# the Series indexes; 'one' has no entry for 'd', so that cell is NaN.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data=d)
print(df)

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4


In [52]:
# Column selection: indexing the frame with a column name returns that
# column as a Series.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data=d)
print(df['one'])

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64


In [53]:
# Adding columns
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data=d)

# Assigning a Series to a new column label adds the column; values are
# aligned on the index, so row 'd' (absent from the new Series) gets NaN.
print("Adding a new column by passing as Series:")
df['three'] = pd.Series([10, 20, 30], index=list('abc'))
print(df)

# A new column can also be derived from columns already in the frame.
print("Adding a new column using the existing columns in DataFrame:")
df['four'] = df['one'] + df['three']

print(df)

Adding a new column by passing as Series:
   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN
Adding a new column using the existing columns in DataFrame:
   one  two  three  four
a  1.0    1   10.0  11.0
b  2.0    2   20.0  22.0
c  3.0    3   30.0  33.0
d  NaN    4    NaN   NaN


In [54]:
# Column deletion
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
    three=pd.Series([10, 20, 30], index=list('abc')),
)

df = pd.DataFrame(data=d)
print("Our dataframe is:")
print(df)

# `del` removes a column from the frame in place.
print("Deleting the first column using DEL function:")
del df['one']
print(df)

# `pop` also removes the column in place; its return value is not used here.
print("Deleting another column using POP function:")
df.pop('two')
print(df)

Our dataframe is:
   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN
Deleting the first column using DEL function:
   two  three
a    1   10.0
b    2   20.0
c    3   30.0
d    4    NaN
Deleting another column using POP function:
   three
a   10.0
b   20.0
c   30.0
d    NaN


## Row Selection, Addition, and Deletion

In [56]:
# Select a row by its label with .loc; the row comes back as a Series
# whose index is the frame's column names.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data=d)
print(df.loc['b'])

one    2.0
two    2.0
Name: b, dtype: float64


In [58]:
# Selection by integer location: .iloc takes a zero-based row position
# (2 is the third row, labelled 'c').
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data=d)
print(df.iloc[2])

one    3.0
two    3.0
Name: c, dtype: float64


In [59]:
# Slice rows: the ':' operator on the frame selects a run of rows by
# position (here the third and fourth rows).
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data=d)
print(df[2:4])

   one  two
c  3.0    3
d  NaN    4


In [60]:
# Addition of rows
# New rows are added at the end of a DataFrame by concatenating frames with
# pd.concat. (DataFrame.append, used for this in older tutorials, was
# deprecated and then removed in pandas 2.0.) The original row labels are
# kept, so labels 0 and 1 each appear twice in the result.
df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])

df = pd.concat([df, df2])
print (df)

   a  b
0  1  2
1  3  4
0  5  6
1  7  8


In [61]:
# Deletion of rows
df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])

# Stack the two frames with pd.concat (DataFrame.append was removed in
# pandas 2.0); the row labels 0 and 1 are therefore duplicated.
df = pd.concat([df, df2])

# Drop rows with label 0. Because the label occurs twice, both rows that
# carry it are removed.
df = df.drop(0)

print (df)

   a  b
1  3  4
1  7  8
