# Creating and accessing data from Pandas objects

We run through the basic commands in pandas for creating series, dataframes, index objects and accessing data from them

Reference : [Python DataScience Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/), Chapter 3, Introducing Pandas Objects

In [1]:
import pandas as pd

# Series

## Creating a Series

In [3]:
#Creating a series from a list
series_1 = pd.Series([2,4,6,8])
series_1

0    2
1    4
2    6
3    8
dtype: int64

In [4]:
#Creating a series from a list and specifying indices
series_2 = pd.Series([2,4,6,8], index = ['a', 'b', 'c', 'd'])
series_2

a    2
b    4
c    6
d    8
dtype: int64

In [5]:
#Creating a series from a dictionary
series_3 = pd.Series({'e': 2, 'f': 4, 'g': 6, 'h': 8})
series_3

e    2
f    4
g    6
h    8
dtype: int64

In [12]:
#Creating a series from part of a dictionary
series_4 = pd.Series({'e': 2, 'f': 4, 'g': 6, 'h': 8}, index = ['g', 'h'])
series_4

g    6
h    8
dtype: int64

In [15]:
#Creating a constant value series
series_5 = pd.Series('c', index=[1,3,5,7])
series_5

1    c
3    c
5    c
7    c
dtype: object

## Accessing data from a series

In [13]:
# Accessing an element from a series
series_1[1], series_2['a'], series_2.iloc[0], series_4.loc['g']

(4, 2, 2, 6)

In [18]:
#All values of a series
series_3.values, type(series_3.values)

(array([2, 4, 6, 8]), numpy.ndarray)

In [19]:
#Indices of a series
series_3.index, type(series_3.index)

(Index(['e', 'f', 'g', 'h'], dtype='object'), pandas.core.indexes.base.Index)

In [26]:
#Slice of a series
series_3[['e', 'f']], type(series_3[['e', 'f']])

(e    2
 f    4
 dtype: int64,
 pandas.core.series.Series)

In [25]:
#Slice of a series
series_3[0:3], type(series_3[0:2])

(e    2
 f    4
 g    6
 dtype: int64,
 pandas.core.series.Series)

In [28]:
#Slice of a series
series_3['e':'g'], type(series_3['e':'g'])

(e    2
 f    4
 g    6
 dtype: int64,
 pandas.core.series.Series)

# Index

In [47]:
a = pd.Series([1,3,5,7], index=[0,3,2,1])
b = pd.Series([10, 'a', 'b'], index = [1, 'A', 'B'])

In [48]:
#Index objects of series
a.index, b.index

(Int64Index([0, 3, 2, 1], dtype='int64'), Index([1, 'A', 'B'], dtype='object'))

In [51]:
#Shape and type of index object
a.index.size, a.index.shape, a.index.ndim, a.index.dtype


(4, (4,), 1, dtype('int64'))

## Index object as an ordered set

In [41]:
#Set operations on index objects : intersection, union, symmetric difference
a.index & b.index, a.index | b.index, a.index ^ b.index

(Index([1], dtype='object'),
 Index([0, 1, 2, 3, 'A', 'B'], dtype='object'),
 Index([0, 2, 3, 'A', 'B'], dtype='object'))

In [46]:
#Set operations as methods of index objects
a.index.intersection(b.index),a.index.union(b.index),a.index.symmetric_difference(b.index)

(Index([1], dtype='object'),
 Index([0, 1, 2, 3, 'A', 'B'], dtype='object'),
 Index([0, 2, 3, 'A', 'B'], dtype='object'))

## Index object as an immutable array

Immutable because you cannot modify an index value .. a.index[0] = 'something_else' will give an error

In [55]:
#Array operations
a.index, a.index[:-1], a.index[[1,2]], a.index[0::2]

(Int64Index([0, 3, 2, 1], dtype='int64'),
 Int64Index([0, 3, 2], dtype='int64'),
 Int64Index([3, 2], dtype='int64'),
 Int64Index([0, 2], dtype='int64'))

# DataFrames

## Creating a DataFrame

In [59]:
#Creating an empty dataframe
column_names_1 = ["A","B", "C","D"]
row_names_1 = ['name','place','animal', 'thing']
df_1 = pd.DataFrame(columns = column_names_1, index = row_names_1)
df_1

Unnamed: 0,A,B,C,D
name,,,,
place,,,,
animal,,,,
thing,,,,


In [66]:
#Creating a dataframe from a list of lists/2-d numpy arrays
df_2 = pd.DataFrame([[1,2,3,4],[5,6,7,8]], columns = ['c1', 'c2', 'c3', 'c4'], index = ['cat', 'dog'])
df_2

Unnamed: 0,c1,c2,c3,c4
cat,1,2,3,4
dog,5,6,7,8


In [79]:
#Creating a dataframe from a dictionary of series objects
series_a = pd.Series([23,45,231,21], index = ['X', 'Y', 'Z', 'W'])
series_b = pd.Series([12,15,31,43], index = ['X', 'Y', 'c', 'd'])

df_3 = pd.DataFrame({'a_series': series_a,
              'b_series': series_b})
df_3

Unnamed: 0,a_series,b_series
W,21.0,
X,23.0,12.0
Y,45.0,15.0
Z,231.0,
c,,31.0
d,,43.0


In [77]:
#Creating a dataframe from a dictionary of dictionaries
dict_a = {'X':23, 'Y':45, 'Z':231, 'W':21}
dict_b = {'X': 12, 'Y': 15, 'c': 31, 'd':43}

df_4 = pd.DataFrame({'a_dict': series_a,
              'b_dict': series_b})
df_4

Unnamed: 0,a_dict,b_dict
W,21.0,
X,23.0,12.0
Y,45.0,15.0
Z,231.0,
c,,31.0
d,,43.0


In [78]:
#Creating a dataframe from a list of series objects
series_a = pd.Series([23,45,231,21], index = ['X', 'Y', 'Z', 'W'])
series_b = pd.Series([12,15,31,43], index = ['X', 'Y', 'c', 'd'])

df_5 = pd.DataFrame([series_a,series_b])
df_5

Unnamed: 0,X,Y,Z,W,c,d
0,23.0,45.0,231.0,21.0,,
1,12.0,15.0,,,31.0,43.0


In [76]:
#Creating a dataframe from a list of dictionaries
dict_a = {'X':23, 'Y':45, 'Z':231, 'W':21}
dict_b = {'X': 12, 'Y': 15, 'c': 31, 'd':43}

df_6 = pd.DataFrame([dict_a,dict_b])
df_6

Unnamed: 0,X,Y,Z,W,c,d
0,23,45,231.0,21.0,,
1,12,15,,,31.0,43.0


## Accessing data from a dataframe

In [80]:
df_3

Unnamed: 0,a_series,b_series
W,21.0,
X,23.0,12.0
Y,45.0,15.0
Z,231.0,
c,,31.0
d,,43.0


In [82]:
# Columns and indices of a dataframe
df_3.columns, df_3.index, type(df_3.columns), type(df_3.index)

(Index(['a_series', 'b_series'], dtype='object'),
 Index(['W', 'X', 'Y', 'Z', 'c', 'd'], dtype='object'),
 pandas.core.indexes.base.Index,
 pandas.core.indexes.base.Index)