In [2]:
import numpy as np
import pandas as pd

# Series

In [3]:
# A minimal Series: with no index given, pandas labels the values 0..n-1
a = pd.Series(data=[34, 23, 56, 76, 212, 56, 776])
print(f"a={a}")
print(f"Index={a.index}, Values={a.values}\n")

# With the default integer index, element lookup and slicing read just like numpy
print(f"a[3]={a[3]}")
print(f"Slicing is also same as in numpy a[2:6]={a[2:6]}")

a=0     34
1     23
2     56
3     76
4    212
5     56
6    776
dtype: int64
Index=RangeIndex(start=0, stop=7, step=1), Values=[ 34  23  56  76 212  56 776]

a[3]=76
Slicing is also same as in numpy a[2:6]=2     56
3     76
4    212
5     56
dtype: int64


In [4]:
# Series object with custom (string) index labels
a = pd.Series([34,23,56,76,212,56,776], index=["A","B","C","D","E","F","NA"])

print("a={}".format(a))
print("Index={}, Values={}\n".format(a.index, a.values))

# Accessing individual elements of a series with custom indices.
# Positional access goes through .iloc: a bare a[3] on a string-labelled Series
# relies on an integer-position fallback that recent pandas versions deprecate.
print("a[3]={}".format(a.iloc[3]))
# Label access: "NA" here is an ordinary string label, not a missing value
print("a[NA]={}".format(a["NA"]))
# Positional slice: the end position is excluded, exactly as in numpy
print("Slicing is also same as in numpy a[2:6]={}".format(a.iloc[2:6]))
# Label slice: unlike numpy, the end label "E" IS included in the result
print("Slicing is also same as in numpy a[B:E]={}".format(a.loc["B":"E"]))

a=A      34
B      23
C      56
D      76
E     212
F      56
NA    776
dtype: int64
Index=Index(['A', 'B', 'C', 'D', 'E', 'F', 'NA'], dtype='object'), Values=[ 34  23  56  76 212  56 776]

a[3]=76
a[NA]=776
Slicing is also same as in numpy a[2:6]=C     56
D     76
E    212
F     56
dtype: int64
Slicing is also same as in numpy a[B:E]=B     23
C     56
D     76
E    212
dtype: int64


## DataFrame

In [14]:
# Every full-length column is labelled by the same rivers, so spell the labels once
river_names = ["nile","ganga","indus","brahmaputra","yangtze","mekong","mississippi"]

corigin = pd.Series(
    ["egypt","india","pakistan", "china","india", "tibet","us"],
    index=river_names,
)
length = pd.Series([10434,453445,2132,324325,1213,343,5435], index=river_names)
ncountries = pd.Series([3,6,2,8,9,2,1], index=river_names)
significance = pd.Series([10,9,8,7,6,5,4], index=river_names)
# Deliberately incomplete column: "indus" and "brahmaputra" have no entry here,
# so the DataFrame fills those two rows with NaN
test = pd.Series(
    [10,9,6,5,4],
    index=["nile","ganga","yangtze","mekong","mississippi"],
)

# Columns are aligned on their index labels when the frame is assembled
rivers = pd.DataFrame({
    "Origin": corigin,
    "Length": length,
    "Num-Countries": ncountries,
    "Local-Significance": significance,
    "Test": test,
})
print(rivers)

# Row labels and the raw 2-D value array, then a single column selected by name
print(f"Indices={rivers.index}\nValues={rivers.values}\n")
print(f"rivers['Origin']={rivers['Origin']}\n")

               Origin  Length  Num-Countries  Local-Significance  Test
brahmaputra     china  324325              8                   7   NaN
ganga           india  453445              6                   9   9.0
indus        pakistan    2132              2                   8   NaN
mekong          tibet     343              2                   5   5.0
mississippi        us    5435              1                   4   4.0
nile            egypt   10434              3                  10  10.0
yangtze         india    1213              9                   6   6.0
Indices=Index(['brahmaputra', 'ganga', 'indus', 'mekong', 'mississippi', 'nile',
       'yangtze'],
      dtype='object')
Values=[['china' 324325 8 7 nan]
 ['india' 453445 6 9 9.0]
 ['pakistan' 2132 2 8 nan]
 ['tibet' 343 2 5 5.0]
 ['us' 5435 1 4 4.0]
 ['egypt' 10434 3 10 10.0]
 ['india' 1213 9 6 6.0]]

rivers['Origin']=brahmaputra       china
ganga             india
indus          pakistan
mekong            tibet
mississippi   

## From numpy arrays

In [37]:
# Build two small DataFrames straight from numpy arrays.
# A seeded, local Generator makes the cell print the same numbers on every
# Restart & Run All (output captured from earlier unseeded runs will differ).
rng = np.random.default_rng(42)
a = pd.DataFrame(rng.random((3, 2)), columns=["A", "B"], index=["ID1","ID2","ID3"])           # floats in [0, 1)
b = pd.DataFrame(rng.integers(6, size=(3, 2)), columns=["A", "B"], index=["ID1","ID2","ID3"])  # ints in 0..5
print("a=\n{}\n\nb=\n{}\n".format(a,b))

# a["A"] selects a *column*. This differs from numpy structured arrays, where
# plain integer indexing returns a row; on a DataFrame, rows are reached
# through .loc / .iloc instead.
print("a['A']=\n{}".format(a["A"]))

# Note pandas index object is immutable. They can be accessed but cannot be modified
i = a.index
print("\nIndices of a=\n{}".format(i))
print("Accessing an index is fine, i[2]={}".format(i[2]))
# But changing an index value is not allowed ... The following will give an error (TypeError)
#i[2] = "New Label"

a=
            A         B
ID1  0.780158  0.922878
ID2  0.114365  0.066713
ID3  0.505939  0.985914

b=
     A  B
ID1  1  1
ID2  0  3
ID3  1  3

a['A']=
ID1    0.780158
ID2    0.114365
ID3    0.505939
Name: A, dtype: float64

Indices of a=
Index(['ID1', 'ID2', 'ID3'], dtype='object')
Accessing an index is fine, i[2]=ID3


# Accessing using indexers: loc, iloc, ix (note: `ix` is deprecated and was removed in pandas 1.0; prefer `loc` / `iloc`)

In [47]:
# Series with string labels, used to demonstrate the explicit indexers
a = pd.Series(
    data=[34, 23, 56, 76, 212, 56, 776],
    index=list("ABCDEF") + ["NA"],
)
print(a)
# .loc always selects by label (the explicit index); its counterpart .iloc
# selects by integer position (the implicit index) instead
print(f"a.loc[C]={a.loc['C']}")

A      34
B      23
C      56
D      76
E     212
F      56
NA    776
dtype: int64
a.loc[C]=56
