# Working with Pandas

- https://www.w3schools.com/python/pandas/default.asp

In [1]:
import numpy as np
import pandas as pd

### pandas.Series

A one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.). The axis labels are collectively referred to as the index. Series are created from (1) a scalar and an index, (2) a dictionary or (3) a numpy.ndarray. A numpy.ndarray is a multidimensional, homogeneous array of fixed-size items (https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html).

In [307]:
# Scalar input: the single value is repeated once for every label in the index.

pd.Series(data=5.0, index=list("abc"))

a    5.0
b    5.0
c    5.0
dtype: float64

In [60]:
# Dictionary input: the keys become the index labels and the values become the data.

pd.Series(dict(b=1.0, a=0, c=2))

b    1.0
a    0.0
c    2.0
dtype: float64

In [6]:
# When an explicit index accompanies a dictionary, only the values whose keys
# match the labels in that index are pulled out of the data.

d = dict(a=0.0, b=1.0, c=2.0)
pd.Series(data=d, index=["b", "c"])

b    1.0
c    2.0
dtype: float64

In [338]:
# ndarray input: the array must be one-dimensional, and the index (when given)
# must have the same length as the data. If no index is passed, integer labels
# starting at 0 are used instead.

pd.Series(data=np.random.rand(3), index=list("abc"))

a    0.938446
b    0.662389
c    0.304027
dtype: float64

In [337]:
# Reading values back out of a Series by label, much like indexing a numpy.ndarray.

series = pd.Series(data=np.random.randn(3), index=list("abc"))
print(series)
print(series.loc['a'])        # one label -> one scalar value
print(series.loc['a':'b'])    # label slice: both 'a' and 'b' appear in the result

a   -0.759163
b    0.849162
c   -0.431640
dtype: float64
-0.7591629371247881
a   -0.759163
b    0.849162
dtype: float64


In [343]:
# Selecting from a string-labelled Series by label, by position and by boolean mask.
s = pd.Series(np.random.randn(3), index=["a", "b", "c"])
print(s, end='\n\n')
# Positions go through .iloc: on a non-integer index, plain s[2] relies on the
# integer-as-position fallback, which is deprecated (FutureWarning since pandas 2.1).
print(s.median(), s.iloc[2], s['c'], end='\n\n')
# Boolean mask: keep only the entries strictly greater than the median.
print(s[s > s.median()], end='\n\n')
# A list of positions uses .iloc; a list of labels can use [] directly.
print(s.iloc[[1, 0]], end='\n\n')
print(s[['b', 'a']], end='\n\n')

a   -0.591038
b   -1.942116
c   -0.429191
dtype: float64

-0.5910380997993674 -0.42919099124566795 -0.42919099124566795

c   -0.429191
dtype: float64

c   -0.429191
b   -1.942116
dtype: float64

b   -1.942116
a   -0.591038
dtype: float64



In [339]:
# Same selection patterns as a 3-element Series, shown on five labelled values.
s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])
print(s, end='\n\n')
# Positions go through .iloc: on a non-integer index, plain s[3] relies on the
# integer-as-position fallback, which is deprecated (FutureWarning since pandas 2.1).
print(s.median(), s.iloc[3], s['d'], end='\n\n')
# Boolean mask: keep only the entries strictly greater than the median.
print(s[s > s.median()], end='\n\n')
# Positions 4, 3, 1 correspond to labels 'e', 'd', 'b', so the next two prints match.
print(s.iloc[[4, 3, 1]], end='\n\n')
print(s[['e', 'd', 'b']], end='\n\n')

a    0.235224
b    0.079149
c    0.685672
d    2.627390
e   -0.627217
dtype: float64

0.23522445825230298 2.627390015499585 2.627390015499585

c    0.685672
d    2.627390
dtype: float64

e   -0.627217
d    2.627390
b    0.079149
dtype: float64

e   -0.627217
d    2.627390
b    0.079149
dtype: float64

