In [None]:
import numpy as np
# Seed the legacy global RNG so the sampled values are identical on every run.
np.random.seed(0)
# 3-D array (shape 3 x 4 x 5) of integers drawn from the half-open range [0, 10).
x3 = np.random.randint(low=0, high=10, size=(3, 4, 5))

In [None]:
# Only the last expression of a cell is echoed, so the output below is nbytes.
x3.itemsize         # size in bytes of a single element
x3.nbytes           # total size in bytes of the array, i.e. itemsize * x3.size

480

In [None]:
# Indexing the first two axes of the 3-D array still yields an ndarray (a 1-D row).
type(x3[0, 3])

numpy.ndarray

### Array slicing

x[start:stop:step]

One-dimensional array slicing

In [None]:
# Ten consecutive integers, 0 through 9, as a 1-D array.
x1 = np.arange(0, 10)
x1

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# Every second element from index 0 up to, but not including, the 4th-from-last.
x1[0:-4:2]

array([0, 2, 4])

In [None]:
# A negative step with no bounds walks the whole array backwards.
x0 = x1[::-1]
x0

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [None]:
x1[8::-2]   # walk backwards from index 8 to the start, taking every second element

array([8, 6, 4, 2, 0])

In [None]:
# Elements at indices 1 through 8 (the stop index 9 is excluded).
x1[1:9]

array([1, 2, 3, 4, 5, 6, 7, 8])

In [None]:
# Echo the full 3-D array created earlier.
x3

array([[[5, 0, 3, 3, 7],
        [9, 3, 5, 2, 4],
        [7, 6, 8, 8, 1],
        [6, 7, 7, 8, 1]],

       [[5, 9, 8, 9, 4],
        [3, 0, 3, 5, 0],
        [2, 3, 8, 1, 3],
        [3, 3, 7, 0, 1]],

       [[9, 9, 0, 4, 7],
        [3, 2, 7, 2, 0],
        [0, 4, 5, 5, 6],
        [8, 4, 1, 4, 9]]])

In [None]:
# 2-D array (3 rows x 4 columns) of random integers in the half-open range [0, 10).
x4 = np.random.randint(low=0, high=10, size=(3, 4))
x4

array([[9, 9, 3, 6],
       [7, 2, 0, 3],
       [5, 9, 4, 4]])

In [None]:
# First three rows, first column only; slicing (rather than indexing) keeps the result 2-D.
x4[:3, :1]

array([[9],
       [7],
       [5]])

# Pandas

Pandas provides three fundamental data structures: ``Series``, ``DataFrame``, and ``Index``.

In [1]:
import pandas as pd
import numpy as np

## Series

In [2]:
# A Series built from a plain list gets a default integer index (0, 1, 2, ...).
data = pd.Series([0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [4]:
# The underlying values, exposed as a NumPy array.
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [5]:
# The index labels; with no explicit index this is a RangeIndex.
data.index

RangeIndex(start=0, stop=4, step=1)

In [9]:
# Slice entries 1 and 2 (the stop value 3 is excluded).
data[1:3]

1    0.50
2    0.75
dtype: float64

## ``Series`` as a generalized NumPy array

In [3]:
# Same values as `data`, but labelled with an explicit string index.
data1 = pd.Series(data.values, index=['a', 'b', 'c', 'd'])
data1

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [16]:
# Index labels may be arbitrary integers; they need not be ordered or contiguous.
data2 = pd.Series(data.values, index=[2, 8, 4, 7])
data2

2    0.25
8    0.50
4    0.75
7    1.00
dtype: float64

In [24]:
# The integer key is matched against the index labels (2, 8, 4, 7), not the position.
data2[4]

0.75

### As a specialized dictionary

A ``Series`` can be thought of as a specialization of a Python dictionary, with typed keys and typed values.
This typing is important: just as the type-specific compiled code behind a NumPy array makes it more efficient than a Python list for certain operations, the type information of a Pandas ``Series`` makes it much more efficient than Python dictionaries for certain operations.

The ``Series``-as-dictionary analogy can be made even more clear by constructing a ``Series`` object directly from a Python dictionary:

In [2]:
# City name -> population; the dictionary keys become the index labels of the Series.
population_dict = dict(
    Kolkata=110000000,
    Mumbai=11258484,
    Kochi=4588962,
    Chennai=895262,
)
population = pd.Series(population_dict)
population

Kolkata    110000000
Mumbai      11258484
Kochi        4588962
Chennai       895262
dtype: int64

In [30]:
## Slicing
# Label-based slices include the stop label, so 'Kochi' is part of the result.
print(population['Kolkata':'Kochi'])
# A step of 2 keeps every second entry within the same label range.
print(population['Kolkata':'Kochi':2])

Kolkata    110000000
Mumbai      11258484
Kochi        4588962
dtype: int64
Kolkata    110000000
Kochi        4588962
dtype: int64


```python
>>> pd.Series(data, index=index)
```
``data`` can be a list, a NumPy array, a dictionary, or even a single scalar (which is repeated to fill every entry of ``index``).
``index`` is an optional argument.

In [32]:
# A scalar is repeated for every label in the index.
data4 = pd.Series(data=5, index=[100, 200, 30])
data4

100    5
200    5
30     5
dtype: int64

## As DataFrame object

In [3]:
# City name -> area; the dictionary keys become the index labels of the Series.
area_dict = dict(
    Kolkata=4598,
    Mumbai=5897,
    Kochi=2365,
    Chennai=3265,
)
area = pd.Series(area_dict)
area

Kolkata    4598
Mumbai     5897
Kochi      2365
Chennai    3265
dtype: int64

In [5]:
# A DataFrame can be viewed as a specialized dictionary mapping column names to
# Series, or as a two-dimensional generalization of a Series.
df = pd.DataFrame({'population':population,
                  'area':area})
df

Unnamed: 0,population,area
Kolkata,110000000,4598
Mumbai,11258484,5897
Kochi,4588962,2365
Chennai,895262,3265


In [7]:
# Row labels of the DataFrame.
df.index

Index(['Kolkata', 'Mumbai', 'Kochi', 'Chennai'], dtype='object')

In [8]:
# Column labels of the DataFrame, also held in an Index object.
df.columns

Index(['population', 'area'], dtype='object')

# Data Selection in Series

In [5]:
# Recall the string-indexed Series defined earlier.
data1

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [7]:
# Dictionary-style lookup by index label.
data1['c']

0.75

In [8]:
# Positional lookup: ask for it explicitly with .iloc. Plain data1[2] on a
# string-labelled index relies on the integer-key fallback of Series.__getitem__,
# which newer pandas releases deprecate.
data1.iloc[2]

0.75

In [9]:
# Assigning to a label that is not yet in the index adds a new entry in place.
data1['e'] = 0.8    # modifying the series
data1

a    0.25
b    0.50
c    0.75
d    1.00
e    0.80
dtype: float64

In [11]:
# Assigning to an existing label overwrites its value in place.
data1['c'] = 0.74
data1

a    0.25
b    0.50
c    0.74
d    1.00
e    0.80
dtype: float64

In [13]:
# Two ways to slice; only the last expression of the cell is echoed as output.
data1['a':'c'] # slicing by explicit (label) index: the stop label 'c' is included
data1[0:3] # slicing by implicit integer position: the stop position 3 is excluded

a    0.25
b    0.50
c    0.74
dtype: float64

In [17]:
# Masking: select entries by a boolean condition. The mask is built from data1
# itself so that its index labels line up with the Series being filtered.
data1[(data1 > 0.3) & (data1 < 0.8)]

b    0.50
c    0.74
dtype: float64

## Indexers: loc, iloc, ix