#### Pandas is designed for working with tabular or heterogeneous data

#### Dataframe : SQL database table or a worksheet in a spreadsheet application
#### Series : selecting a column from Dataframe

In [49]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

### Creating a series
#### A series is a 1D array => sequence of values + associated array of data labels, called its index

In [50]:
#Creating series from a list
series = pd.Series([4, 7, -5, 3])
print(series)
print('Printing values Without index')
print(series.values)
print('Printing index')
print(series.index)
series.index

0    4
1    7
2   -5
3    3
dtype: int64
Printing values Without index
[ 4  7 -5  3]
Printing index
RangeIndex(start=0, stop=4, step=1)


RangeIndex(start=0, stop=4, step=1)

In [51]:
#Creating series using dict : keys are used as index
pd.Series({'Apple':'Fruit',
           'Tomato':'Vegetable',
           'Orange':'Fruit',
           'Brinjal':'Vegetable'})

Apple          Fruit
Brinjal    Vegetable
Orange         Fruit
Tomato     Vegetable
dtype: object

In [52]:
#Creating series using numpy
import numpy as np
pd.Series(np.random.normal(size=5))

0    1.215915
1   -0.748082
2    0.321977
3   -0.558395
4   -0.126930
dtype: float64

#### How index is used in series

In [53]:
#index with label for each datapoint
series2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
print(series2)
print('Printing values Without index')
print(series2.values)
print('Printing index')
print(series2.index)
obj2.index

d    4
b    7
a   -5
c    3
dtype: int64
Printing values Without index
[ 4  7 -5  3]
Printing index
Index(['d', 'b', 'a', 'c'], dtype='object')


RangeIndex(start=0, stop=4, step=1)

In [54]:
print('Element with index c',series2['c'])
print(series2[['c', 'a', 'd']])

Element with index c 3
c    3
a   -5
d    4
dtype: int64


#### Using NumPy functions on Series

In [56]:
series2 * 2

d     8
b    14
a   -10
c     6
dtype: int64

In [57]:
#Applying Lambda
series2.apply(lambda x: x ** 2) 

d    16
b    49
a    25
c     9
dtype: int64

In [58]:
print(series2)
print(series2.sum())
print(series2.mean())
print(series2.cumsum())

d    4
b    7
a   -5
c    3
dtype: int64
9
2.25
d     4
b    11
a     6
c     9
dtype: int64


In [59]:
#Using numpy with pandas
np.exp(series2) 

d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

#### Boolean Selection

In [None]:
#Using NumPy functions or NumPy-like operations will preserve the index
series2[series2 > 0] # Which rows has values greater than zero

#### Accessing values & index

In [60]:
print(series2.values)
print(series2.index)
print(type(series2.values))
print(type(series2.index))

[ 4  7 -5  3]
Index(['d', 'b', 'a', 'c'], dtype='object')
<class 'numpy.ndarray'>
<class 'pandas.core.indexes.base.Index'>


In [61]:
print(len(series2),series2.size,series2.shape)

4 4 (4,)


In [62]:
#Another way to think about a Series is as a fixed-length, ordered dict, 
# as it is a mapping of index values to data values. 
'b' in series2

True

In [72]:
#Creating a series for python dict
dataDict = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
series3 = pd.Series(dataDict)
series3

Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64

In [75]:
#Overiding default index(keys in sorted order)
states = ['California', 'Ohio', 'Oregon', 'Texas'] #NAN if no value is found like for California 
series4 = pd.Series(dataDict, index=states)
series4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [76]:
#Idntify missing data
pd.isnull(series4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [77]:
pd.notnull(series4)

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [67]:
#Series’s index can be altered in-place by assignment
print(obj)
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
print("After index change")
print(obj)

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64
After index change
Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64


#### Retrieving values in a Series by label or position

In [73]:
print(series3['Ohio'])
print(series3[['Ohio','Texas']]) # NOTE : For Multiple Items

35000
Ohio     35000
Texas    71000
dtype: int64


In [69]:
#Referring using position : loc & iloc ==> Refer PandasDataframeHousingData

#### Subsetting series : getting range of values elements

In [74]:
series3['Ohio':'Texas']

Ohio      35000
Oregon    16000
Texas     71000
dtype: int64

In [70]:
#Creating DataFrame from dict of equal-length (lists or NumPy arrays)
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002, 2003],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)
frame

Unnamed: 0,pop,state,year
0,1.5,Ohio,2000
1,1.7,Ohio,2001
2,3.6,Ohio,2002
3,2.4,Nevada,2001
4,2.9,Nevada,2002
5,3.2,Nevada,2003
