# Pandas Basics

## Pandas Series

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats

In [2]:
# pd.Series(data, index, dtype) builds a one-dimensional labelled array.
#   data  : a list, tuple, dictionary, NumPy array or a single scalar value
#   index : the labels for the values; for list / tuple / array data it must
#           contain exactly as many labels as there are values
#   dtype : the type the values are stored as
#
# Every print below is a single-argument call so that the cell produces the
# same text on Python 2 and Python 3.

# dtype = str, data = Tuple
series = pd.Series(data=(5, 6, 7, 8), index=[1, 2, 3, 4], dtype=str)
print("dtype: str, data: Tuple")
print(series)
print("")

# dtype = int, data = List
series = pd.Series(data=[5, 6, 7, 8], index=[1, 2, 3, 4], dtype=int)
print("dtype: int, data: :List")
print(series)
print("")

# dtype = float, data = Array
series = pd.Series(data=np.array([5, 6, 7, 8]), index=[1, 2, 3, 4], dtype=float)
print("dtype: float, data: Array")
print(series)
print("")

# dtype = float, data = Dictionary
# When data is a dictionary, its keys are automatically used as the index,
# so no index argument is passed.
series = pd.Series(data={1: 5, 2: 6, 3: 7, 4: 8}, dtype=float)
print("dtype: float, data: Dictionary")
print(series)
print("")

# Creating a series from a scalar: the single value is repeated once for
# every label in the index.
series = pd.Series(data=5, index=[1, 2, 3, 4], dtype=float)
print("dtype: float, data: Scaler")
print(series)
print("")

dtype: str, data: Tuple
1    5
2    6
3    7
4    8
dtype: object

dtype: int, data: :List
1    5
2    6
3    7
4    8
dtype: int32

dtype: float, data: Array
1    5.0
2    6.0
3    7.0
4    8.0
dtype: float64

dtype: float, data: Dictionary
1    5.0
2    6.0
3    7.0
4    8.0
dtype: float64

dtype: float, data: Scaler
1    5.0
2    5.0
3    5.0
4    5.0
dtype: float64



## Indexing Series

In [3]:
series = pd.Series(data=[5, 6, 7, 8], index=['A', 'B', 'C', 'D'], dtype=float)

# Every print below is a single-argument call (label first, value second) so
# that the cell produces the same text on Python 2 and Python 3.

# Single element selection

# Direct indexing -- pass the index label.
# An integer key such as series[1] on this string-labelled index relies on a
# positional fallback that pandas has deprecated; use .iloc for positions.
print("")
print("series['B'] : ")
print(series['B'])
print("")

# Indexing using loc -- name of index
print("series.loc['B'] : ")
print(series.loc['B'])
print("")

# Indexing using iloc -- numeric position starting from 0
print("series.iloc[1] : ")
print(series.iloc[1])
print("")


# Multiple element selection

# Direct indexing -- an integer slice is positional and excludes the end
print("series[1:4] : ")
print(series[1:4])
print("")

# Indexing using loc -- name of index; a label slice includes both end points
print("series.loc[['B', 'C', 'D']] : ")
print(series.loc[['B', 'C', 'D']])
print("series.loc['B':'D'] : ")
print(series.loc['B':'D'])
print("")


# Indexing using iloc -- numeric position starting from 0, end excluded
print("series.iloc[1:4] : ")
print(series.iloc[1:4])
print("")



series[1] : 
6.0

series.loc['B'] : 
6.0

series.iloc[1] : 
6.0

series[1:4] : 
B    6.0
C    7.0
D    8.0
dtype: float64

series.loc[['B', 'C', 'D']] : 
B    6.0
C    7.0
D    8.0
dtype: float64
series.loc['B':'D'] : 
B    6.0
C    7.0
D    8.0
dtype: float64

series.iloc[1:4] : 
B    6.0
C    7.0
D    8.0
dtype: float64



## Common Functions for Series

In [4]:
# Sample data for the summary functions in the next cell. No index is given,
# so pandas assigns the default labels 0..13. The values 3 and 7 are repeated
# so that mode and value counts have something to report.
series = pd.Series(data=[1, 2, 3, 3, 4, 6, 7, 7, 7, 8, 10, 15, 22, 36])
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(series)

0      1
1      2
2      3
3      3
4      4
5      6
6      7
7      7
8      7
9      8
10    10
11    15
12    22
13    36
dtype: int64


In [5]:
# 1. All operations on series
#
# Uses the `series` created in the previous cell. Every print is a
# single-argument call so that the cell produces the same text on Python 2
# and Python 3; the double space after each label reproduces the separator
# that the old two-argument print statement inserted.

# Mathematical terms
print("Series Mean:  {}".format(np.mean(series)))
print("Series Median:  {}".format(np.median(series)))
print("Series Mode:  {}".format(stats.mode(series)))
# Series.quantile replaces np.percentile(..., interpolation='midpoint'), whose
# `interpolation` keyword NumPy has deprecated. 'midpoint' averages the two
# neighbouring values when a quartile falls between them; .values yields the
# same plain array that np.percentile returned.
quartiles = series.quantile(q=[0.25, 0.5, 0.75], interpolation='midpoint').values
print("Series Q1, Q2(Median) and Q3:  {}".format(quartiles))

# Shape of Series
print("")
print("Series Shape:  {}".format(series.shape))
# Count total number of elements
print("")
print("Series Count:  {}".format(series.count()))

# Summary of series
print("")
print("Summary Series: ")
print(series.describe())

# Count the value of individual items (Useful with categorical variables)
print("")
print("Value Count Series: ")
print(series.value_counts())

Series Mean:  9.35714285714
Series Median:  7.0
Series Mode:  ModeResult(mode=array([7], dtype=int64), count=array([3]))
Series Q1, Q2(Median) and Q3:  [3.5 7.  9. ]

Series Shape:  (14L,)

Series Count:  14

Summary Series: 
count    14.000000
mean      9.357143
std       9.467411
min       1.000000
25%       3.250000
50%       7.000000
75%       9.500000
max      36.000000
dtype: float64

Value Count Series: 
7     3
3     2
15    1
36    1
10    1
22    1
8     1
6     1
4     1
2     1
1     1
dtype: int64
