# Pandas Introduction

`pandas` is a Python Package that provides a powerful Data analysis and manipulation library
for Python. It provides 
1. Data Structures for convenient storage of univariate/multivariate data.
2. The tools for manipulating data. 

`pandas` is built on top of NumPy.

`pandas` is typically imported as

In [1]:
import pandas as pd

`Series` and `DataFrames` are the two important data structures provided by Pandas.

In [2]:
import numpy as np

In [3]:
x = [1, 1.2, 1.4, 1.6, 1.8, 2.0]
y = np.random.randn(10)

In [3]:
x

[1, 1.2, 1.4, 1.6, 1.8, 2.0]

In [4]:
y

array([ 0.85979671, -0.08267923,  1.05141545,  1.62388402,  0.7767119 ,
       -0.25824108,  1.2851213 ,  0.00570723,  0.87485246, -0.92873965])

In [5]:
x1 = pd.Series(x)

In [6]:
x1

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
dtype: float64

In [6]:
x2 = pd.Series([2.7, 2.1, 3.2, 2.1, 2.0, 3.2, 3.6])
x2

0    2.7
1    2.1
2    3.2
3    2.1
4    2.0
5    3.2
6    3.6
dtype: float64

In [7]:
x2.values

array([2.7, 2.1, 3.2, 2.1, 2. , 3.2, 3.6])

In [8]:
type(x2.values)

numpy.ndarray

In [15]:
x2.size

7

In [18]:
x2.index = ['a', 'b', 'c', 'd', 'e', 'f', 'g']

In [19]:
x2

a    2.7
b    2.1
c    3.2
d    2.1
e    2.0
f    3.2
g    3.6
dtype: float64

In [9]:
x3 = pd.Series([10, 15, 13, 18, 11], index = ['A', 'B', 'C', 'D', 'E'])
x3

A    10
B    15
C    13
D    18
E    11
dtype: int64

In [23]:
x3['B':'E']

B    15
C    13
D    18
E    11
dtype: int64

In [26]:
x3[1:3]

B    15
C    13
dtype: int64

In [27]:
x3[[0, 2, 4]]

A    10
C    13
E    11
dtype: int64

In [47]:
x3[x3>=15]

B    15
D    18
dtype: int64

In [11]:
eligible = x3>= 15
x3[eligible]

B    15
D    18
dtype: int64

In [6]:
y1 = pd.Series(y)
y1

0    1.379368
1   -1.469972
2   -1.634359
3   -0.242335
4    0.675309
5   -0.031965
6   -0.410442
7   -0.158391
8    0.367156
9    0.292908
dtype: float64

In [28]:
y1[1]

-1.469971740662337

In [33]:
y1[:3]

0    1.379368
1   -1.469972
2   -1.634359
dtype: float64

In [34]:
y1[[2, 6, 8]]

2   -1.634359
6   -0.410442
8    0.367156
dtype: float64

In [37]:
y2 = y1[[2, 6, 2]]
y2

2   -1.634359
6   -0.410442
2   -1.634359
dtype: float64

In [74]:
y2.iloc[1]

-0.41044170171593974

In [13]:
x4 = pd.Series({'A':34, 'C':38, 'E': 32})
x4

A    34
C    38
E    32
dtype: int64

In [14]:
x4.loc['A']

34

In [15]:
x4.iloc[0]

34

In [16]:
x4[0]

34

In [51]:
x3

A    10
B    15
C    13
D    18
E    11
dtype: int64

In [56]:
x3 = x3[:3]
x3

A    10
B    15
C    13
dtype: int64

In [17]:
x4 = pd.Series({'C':34, 'B':38, 'A': 32})
x4

C    34
B    38
A    32
dtype: int64

In [83]:
x4*2;,.  

C    68
B    76
A    64
dtype: int64

In [18]:
x5 = pd.Series({'A':14, 'B':18, 'C': 12})
x5

A    14
B    18
C    12
dtype: int64

In [19]:
x4+x5

A    46
B    56
C    46
dtype: int64

In [64]:
x3.name = 'Scores'
x3.index.name = 'Player'
x3

Player
A    10
B    15
C    13
Name: Scores, dtype: int64

In [66]:
x3[0]

10

In [12]:
df1 = pd.DataFrame({'x':x, 'y':y[:6]})

In [13]:
df1

Unnamed: 0,x,y
0,1.0,0.309078
1,1.2,0.048749
2,1.4,-1.399838
3,1.6,0.066904
4,1.8,-0.357084
5,2.0,-0.240697


In [14]:
df1.index

RangeIndex(start=0, stop=6, step=1)

In [15]:
df1.columns

Index(['x', 'y'], dtype='object')

In [16]:
df1.x

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
Name: x, dtype: float64

In [17]:
df1['x']

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
Name: x, dtype: float64

In [18]:
df1.columns = ['x values', 'y values']
df1

Unnamed: 0,x values,y values
0,1.0,0.309078
1,1.2,0.048749
2,1.4,-1.399838
3,1.6,0.066904
4,1.8,-0.357084
5,2.0,-0.240697


In [20]:
df1.index = ['a', 'b', 'c', 'd', 'e', 'f']

In [21]:
df1

Unnamed: 0,x values,y values
a,1.0,0.309078
b,1.2,0.048749
c,1.4,-1.399838
d,1.6,0.066904
e,1.8,-0.357084
f,2.0,-0.240697


In [23]:
df1['x values']

a    1.0
b    1.2
c    1.4
d    1.6
e    1.8
f    2.0
Name: x values, dtype: float64

In [24]:
df1.mean()

x values    1.500000
y values   -0.262148
dtype: float64

In [26]:
df1['id'] = ['i1', 'i2', 'i3', 'i4', 'i5', 'i6']
df1

Unnamed: 0,x values,y values,id
a,1.0,0.309078,i1
b,1.2,0.048749,i2
c,1.4,-1.399838,i3
d,1.6,0.066904,i4
e,1.8,-0.357084,i5
f,2.0,-0.240697,i6


In [27]:
df1.mean()

x values    1.500000
y values   -0.262148
dtype: float64

In [28]:
df1.std()

x values    0.374166
y values    0.605812
dtype: float64

In [29]:
df1.var()

x values    0.140000
y values    0.367008
dtype: float64