# Pandas (Panel-data)

The Pandas library is built on NumPy and provides easy-to-use data structures and data analysis tools for the Python programming language.


### Conventional way to import Pandas in Python

In [1]:
import pandas as pd

### SERIES: A one-dimensional labeled array capable of holding any data type

In [20]:
# A Series pairs each value with an index label; mixing strings and an
# integer makes pandas store the values with dtype=object.
data = pd.Series(
    ["John", "Doe", 38],
    index=("First name", "Last name", "Age"),
)
print(data)

First name    John
Last name      Doe
Age             38
dtype: object


### DATAFRAME: A two-dimensional labeled data structure with columns of potentially different types

In [21]:
# Column-oriented input: each key is a column label and each list holds
# that column's values.
# The mapping is named `people` rather than `dict` so that the built-in
# `dict` type is not shadowed for every later cell in the kernel.
people = {
    'Name': ['Harry', 'Mark', 'Tony'],
    'Last name': ['Potter', 'Zuckerburg', 'Stark'],
    'Age': [16, 20, 25],
}
# Passing `columns=` fixes the column order explicitly.
data = pd.DataFrame(people, columns=['Name', 'Last name', 'Age'])

print(data)

    Name   Last name  Age
0  Harry      Potter   16
1   Mark  Zuckerburg   20
2   Tony       Stark   25


In [15]:
# Same data without `columns=`: the column order is then whatever pandas
# derives from the dict. Older pandas/Python versions sorted the keys
# alphabetically; newer ones keep insertion order.
# The mapping is named `people` rather than `dict` so that the built-in
# `dict` type is not shadowed for every later cell in the kernel.
people = {
    'Name': ['Harry', 'Mark', 'Tony'],
    'Last name': ['Potter', 'Zuckerburg', 'Stark'],
    'Age': [16, 20, 25],
}
data = pd.DataFrame(people)
print(data)

   Age   Last name   Name
0   16      Potter  Harry
1   20  Zuckerburg   Mark
2   25       Stark   Tony


In [16]:
# Row-oriented input: every inner list becomes one ROW of the frame, and
# with no labels supplied both axes get default integer labels.
mylist = [
    ['Harry', 'Mark', 'Tony'],
    ['Potter', 'Zuckerburg', 'Stark'],
    [16, 20, 25],
]
data = pd.DataFrame(mylist)
data.head()

Unnamed: 0,0,1,2
0,Harry,Mark,Tony
1,Potter,Zuckerburg,Stark
2,16,20,25


### Reading & Writing csv

In [6]:
# The write step below is left commented out, so "myPanda.csv" must already
# exist in the working directory for read_csv to succeed.
#data.to_csv("myPanda.csv")
df = pd.read_csv("myPanda.csv")
print(type(df))  # read_csv hands back a DataFrame
print(df)

<class 'pandas.core.frame.DataFrame'>
   1       0           3   name
0  0   Harry        Mark   Tony
1  1  Potter  Zuckerburg  Stark
2  2      16          20     25


In [7]:
# .values exposes the frame's contents as a plain NumPy array
data = df.values
type(data)

numpy.ndarray

### Accessing Elements

In [31]:
# Build a small 3x3 integer frame (one inner list per row) with named
# columns; the cells below index into it.
df = pd.DataFrame(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ],
    columns=['a', 'b', 'c'],
)
print(df)

   a  b  c
0  1  2  3
1  4  5  6
2  7  8  9


In [33]:
# Access entire column: indexing with a column label returns that column as a Series
print(df['a'])

0    1
1    4
2    7
Name: a, dtype: int64


In [26]:
# Access set of rows for all columns: a slice inside [] selects rows.
# The end of the slice is exclusive, so 0:2 yields rows 0 and 1.
print(df[0:2])

   a  b  c
0  1  2  3
1  4  5  6


In [38]:
# Access single row: a one-row slice keeps the result a DataFrame
print(df[1:2])

   a  b  c
1  4  5  6


In [54]:
# Select by integer position with .iloc (row 0, column 0).
# Passing lists keeps the result a 1x1 DataFrame rather than a bare scalar.
print(df.iloc[[0],[0]])

# WRONG: df.iloc[[1],['a']] -- .iloc accepts integer positions only, not labels such as 'a'

   a
0  1


In [58]:
# Select by row label and column label with .loc (list arguments give a DataFrame)

print(df.loc[[0],['a']])

   a
0  1


In [65]:
# Select single column: all rows (:) of the column labelled 'a', returned as a Series.
# .loc is used because the .ix indexer was deprecated in pandas 0.20 and
# removed in pandas 1.0.
print(df.loc[:, 'a'])
#df.loc[1, 'a']  # single value at row label 1, column 'a'

0    1
1    4
2    7
Name: a, dtype: int64


In [69]:
# Select single row by its index label; the row comes back as a Series
# whose index holds the column names.
# .loc is used because the .ix indexer was deprecated in pandas 0.20 and
# removed in pandas 1.0.
print(df.loc[2])
print(type(df.loc[2]))

a    7
b    8
c    9
Name: 2, dtype: int64
<class 'pandas.core.series.Series'>


### Manipulating DataFrames

In [85]:
# Build the frame used in this section from a column-oriented dict:
# one key per column, one list entry per row.
data = {
    'Country': ['Belgium', 'India', 'Brazil'],
    'Capital': ['Brussels', 'Delhi', 'Brasilia'],
    'Population': [11.5, 130.3, 20.7],
}
df = pd.DataFrame(data)
print(df)

    Capital  Country  Population
0  Brussels  Belgium        11.5
1     Delhi    India       130.3
2  Brasilia   Brazil        20.7


In [86]:
# Dropping entire column (axis=1 means the label names a column, not a row).
# Note: df is reassigned here, so re-running this cell on its own fails --
# 'Country' is no longer present the second time.
df = df.drop('Country', axis = 1)
print(df)

    Capital  Population
0  Brussels        11.5
1     Delhi       130.3
2  Brasilia        20.7


In [87]:
# Drop the row labelled 0. The result is only displayed, not assigned,
# so df itself is left unchanged.
df.drop([0])

Unnamed: 0,Capital,Population
1,Delhi,130.3
2,Brasilia,20.7


In [88]:
# Show the first rows of df (head() defaults to five rows)
df.head()

Unnamed: 0,Capital,Population
0,Brussels,11.5
1,Delhi,130.3
2,Brasilia,20.7


In [89]:
# Sort by labels along an axis (the row index by default); the sorted frame
# is displayed, df is not reassigned
df.sort_index()

Unnamed: 0,Capital,Population
0,Brussels,11.5
1,Delhi,130.3
2,Brasilia,20.7


In [91]:
# Sort by the values along an axis: rows are ordered by the 'Capital' column;
# the sorted frame is displayed, df is not reassigned
df.sort_values(by='Capital')

Unnamed: 0,Capital,Population
2,Brasilia,20.7
0,Brussels,11.5
1,Delhi,130.3


In [92]:
# Rank the values within each column (1 = smallest; strings are ranked alphabetically)
df.rank()

Unnamed: 0,Capital,Population
0,2.0,1.0
1,3.0,3.0
2,1.0,2.0


In [96]:
# Show the column labels. Only the last expression of a cell is displayed,
# so a bare `df.columns` above it would be evaluated and silently discarded;
# print it explicitly instead.
print(df.columns)
df.head()

Unnamed: 0,Capital,Population
0,Brussels,11.5
1,Delhi,130.3
2,Brasilia,20.7
