# Exploring Series and DataFrame

Lets create some series

In [2]:
import pandas as pd
from pandas import Series ,DataFrame
import numpy as np

In [3]:
ser1=Series([1,2,3,3,4,5,6])
ser2=Series(['a','b','c','d'])
print(ser1)

0    1
1    2
2    3
3    3
4    4
5    5
6    6
dtype: int64


In [4]:
print(ser2)

0    a
1    b
2    c
3    d
dtype: object


In [27]:
# Create pandas index
idx=pd.Index(["uttrakhand","uttrpardesh","punjab","bihar","kerla","goa"])
print(idx)

Index([u'uttrakhand', u'uttrpardesh', u'punjab', u'bihar', u'kerla', u'goa'], dtype='object')


In [28]:
pops=Series([900,8970,7000,np.nan,6700,4500],index=idx,name='population')
print(pops)

uttrakhand      900.0
uttrpardesh    8970.0
punjab         7000.0
bihar             NaN
kerla          6700.0
goa            4500.0
Name: population, dtype: float64


In [9]:
state=Series({"uttrakhand":"Dehradun","uttrapardesh":"luckhnow","punjab":"chandigarh","Gujarat":"ahmedabad"},name="state")
print(state)

Gujarat          ahmedabad
punjab          chandigarh
uttrakhand        Dehradun
uttrapardesh      luckhnow
Name: state, dtype: object


In [10]:
state_area=Series({"uutrakhand":900.8,"punjab":876.8,"up":1000.7,"bihar":789.34},name="Area")
print(state_area)

bihar          789.34
punjab         876.80
up            1000.70
uutrakhand     900.80
Name: Area, dtype: float64


Lets see some of the way we can create dataframe , first without indices

In [11]:
mat=np.arange(0,9).reshape(3,3)
print(mat)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [12]:
print(DataFrame(mat))

   0  1  2
0  0  1  2
1  3  4  5
2  6  7  8


In [13]:
#Adding lables
print(DataFrame(mat,index=["a","b","c"],columns=["alpha","beta","gamma"]))

   alpha  beta  gamma
a      0     1      2
b      3     4      5
c      6     7      8


In [15]:
#What amount of 2D Array(each Tuple a row)
arr=[(1,"a"),(2,"b"),(3,"y"),(7,"d")]
print(type(arr))
print(arr)

<type 'list'>
[(1, 'a'), (2, 'b'), (3, 'y'), (7, 'd')]


In [16]:
print(DataFrame(arr,columns=["Numbers","Letters"]))

   Numbers Letters
0        1       a
1        2       b
2        3       y
3        7       d


In [19]:
# creating from dict
print(DataFrame({"Numbers":[1,4,6],"Letters":["a","f","o"]}))

  Letters  Numbers
0       a        1
1       f        4
2       o        6


In [20]:
# what if not all list are same length?
# we get an error
print(DataFrame({"Numbers":[1,4,6,8],"Letters":["a","f","o"]}))


ValueError: arrays must all be same length

In [21]:
# do we get an error?
print(DataFrame({"Numbers":ser1,"letters":ser2}))

   Numbers letters
0        1       a
1        2       b
2        3       c
3        3       d
4        4     NaN
5        5     NaN
6        6     NaN


Lets now create a dataframe containing cities

In [22]:
# when passed as list, series created as row
#these series are not the same length  nor all have the same enteries , nan will be generated
print(DataFrame([pops,state,state_area]))

            uttrkhand  uttrpardesh      punjab   bihar   kerla     goa  \
population      900.0       8970.0        7000     NaN  6700.0  4500.0   
state             NaN          NaN  chandigarh     NaN     NaN     NaN   
Area              NaN          NaN       876.8  789.34     NaN     NaN   

              Gujarat uttrakhand uttrapardesh      up  uutrakhand  
population        NaN        NaN          NaN     NaN         NaN  
state       ahmedabad   Dehradun     luckhnow     NaN         NaN  
Area              NaN        NaN          NaN  1000.7       900.8  


In [25]:
print(DataFrame({"population":pops,"capital":state,"Area":state_area}))

                 Area     capital  population
Gujarat           NaN   ahmedabad         NaN
bihar          789.34         NaN         NaN
goa               NaN         NaN      4500.0
kerla             NaN         NaN      6700.0
punjab         876.80  chandigarh      7000.0
up            1000.70         NaN         NaN
uttrakhand        NaN    Dehradun         NaN
uttrapardesh      NaN    luckhnow         NaN
uttrkhand         NaN         NaN       900.0
uttrpardesh       NaN         NaN      8970.0
uutrakhand     900.80         NaN         NaN


In [29]:
# Lets append data into series
pops.append(Series({"tamilnadu":9999,"andrapardesh":7777}))

uttrakhand       900.0
uttrpardesh     8970.0
punjab          7000.0
bihar              NaN
kerla           6700.0
goa             4500.0
andrapardesh    7777.0
tamilnadu       9999.0
dtype: float64

In [31]:
df= DataFrame([pops,state,state_area]).T
df.append(DataFrame({"population":Series({"tamilnadu":9999,"andrapardesh":7777}),
                    "State":Series({"tamilnadu":"chennai","andrapardesh":"hyderbad"}),
                    "Area":Series({"tamilnadu":900,"andrapardesh":899})}))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,Area,State,population,state
uttrakhand,,,900.0,Dehradun
uttrpardesh,,,8970.0,
punjab,876.8,,7000.0,chandigarh
bihar,789.34,,,
kerla,,,6700.0,
goa,,,4500.0,
Gujarat,,,,ahmedabad
uttrapardesh,,,,luckhnow
up,1000.7,,,
uutrakhand,900.8,,,


In [34]:
pd.concat([df,DataFrame({"Numbers":Series(np.arange(0,6),index=pops.index),
                        "letters":Series(["a","h","b","c""g"])})])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,Area,Numbers,letters,population,state
uttrakhand,,,,900.0,Dehradun
uttrpardesh,,,,8970.0,
punjab,876.8,,,7000.0,chandigarh
bihar,789.34,,,,
kerla,,,,6700.0,
goa,,,,4500.0,
Gujarat,,,,,ahmedabad
uttrapardesh,,,,,luckhnow
up,1000.7,,,,
uutrakhand,900.8,,,,


In [35]:
# saving dataframe
df.to_csv("sample.csv")