# Introduction to Pandas DataFrame

In [1]:
import pandas as pd
import numpy as np

In [4]:
# Raw figures keyed by city name; the two mappings list the cities in
# different orders on purpose-agnostic grounds -- pandas aligns on the keys.
city_pop = {
    'Istanbul': 15000000,
    'Izmir': 2500000,
    'Ankara': 3000000,
}
city_area = {
    'Istanbul': 212,
    'Ankara': 312,
    'Izmir': 332,
}

# Wrapping a dict in a Series turns its keys into the index labels and its
# values into the data.
population = pd.Series(city_pop)
area = pd.Series(city_area)

In [5]:
# Display the population Series: city names are the index, counts the values.
population

Ankara       3000000
Istanbul    15000000
Izmir        2500000
dtype: int64

In [6]:
# Display the area Series: same city labels as `population`, area as values.
area

Ankara      312
Istanbul    212
Izmir       332
dtype: int64

In [7]:
# Combine the two Series into one table: each dict key becomes a column name,
# and rows are matched up by the shared city labels in the Series indexes.
cities = pd.DataFrame(
    {
        'Area': area,
        'Population': population,
    }
)
cities

Unnamed: 0,Area,Population
Ankara,312,3000000
Istanbul,212,15000000
Izmir,332,2500000


In [8]:
# Row labels of the frame (here, the city names).
cities.index

Index(['Ankara', 'Istanbul', 'Izmir'], dtype='object')

In [9]:
# Column labels of the frame; like the row labels, they are held in an Index.
cities.columns

Index(['Area', 'Population'], dtype='object')

In [10]:
# Selecting a single column by name returns a Series that keeps the row index;
# the Series' name is the column label.
cities['Area']

Ankara      312
Istanbul    212
Izmir       332
Name: Area, dtype: int64

In [12]:
# Same dictionary-style column access, this time for the Population column.
cities['Population']

Ankara       3000000
Istanbul    15000000
Izmir        2500000
Name: Population, dtype: int64

In [29]:
# Add a derived column: element-wise ratio of two existing columns. The
# division is aligned row by row on the city index, and the result is float.
# NOTE: this adds the column to `cities` in place.
cities['Density'] = cities['Population'] / cities['Area']

# Bracket access, consistent with the column lookups in the cells above.
cities['Density']

Ankara       9615.384615
Istanbul    70754.716981
Izmir        7530.120482
Name: Density, dtype: float64

In [30]:
# Show the whole frame, which now carries the derived Density column.
cities

Unnamed: 0,Area,Population,Density
Ankara,312,3000000,9615.384615
Istanbul,212,15000000,70754.716981
Izmir,332,2500000,7530.120482


In [31]:
# Underlying data as a 2-D NumPy array (one row per city). A NumPy array has a
# single dtype, so with the float Density column present the integer columns
# are shown as floating point too.
cities.values

array([[  3.12000000e+02,   3.00000000e+06,   9.61538462e+03],
       [  2.12000000e+02,   1.50000000e+07,   7.07547170e+04],
       [  3.32000000e+02,   2.50000000e+06,   7.53012048e+03]])

In [32]:
# Transpose: former columns become rows and cities become columns. Each new
# column mixes values from int and float columns, so everything displays as float.
cities.T

Unnamed: 0,Ankara,Istanbul,Izmir
Area,312.0,212.0,332.0
Population,3000000.0,15000000.0,2500000.0
Density,9615.385,70754.72,7530.12


In [33]:
# Re-display `cities`: `.T` returned a new transposed frame and left the
# original untouched.
cities

Unnamed: 0,Area,Population,Density
Ankara,312,3000000,9615.384615
Istanbul,212,15000000,70754.716981
Izmir,332,2500000,7530.120482


In [34]:
# Indexing the raw array with a single integer is positional and returns the
# first row (the Ankara row in the output shown), without any labels.
cities.values[0]

array([  3.12000000e+02,   3.00000000e+06,   9.61538462e+03])

In [35]:
# Position-based slicing: rows 0-2 and columns 0-1. As with Python slices,
# the stop position is excluded, so column 2 (Density) is left out.
cities.iloc[:3, :2]

Unnamed: 0,Area,Population
Ankara,312,3000000
Istanbul,212,15000000
Izmir,332,2500000


In [36]:
# Label-based slicing: unlike positional slices, BOTH endpoints are included --
# rows up to and including 'Istanbul', columns up to and including 'Population'.
cities.loc[:'Istanbul', :'Population']

Unnamed: 0,Area,Population
Ankara,312,3000000
Istanbul,212,15000000


In [38]:
# Boolean mask on the rows (keep cities whose Density exceeds 100) combined
# with a list of column labels, which also sets the column order of the result.
cities.loc[cities.Density > 100, ['Population', 'Area']]

Unnamed: 0,Population,Area
Ankara,3000000,312
Istanbul,15000000,212
Izmir,2500000,332


In [40]:
# Positional assignment: overwrite the cell at row 0, column 2 (Ankara's
# Density in the output shown).
# NOTE: this mutates `cities` in place, so outputs displayed by earlier cells
# no longer reflect its current contents.
cities.iloc[0,2] = 736
cities

Unnamed: 0,Area,Population,Density
Ankara,312,3000000,736.0
Istanbul,212,15000000,70754.716981
Izmir,332,2500000,7530.120482


In [42]:
# Label-based assignment: overwrite the cell addressed by row label 'Istanbul'
# and column label 'Density'. Like the positional form, this edits `cities`
# in place.
cities.loc['Istanbul', 'Density'] = 2323
cities

Unnamed: 0,Area,Population,Density
Ankara,312,3000000,736.0
Istanbul,212,15000000,2323.0
Izmir,332,2500000,7530.120482


In [16]:
# Build a one-column DataFrame from a single Series: the Series' index becomes
# the row index and `columns` supplies the name of the lone column.
data = pd.DataFrame(population, columns=['Population'])
data

Unnamed: 0,Population
Ankara,3000000
Istanbul,15000000
Izmir,2500000


In [20]:
# A list of dicts is read as a list of rows: every dict is one record and its
# keys become the column names. The row index defaults to 0..n-1.
data = [dict(a=1, b=2 * i) for i in range(5)]
df = pd.DataFrame(data)
df

Unnamed: 0,a,b
0,1,0
1,1,2
2,1,4
3,1,6
4,1,8


In [21]:
# Same list-of-records construction with the rows written out explicitly:
# two records sharing the keys 'a' and 'b' give a 2x2 frame.
df = pd.DataFrame(
    [
        dict(a=1, b=2),
        dict(a=3, b=4),
    ]
)
df

Unnamed: 0,a,b
0,1,2
1,3,4


In [23]:
# Build a frame from a 2-D NumPy array, naming the columns and rows explicitly.
# The array holds random floats and no seed is set in this cell, so the values
# shown will differ from run to run.
df = pd.DataFrame(
    data=np.random.rand(3, 2),
    index=[0, 1, 2],
    columns=['a', 'b'],
)
df

Unnamed: 0,a,b
0,0.861226,0.276579
1,0.32587,0.057027
2,0.308663,0.037194
